3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
185 #include "keywords.h"
187 /* CLINE is a macro that ensures PL_copline has a sane value */
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
192 # define SKIPSPACE0(s) skipspace0(s)
193 # define SKIPSPACE1(s) skipspace1(s)
194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195 # define PEEKSPACE(s) skipspace2(s,0)
197 # define SKIPSPACE0(s) skipspace(s)
198 # define SKIPSPACE1(s) skipspace(s)
199 # define SKIPSPACE2(s,tsv) skipspace(s)
200 # define PEEKSPACE(s) skipspace(s)
204 * Convenience functions to return different tokens and prime the
205 * lexer for the next token. They all take an argument.
207 * TOKEN : generic token (used for '(', DOLSHARP, etc)
208 * OPERATOR : generic operator
209 * AOPERATOR : assignment operator
210 * PREBLOCK : beginning the block after an if, while, foreach, ...
211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212 * PREREF : *EXPR where EXPR is not a simple identifier
213 * TERM : expression term
214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
215 * LOOPX : loop exiting command (goto, last, dump, etc)
216 * FTST : file test operator
217 * FUN0 : zero-argument function
218 * FUN0OP : zero-argument function, with its op created in this file
219 * FUN1 : not used, except for not, which isn't a UNIOP
220 * BOop : bitwise or or xor
222 * SHop : shift operator
223 * PWop : power operator
224 * PMop : pattern-matching operator
225 * Aop : addition-level operator
226 * Mop : multiplication-level operator
227 * Eop : equality-testing operator
228 * Rop : relational operator <= != gt
230 * Also see LOP and lop() below.
233 #ifdef DEBUGGING /* Serve -DT. */
234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
236 # define REPORT(retval) (retval)
239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
262 /* This bit of chicanery makes a unary function followed by
263 * a parenthesis into a function with one argument, highest precedence.
264 * The UNIDOR macro is for unary functions that can be followed by the //
265 * operator (such as C<shift // 0>).
267 #define UNI3(f,x,have_x) { \
268 pl_yylval.ival = f; \
269 if (have_x) PL_expect = x; \
271 PL_last_uni = PL_oldbufptr; \
272 PL_last_lop_op = f; \
274 return REPORT( (int)FUNC1 ); \
276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 #define UNI(f) UNI3(f,XTERM,1)
279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
280 #define UNIPROTO(f,optional) { \
281 if (optional) PL_last_uni = PL_oldbufptr; \
285 #define UNIBRACK(f) UNI3(f,0,0)
287 /* grandfather return to old style */
290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
292 pl_yylval.ival = (f); \
298 #define COPLINE_INC_WITH_HERELINES \
300 CopLINE_inc(PL_curcop); \
301 if (PL_parser->herelines) \
302 CopLINE(PL_curcop) += PL_parser->herelines, \
303 PL_parser->herelines = 0; \
305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
306 * is no sublex_push to follow. */
307 #define COPLINE_SET_FROM_MULTI_END \
309 CopLINE_set(PL_curcop, PL_multi_end); \
310 if (PL_multi_end != PL_multi_start) \
311 PL_parser->herelines = 0; \
317 /* how to interpret the pl_yylval associated with the token */
321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 static struct debug_tokens {
328 enum token_type type;
330 } const debug_tokens[] =
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
355 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
363 { IF, TOKENTYPE_IVAL, "IF" },
364 { LABEL, TOKENTYPE_PVAL, "LABEL" },
365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
370 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
371 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
372 { MY, TOKENTYPE_IVAL, "MY" },
373 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
374 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
375 { OROP, TOKENTYPE_IVAL, "OROP" },
376 { OROR, TOKENTYPE_NONE, "OROR" },
377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
378 { PEG, TOKENTYPE_NONE, "PEG" },
379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
384 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
385 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
386 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
387 { PREINC, TOKENTYPE_NONE, "PREINC" },
388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
390 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
391 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
394 { SUB, TOKENTYPE_NONE, "SUB" },
395 { THING, TOKENTYPE_OPVAL, "THING" },
396 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
401 { USE, TOKENTYPE_IVAL, "USE" },
402 { WHEN, TOKENTYPE_IVAL, "WHEN" },
403 { WHILE, TOKENTYPE_IVAL, "WHILE" },
404 { WORD, TOKENTYPE_OPVAL, "WORD" },
405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
406 { 0, TOKENTYPE_NONE, NULL }
409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
416 PERL_ARGS_ASSERT_TOKEREPORT;
419 const char *name = NULL;
420 enum token_type type = TOKENTYPE_NONE;
421 const struct debug_tokens *p;
422 SV* const report = newSVpvs("<== ");
424 for (p = debug_tokens; p->token; p++) {
425 if (p->token == (int)rv) {
432 Perl_sv_catpv(aTHX_ report, name);
433 else if ((char)rv > ' ' && (char)rv <= '~')
435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
437 sv_catpvs(report, " (pending identifier)");
440 sv_catpvs(report, "EOF");
442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
449 case TOKENTYPE_OPNUM:
450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
451 PL_op_name[lvalp->ival]);
454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
456 case TOKENTYPE_OPVAL:
458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
459 PL_op_name[lvalp->opval->op_type]);
460 if (lvalp->opval->op_type == OP_CONST) {
461 Perl_sv_catpvf(aTHX_ report, " %s",
462 SvPEEK(cSVOPx_sv(lvalp->opval)));
467 sv_catpvs(report, "(opval=null)");
470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
476 /* print the buffer with suitable escapes */
479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
481 SV* const tmp = newSVpvs("");
483 PERL_ARGS_ASSERT_PRINTBUF;
485 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
492 S_deprecate_commaless_var_list(pTHX) {
494 deprecate("comma-less variable list");
495 return REPORT(','); /* grandfather non-comma-format format */
501 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
502 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
506 S_ao(pTHX_ int toketype)
509 if (*PL_bufptr == '=') {
511 if (toketype == ANDAND)
512 pl_yylval.ival = OP_ANDASSIGN;
513 else if (toketype == OROR)
514 pl_yylval.ival = OP_ORASSIGN;
515 else if (toketype == DORDOR)
516 pl_yylval.ival = OP_DORASSIGN;
524 * When Perl expects an operator and finds something else, no_op
525 * prints the warning. It always prints "<something> found where
526 * operator expected. It prints "Missing semicolon on previous line?"
527 * if the surprise occurs at the start of the line. "do you need to
528 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
529 * where the compiler doesn't know if foo is a method call or a function.
530 * It prints "Missing operator before end of line" if there's nothing
531 * after the missing operator, or "... before <...>" if there is something
532 * after the missing operator.
536 S_no_op(pTHX_ const char *const what, char *s)
539 char * const oldbp = PL_bufptr;
540 const bool is_first = (PL_oldbufptr == PL_linestart);
542 PERL_ARGS_ASSERT_NO_OP;
548 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
549 if (ckWARN_d(WARN_SYNTAX)) {
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
552 "\t(Missing semicolon on previous line?)\n");
553 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
555 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
556 t += UTF ? UTF8SKIP(t) : 1)
558 if (t < PL_bufptr && isSPACE(*t))
559 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
560 "\t(Do you need to predeclare %"UTF8f"?)\n",
561 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
565 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
566 "\t(Missing operator before %"UTF8f"?)\n",
567 UTF8fARG(UTF, s - oldbp, oldbp));
575 * Complain about missing quote/regexp/heredoc terminator.
576 * If it's called with NULL then it cauterizes the line buffer.
577 * If we're in a delimited string and the delimiter is a control
578 * character, it's reformatted into a two-char sequence like ^C.
583 S_missingterm(pTHX_ char *s)
589 char * const nl = strrchr(s,'\n');
593 else if ((U8) PL_multi_close < 32) {
595 tmpbuf[1] = (char)toCTRL(PL_multi_close);
600 *tmpbuf = (char)PL_multi_close;
604 q = strchr(s,'"') ? '\'' : '"';
605 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
611 * Check whether the named feature is enabled.
614 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
617 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
619 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
621 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
623 if (namelen > MAX_FEATURE_LEN)
625 memcpy(&he_name[8], name, namelen);
627 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
628 REFCOUNTED_HE_EXISTS));
632 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
633 * utf16-to-utf8-reversed.
636 #ifdef PERL_CR_FILTER
640 const char *s = SvPVX_const(sv);
641 const char * const e = s + SvCUR(sv);
643 PERL_ARGS_ASSERT_STRIP_RETURN;
645 /* outer loop optimized to do nothing if there are no CR-LFs */
647 if (*s++ == '\r' && *s == '\n') {
648 /* hit a CR-LF, need to copy the rest */
652 if (*s == '\r' && s[1] == '\n')
663 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
665 const I32 count = FILTER_READ(idx+1, sv, maxlen);
666 if (count > 0 && !maxlen)
673 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
675 Creates and initialises a new lexer/parser state object, supplying
676 a context in which to lex and parse from a new source of Perl code.
677 A pointer to the new state object is placed in L</PL_parser>. An entry
678 is made on the save stack so that upon unwinding the new state object
679 will be destroyed and the former value of L</PL_parser> will be restored.
680 Nothing else need be done to clean up the parsing context.
682 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
683 non-null, provides a string (in SV form) containing code to be parsed.
684 A copy of the string is made, so subsequent modification of I<line>
685 does not affect parsing. I<rsfp>, if non-null, provides an input stream
686 from which code will be read to be parsed. If both are non-null, the
687 code in I<line> comes first and must consist of complete lines of input,
688 and I<rsfp> supplies the remainder of the source.
690 The I<flags> parameter is reserved for future use. Currently it is only
691 used by perl internally, so extensions should always pass zero.
696 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
697 can share filters with the current parser.
698 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
699 caller, hence isn't owned by the parser, so shouldn't be closed on parser
700 destruction. This is used to handle the case of defaulting to reading the
701 script from the standard input because no filename was given on the command
702 line (without getting confused by situation where STDIN has been closed, so
703 the script handle is opened on fd 0) */
706 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
709 const char *s = NULL;
710 yy_parser *parser, *oparser;
711 if (flags && flags & ~LEX_START_FLAGS)
712 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
714 /* create and initialise a parser */
716 Newxz(parser, 1, yy_parser);
717 parser->old_parser = oparser = PL_parser;
720 parser->stack = NULL;
722 parser->stack_size = 0;
724 /* on scope exit, free this parser and restore any outer one */
726 parser->saved_curcop = PL_curcop;
728 /* initialise lexer state */
731 parser->curforce = -1;
733 parser->nexttoke = 0;
735 parser->error_count = oparser ? oparser->error_count : 0;
736 parser->copline = parser->preambling = NOLINE;
737 parser->lex_state = LEX_NORMAL;
738 parser->expect = XSTATE;
740 parser->rsfp_filters =
741 !(flags & LEX_START_SAME_FILTER) || !oparser
743 : MUTABLE_AV(SvREFCNT_inc(
744 oparser->rsfp_filters
745 ? oparser->rsfp_filters
746 : (oparser->rsfp_filters = newAV())
749 Newx(parser->lex_brackstack, 120, char);
750 Newx(parser->lex_casestack, 12, char);
751 *parser->lex_casestack = '\0';
752 Newxz(parser->lex_shared, 1, LEXSHARED);
756 s = SvPV_const(line, len);
757 parser->linestr = flags & LEX_START_COPIED
758 ? SvREFCNT_inc_simple_NN(line)
759 : newSVpvn_flags(s, len, SvUTF8(line));
760 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
762 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
764 parser->oldoldbufptr =
767 parser->linestart = SvPVX(parser->linestr);
768 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
769 parser->last_lop = parser->last_uni = NULL;
770 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
771 |LEX_DONT_CLOSE_RSFP);
773 parser->in_pod = parser->filtered = 0;
777 /* delete a parser object */
780 Perl_parser_free(pTHX_ const yy_parser *parser)
782 PERL_ARGS_ASSERT_PARSER_FREE;
784 PL_curcop = parser->saved_curcop;
785 SvREFCNT_dec(parser->linestr);
787 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
788 PerlIO_clearerr(parser->rsfp);
789 else if (parser->rsfp && (!parser->old_parser ||
790 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
791 PerlIO_close(parser->rsfp);
792 SvREFCNT_dec(parser->rsfp_filters);
793 SvREFCNT_dec(parser->lex_stuff);
794 SvREFCNT_dec(parser->sublex_info.repl);
796 Safefree(parser->lex_brackstack);
797 Safefree(parser->lex_casestack);
798 Safefree(parser->lex_shared);
799 PL_parser = parser->old_parser;
804 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
807 I32 nexttoke = parser->lasttoke;
809 I32 nexttoke = parser->nexttoke;
811 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
814 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
816 && parser->nexttoke[nexttoke].next_val.opval
817 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
818 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
819 op_free(parser->nexttoke[nexttoke].next_val.opval);
820 parser->nexttoke[nexttoke].next_val.opval = NULL;
823 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
824 && parser->nextval[nexttoke].opval
825 && parser->nextval[nexttoke].opval->op_slabbed
826 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
827 op_free(parser->nextval[nexttoke].opval);
828 parser->nextval[nexttoke].opval = NULL;
836 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
838 Buffer scalar containing the chunk currently under consideration of the
839 text currently being lexed. This is always a plain string scalar (for
840 which C<SvPOK> is true). It is not intended to be used as a scalar by
841 normal scalar means; instead refer to the buffer directly by the pointer
842 variables described below.
844 The lexer maintains various C<char*> pointers to things in the
845 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
846 reallocated, all of these pointers must be updated. Don't attempt to
847 do this manually, but rather use L</lex_grow_linestr> if you need to
848 reallocate the buffer.
850 The content of the text chunk in the buffer is commonly exactly one
851 complete line of input, up to and including a newline terminator,
852 but there are situations where it is otherwise. The octets of the
853 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
854 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
855 flag on this scalar, which may disagree with it.
857 For direct examination of the buffer, the variable
858 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
859 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
860 of these pointers is usually preferable to examination of the scalar
861 through normal scalar means.
863 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
865 Direct pointer to the end of the chunk of text currently being lexed, the
866 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
867 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
868 always located at the end of the buffer, and does not count as part of
869 the buffer's contents.
871 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
873 Points to the current position of lexing inside the lexer buffer.
874 Characters around this point may be freely examined, within
875 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
876 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
877 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
879 Lexing code (whether in the Perl core or not) moves this pointer past
880 the characters that it consumes. It is also expected to perform some
881 bookkeeping whenever a newline character is consumed. This movement
882 can be more conveniently performed by the function L</lex_read_to>,
883 which handles newlines appropriately.
885 Interpretation of the buffer's octets can be abstracted out by
886 using the slightly higher-level functions L</lex_peek_unichar> and
887 L</lex_read_unichar>.
889 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
891 Points to the start of the current line inside the lexer buffer.
892 This is useful for indicating at which column an error occurred, and
893 not much else. This must be updated by any lexing code that consumes
894 a newline; the function L</lex_read_to> handles this detail.
900 =for apidoc Amx|bool|lex_bufutf8
902 Indicates whether the octets in the lexer buffer
903 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
904 of Unicode characters. If not, they should be interpreted as Latin-1
905 characters. This is analogous to the C<SvUTF8> flag for scalars.
907 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
908 contains valid UTF-8. Lexing code must be robust in the face of invalid
911 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
912 is significant, but not the whole story regarding the input character
913 encoding. Normally, when a file is being read, the scalar contains octets
914 and its C<SvUTF8> flag is off, but the octets should be interpreted as
915 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
916 however, the scalar may have the C<SvUTF8> flag on, and in this case its
917 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
918 is in effect. This logic may change in the future; use this function
919 instead of implementing the logic yourself.
925 Perl_lex_bufutf8(pTHX)
931 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
933 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
934 at least I<len> octets (including terminating NUL). Returns a
935 pointer to the reallocated buffer. This is necessary before making
936 any direct modification of the buffer that would increase its length.
937 L</lex_stuff_pvn> provides a more convenient way to insert text into
940 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
941 this function updates all of the lexer's variables that point directly
948 Perl_lex_grow_linestr(pTHX_ STRLEN len)
952 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
953 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
954 linestr = PL_parser->linestr;
955 buf = SvPVX(linestr);
956 if (len <= SvLEN(linestr))
958 bufend_pos = PL_parser->bufend - buf;
959 bufptr_pos = PL_parser->bufptr - buf;
960 oldbufptr_pos = PL_parser->oldbufptr - buf;
961 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
962 linestart_pos = PL_parser->linestart - buf;
963 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
964 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
965 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
966 PL_parser->lex_shared->re_eval_start - buf : 0;
968 buf = sv_grow(linestr, len);
970 PL_parser->bufend = buf + bufend_pos;
971 PL_parser->bufptr = buf + bufptr_pos;
972 PL_parser->oldbufptr = buf + oldbufptr_pos;
973 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
974 PL_parser->linestart = buf + linestart_pos;
975 if (PL_parser->last_uni)
976 PL_parser->last_uni = buf + last_uni_pos;
977 if (PL_parser->last_lop)
978 PL_parser->last_lop = buf + last_lop_pos;
979 if (PL_parser->lex_shared->re_eval_start)
980 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
985 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
987 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
988 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
989 reallocating the buffer if necessary. This means that lexing code that
990 runs later will see the characters as if they had appeared in the input.
991 It is not recommended to do this as part of normal parsing, and most
992 uses of this facility run the risk of the inserted characters being
993 interpreted in an unintended manner.
995 The string to be inserted is represented by I<len> octets starting
996 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
997 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
998 The characters are recoded for the lexer buffer, according to how the
999 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1000 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1001 function is more convenient.
1007 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1011 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1012 if (flags & ~(LEX_STUFF_UTF8))
1013 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1015 if (flags & LEX_STUFF_UTF8) {
1018 STRLEN highhalf = 0; /* Count of variants */
1019 const char *p, *e = pv+len;
1020 for (p = pv; p != e; p++) {
1021 if (! UTF8_IS_INVARIANT(*p)) {
1027 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1028 bufptr = PL_parser->bufptr;
1029 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1030 SvCUR_set(PL_parser->linestr,
1031 SvCUR(PL_parser->linestr) + len+highhalf);
1032 PL_parser->bufend += len+highhalf;
1033 for (p = pv; p != e; p++) {
1035 if (! UTF8_IS_INVARIANT(c)) {
1036 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1037 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1039 *bufptr++ = (char)c;
1044 if (flags & LEX_STUFF_UTF8) {
1045 STRLEN highhalf = 0;
1046 const char *p, *e = pv+len;
1047 for (p = pv; p != e; p++) {
1049 if (UTF8_IS_ABOVE_LATIN1(c)) {
1050 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1051 "non-Latin-1 character into Latin-1 input");
1052 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1055 } else if (! UTF8_IS_INVARIANT(c)) {
1056 /* malformed UTF-8 */
1058 SAVESPTR(PL_warnhook);
1059 PL_warnhook = PERL_WARNHOOK_FATAL;
1060 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1066 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1067 bufptr = PL_parser->bufptr;
1068 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1069 SvCUR_set(PL_parser->linestr,
1070 SvCUR(PL_parser->linestr) + len-highhalf);
1071 PL_parser->bufend += len-highhalf;
1074 if (UTF8_IS_INVARIANT(*p)) {
1080 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1086 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1087 bufptr = PL_parser->bufptr;
1088 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1089 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1090 PL_parser->bufend += len;
1091 Copy(pv, bufptr, len, char);
1097 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1099 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1100 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1101 reallocating the buffer if necessary. This means that lexing code that
1102 runs later will see the characters as if they had appeared in the input.
1103 It is not recommended to do this as part of normal parsing, and most
1104 uses of this facility run the risk of the inserted characters being
1105 interpreted in an unintended manner.
1107 The string to be inserted is represented by octets starting at I<pv>
1108 and continuing to the first nul. These octets are interpreted as either
1109 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1110 in I<flags>. The characters are recoded for the lexer buffer, according
1111 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1112 If it is not convenient to nul-terminate a string to be inserted, the
1113 L</lex_stuff_pvn> function is more appropriate.
1119 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1121 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1122 lex_stuff_pvn(pv, strlen(pv), flags);
1126 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1128 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1129 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1130 reallocating the buffer if necessary. This means that lexing code that
1131 runs later will see the characters as if they had appeared in the input.
1132 It is not recommended to do this as part of normal parsing, and most
1133 uses of this facility run the risk of the inserted characters being
1134 interpreted in an unintended manner.
1136 The string to be inserted is the string value of I<sv>. The characters
1137 are recoded for the lexer buffer, according to how the buffer is currently
1138 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1139 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1140 need to construct a scalar.
1146 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1150 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1152 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1154 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1158 =for apidoc Amx|void|lex_unstuff|char *ptr
1160 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1161 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1162 This hides the discarded text from any lexing code that runs later,
1163 as if the text had never appeared.
1165 This is not the normal way to consume lexed text. For that, use
1172 Perl_lex_unstuff(pTHX_ char *ptr)
1176 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1177 buf = PL_parser->bufptr;
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1182 bufend = PL_parser->bufend;
1184 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1185 unstuff_len = ptr - buf;
1186 Move(ptr, buf, bufend+1-ptr, char);
1187 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1188 PL_parser->bufend = bufend - unstuff_len;
1192 =for apidoc Amx|void|lex_read_to|char *ptr
1194 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1195 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1196 performing the correct bookkeeping whenever a newline character is passed.
1197 This is the normal way to consume lexed text.
1199 Interpretation of the buffer's octets can be abstracted out by
1200 using the slightly higher-level functions L</lex_peek_unichar> and
1201 L</lex_read_unichar>.
1207 Perl_lex_read_to(pTHX_ char *ptr)
1210 PERL_ARGS_ASSERT_LEX_READ_TO;
1211 s = PL_parser->bufptr;
1212 if (ptr < s || ptr > PL_parser->bufend)
1213 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1214 for (; s != ptr; s++)
1216 COPLINE_INC_WITH_HERELINES;
1217 PL_parser->linestart = s+1;
1219 PL_parser->bufptr = ptr;
1223 =for apidoc Amx|void|lex_discard_to|char *ptr
1225 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1226 up to I<ptr>. The remaining content of the buffer will be moved, and
1227 all pointers into the buffer updated appropriately. I<ptr> must not
1228 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1229 it is not permitted to discard text that has yet to be lexed.
1231 Normally it is not necessarily to do this directly, because it suffices to
1232 use the implicit discarding behaviour of L</lex_next_chunk> and things
1233 based on it. However, if a token stretches across multiple lines,
1234 and the lexing code has kept multiple lines of text in the buffer for
1235 that purpose, then after completion of the token it would be wise to
1236 explicitly discard the now-unneeded earlier lines, to avoid future
1237 multi-line tokens growing the buffer without bound.
1243 Perl_lex_discard_to(pTHX_ char *ptr)
1247 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1248 buf = SvPVX(PL_parser->linestr);
1250 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1253 if (ptr > PL_parser->bufptr)
1254 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1255 discard_len = ptr - buf;
1256 if (PL_parser->oldbufptr < ptr)
1257 PL_parser->oldbufptr = ptr;
1258 if (PL_parser->oldoldbufptr < ptr)
1259 PL_parser->oldoldbufptr = ptr;
1260 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1261 PL_parser->last_uni = NULL;
1262 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1263 PL_parser->last_lop = NULL;
1264 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1265 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1266 PL_parser->bufend -= discard_len;
1267 PL_parser->bufptr -= discard_len;
1268 PL_parser->oldbufptr -= discard_len;
1269 PL_parser->oldoldbufptr -= discard_len;
1270 if (PL_parser->last_uni)
1271 PL_parser->last_uni -= discard_len;
1272 if (PL_parser->last_lop)
1273 PL_parser->last_lop -= discard_len;
1277 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1279 Reads in the next chunk of text to be lexed, appending it to
1280 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1281 looked to the end of the current chunk and wants to know more. It is
1282 usual, but not necessary, for lexing to have consumed the entirety of
1283 the current chunk at this time.
1285 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1286 chunk (i.e., the current chunk has been entirely consumed), normally the
1287 current chunk will be discarded at the same time that the new chunk is
1288 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1289 will not be discarded. If the current chunk has not been entirely
1290 consumed, then it will not be discarded regardless of the flag.
1292 Returns true if some new text was added to the buffer, or false if the
1293 buffer has reached the end of the input text.
1298 #define LEX_FAKE_EOF 0x80000000
1299 #define LEX_NO_TERM 0x40000000
1302 Perl_lex_next_chunk(pTHX_ U32 flags)
1306 STRLEN old_bufend_pos, new_bufend_pos;
1307 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1308 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1309 bool got_some_for_debugger = 0;
1311 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1312 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1313 linestr = PL_parser->linestr;
1314 buf = SvPVX(linestr);
1315 if (!(flags & LEX_KEEP_PREVIOUS) &&
1316 PL_parser->bufptr == PL_parser->bufend) {
1317 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1319 if (PL_parser->last_uni != PL_parser->bufend)
1320 PL_parser->last_uni = NULL;
1321 if (PL_parser->last_lop != PL_parser->bufend)
1322 PL_parser->last_lop = NULL;
1323 last_uni_pos = last_lop_pos = 0;
1327 old_bufend_pos = PL_parser->bufend - buf;
1328 bufptr_pos = PL_parser->bufptr - buf;
1329 oldbufptr_pos = PL_parser->oldbufptr - buf;
1330 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1331 linestart_pos = PL_parser->linestart - buf;
1332 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1333 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1335 if (flags & LEX_FAKE_EOF) {
1337 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1339 } else if (filter_gets(linestr, old_bufend_pos)) {
1341 got_some_for_debugger = 1;
1342 } else if (flags & LEX_NO_TERM) {
1345 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1346 sv_setpvs(linestr, "");
1348 /* End of real input. Close filehandle (unless it was STDIN),
1349 * then add implicit termination.
1351 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1352 PerlIO_clearerr(PL_parser->rsfp);
1353 else if (PL_parser->rsfp)
1354 (void)PerlIO_close(PL_parser->rsfp);
1355 PL_parser->rsfp = NULL;
1356 PL_parser->in_pod = PL_parser->filtered = 0;
1358 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1361 if (!PL_in_eval && PL_minus_p) {
1363 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1364 PL_minus_n = PL_minus_p = 0;
1365 } else if (!PL_in_eval && PL_minus_n) {
1366 sv_catpvs(linestr, /*{*/";}");
1369 sv_catpvs(linestr, ";");
1372 buf = SvPVX(linestr);
1373 new_bufend_pos = SvCUR(linestr);
1374 PL_parser->bufend = buf + new_bufend_pos;
1375 PL_parser->bufptr = buf + bufptr_pos;
1376 PL_parser->oldbufptr = buf + oldbufptr_pos;
1377 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1378 PL_parser->linestart = buf + linestart_pos;
1379 if (PL_parser->last_uni)
1380 PL_parser->last_uni = buf + last_uni_pos;
1381 if (PL_parser->last_lop)
1382 PL_parser->last_lop = buf + last_lop_pos;
1383 if (PL_parser->preambling != NOLINE) {
1384 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1385 PL_parser->preambling = NOLINE;
1387 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1388 PL_curstash != PL_debstash) {
1389 /* debugger active and we're not compiling the debugger code,
1390 * so store the line into the debugger's array of lines
1392 update_debugger_info(NULL, buf+old_bufend_pos,
1393 new_bufend_pos-old_bufend_pos);
1399 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1401 Looks ahead one (Unicode) character in the text currently being lexed.
1402 Returns the codepoint (unsigned integer value) of the next character,
1403 or -1 if lexing has reached the end of the input text. To consume the
1404 peeked character, use L</lex_read_unichar>.
1406 If the next character is in (or extends into) the next chunk of input
1407 text, the next chunk will be read in. Normally the current chunk will be
1408 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1409 then the current chunk will not be discarded.
1411 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1412 is encountered, an exception is generated.
1418 Perl_lex_peek_unichar(pTHX_ U32 flags)
1422 if (flags & ~(LEX_KEEP_PREVIOUS))
1423 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1424 s = PL_parser->bufptr;
1425 bufend = PL_parser->bufend;
1431 if (!lex_next_chunk(flags))
1433 s = PL_parser->bufptr;
1434 bufend = PL_parser->bufend;
1437 if (UTF8_IS_INVARIANT(head))
1439 if (UTF8_IS_START(head)) {
1440 len = UTF8SKIP(&head);
1441 while ((STRLEN)(bufend-s) < len) {
1442 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1444 s = PL_parser->bufptr;
1445 bufend = PL_parser->bufend;
1448 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1449 if (retlen == (STRLEN)-1) {
1450 /* malformed UTF-8 */
1452 SAVESPTR(PL_warnhook);
1453 PL_warnhook = PERL_WARNHOOK_FATAL;
1454 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1460 if (!lex_next_chunk(flags))
1462 s = PL_parser->bufptr;
1469 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1471 Reads the next (Unicode) character in the text currently being lexed.
1472 Returns the codepoint (unsigned integer value) of the character read,
1473 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1474 if lexing has reached the end of the input text. To non-destructively
1475 examine the next character, use L</lex_peek_unichar> instead.
1477 If the next character is in (or extends into) the next chunk of input
1478 text, the next chunk will be read in. Normally the current chunk will be
1479 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1480 then the current chunk will not be discarded.
1482 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1483 is encountered, an exception is generated.
1489 Perl_lex_read_unichar(pTHX_ U32 flags)
1492 if (flags & ~(LEX_KEEP_PREVIOUS))
1493 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1494 c = lex_peek_unichar(flags);
1497 COPLINE_INC_WITH_HERELINES;
1499 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1501 ++(PL_parser->bufptr);
1507 =for apidoc Amx|void|lex_read_space|U32 flags
1509 Reads optional spaces, in Perl style, in the text currently being
1510 lexed. The spaces may include ordinary whitespace characters and
1511 Perl-style comments. C<#line> directives are processed if encountered.
1512 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1513 at a non-space character (or the end of the input text).
1515 If spaces extend into the next chunk of input text, the next chunk will
1516 be read in. Normally the current chunk will be discarded at the same
1517 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1518 chunk will not be discarded.
1523 #define LEX_NO_INCLINE 0x40000000
1524 #define LEX_NO_NEXT_CHUNK 0x80000000
1527 Perl_lex_read_space(pTHX_ U32 flags)
1530 const bool can_incline = !(flags & LEX_NO_INCLINE);
1531 bool need_incline = 0;
1532 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1533 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1536 sv_free(PL_skipwhite);
1537 PL_skipwhite = NULL;
1540 PL_skipwhite = newSVpvs("");
1541 #endif /* PERL_MAD */
1542 s = PL_parser->bufptr;
1543 bufend = PL_parser->bufend;
1549 } while (!(c == '\n' || (c == 0 && s == bufend)));
1550 } else if (c == '\n') {
1553 PL_parser->linestart = s;
1559 } else if (isSPACE(c)) {
1561 } else if (c == 0 && s == bufend) {
1566 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1567 #endif /* PERL_MAD */
1568 if (flags & LEX_NO_NEXT_CHUNK)
1570 PL_parser->bufptr = s;
1571 l = CopLINE(PL_curcop);
1572 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1573 got_more = lex_next_chunk(flags);
1574 CopLINE_set(PL_curcop, l);
1575 s = PL_parser->bufptr;
1576 bufend = PL_parser->bufend;
1579 if (can_incline && need_incline && PL_parser->rsfp) {
1589 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1590 #endif /* PERL_MAD */
1591 PL_parser->bufptr = s;
1596 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1598 This function performs syntax checking on a prototype, C<proto>.
1599 If C<warn> is true, any illegal characters or mismatched brackets
1600 will trigger illegalproto warnings, declaring that they were
1601 detected in the prototype for C<name>.
1603 The return value is C<true> if this is a valid prototype, and
1604 C<false> if it is not, regardless of whether C<warn> was C<true> or
1607 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1614 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1616 STRLEN len, origlen;
1617 char *p = proto ? SvPV(proto, len) : NULL;
1618 bool bad_proto = FALSE;
1619 bool in_brackets = FALSE;
1620 bool after_slash = FALSE;
1621 char greedy_proto = ' ';
1622 bool proto_after_greedy_proto = FALSE;
1623 bool must_be_last = FALSE;
1624 bool underscore = FALSE;
1625 bool bad_proto_after_underscore = FALSE;
1627 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1633 for (; len--; p++) {
1636 proto_after_greedy_proto = TRUE;
1638 if (!strchr(";@%", *p))
1639 bad_proto_after_underscore = TRUE;
1642 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1649 in_brackets = FALSE;
1650 else if ((*p == '@' || *p == '%') &&
1653 must_be_last = TRUE;
1662 after_slash = FALSE;
1667 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1670 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1671 origlen, UNI_DISPLAY_ISPRINT)
1672 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1674 if (proto_after_greedy_proto)
1675 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1676 "Prototype after '%c' for %"SVf" : %s",
1677 greedy_proto, SVfARG(name), p);
1679 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1680 "Missing ']' in prototype for %"SVf" : %s",
1683 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1684 "Illegal character in prototype for %"SVf" : %s",
1686 if (bad_proto_after_underscore)
1687 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1688 "Illegal character after '_' in prototype for %"SVf" : %s",
1692 return (! (proto_after_greedy_proto || bad_proto) );
1697 * This subroutine has nothing to do with tilting, whether at windmills
1698 * or pinball tables. Its name is short for "increment line". It
1699 * increments the current line number in CopLINE(PL_curcop) and checks
1700 * to see whether the line starts with a comment of the form
1701 * # line 500 "foo.pm"
1702 * If so, it sets the current line number and file to the values in the comment.
1706 S_incline(pTHX_ const char *s)
1714 PERL_ARGS_ASSERT_INCLINE;
1716 COPLINE_INC_WITH_HERELINES;
1717 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1718 && s+1 == PL_bufend && *s == ';') {
1719 /* fake newline in string eval */
1720 CopLINE_dec(PL_curcop);
1725 while (SPACE_OR_TAB(*s))
1727 if (strnEQ(s, "line", 4))
1731 if (SPACE_OR_TAB(*s))
1735 while (SPACE_OR_TAB(*s))
1743 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1745 while (SPACE_OR_TAB(*s))
1747 if (*s == '"' && (t = strchr(s+1, '"'))) {
1753 while (!isSPACE(*t))
1757 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1759 if (*e != '\n' && *e != '\0')
1760 return; /* false alarm */
1762 line_num = atoi(n)-1;
1765 const STRLEN len = t - s;
1767 if (!PL_rsfp && !PL_parser->filtered) {
1768 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1769 * to *{"::_<newfilename"} */
1770 /* However, the long form of evals is only turned on by the
1771 debugger - usually they're "(eval %lu)" */
1772 GV * const cfgv = CopFILEGV(PL_curcop);
1775 STRLEN tmplen2 = len;
1779 if (tmplen2 + 2 <= sizeof smallbuf)
1782 Newx(tmpbuf2, tmplen2 + 2, char);
1787 memcpy(tmpbuf2 + 2, s, tmplen2);
1790 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1792 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1793 /* adjust ${"::_<newfilename"} to store the new file name */
1794 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1795 /* The line number may differ. If that is the case,
1796 alias the saved lines that are in the array.
1797 Otherwise alias the whole array. */
1798 if (CopLINE(PL_curcop) == line_num) {
1799 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1800 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1802 else if (GvAV(cfgv)) {
1803 AV * const av = GvAV(cfgv);
1804 const I32 start = CopLINE(PL_curcop)+1;
1805 I32 items = AvFILLp(av) - start;
1807 AV * const av2 = GvAVn(gv2);
1808 SV **svp = AvARRAY(av) + start;
1809 I32 l = (I32)line_num+1;
1811 av_store(av2, l++, SvREFCNT_inc(*svp++));
1816 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1819 CopFILE_free(PL_curcop);
1820 CopFILE_setn(PL_curcop, s, len);
1822 CopLINE_set(PL_curcop, line_num);
1825 #define skipspace(s) skipspace_flags(s, 0)
1828 /* skip space before PL_thistoken */
1831 S_skipspace0(pTHX_ char *s)
1833 PERL_ARGS_ASSERT_SKIPSPACE0;
1840 PL_thiswhite = newSVpvs("");
1841 sv_catsv(PL_thiswhite, PL_skipwhite);
1842 sv_free(PL_skipwhite);
1845 PL_realtokenstart = s - SvPVX(PL_linestr);
1849 /* skip space after PL_thistoken */
1852 S_skipspace1(pTHX_ char *s)
1854 const char *start = s;
1855 I32 startoff = start - SvPVX(PL_linestr);
1857 PERL_ARGS_ASSERT_SKIPSPACE1;
1862 start = SvPVX(PL_linestr) + startoff;
1863 if (!PL_thistoken && PL_realtokenstart >= 0) {
1864 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1865 PL_thistoken = newSVpvn(tstart, start - tstart);
1867 PL_realtokenstart = -1;
1870 PL_nextwhite = newSVpvs("");
1871 sv_catsv(PL_nextwhite, PL_skipwhite);
1872 sv_free(PL_skipwhite);
1879 S_skipspace2(pTHX_ char *s, SV **svp)
1882 const I32 startoff = s - SvPVX(PL_linestr);
1884 PERL_ARGS_ASSERT_SKIPSPACE2;
1887 if (!PL_madskills || !svp)
1889 start = SvPVX(PL_linestr) + startoff;
1890 if (!PL_thistoken && PL_realtokenstart >= 0) {
1891 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1892 PL_thistoken = newSVpvn(tstart, start - tstart);
1893 PL_realtokenstart = -1;
1897 *svp = newSVpvs("");
1898 sv_setsv(*svp, PL_skipwhite);
1899 sv_free(PL_skipwhite);
1908 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1910 AV *av = CopFILEAVx(PL_curcop);
1913 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1915 sv = *av_fetch(av, 0, 1);
1916 SvUPGRADE(sv, SVt_PVMG);
1918 if (!SvPOK(sv)) sv_setpvs(sv,"");
1920 sv_catsv(sv, orig_sv);
1922 sv_catpvn(sv, buf, len);
1927 if (PL_parser->preambling == NOLINE)
1928 av_store(av, CopLINE(PL_curcop), sv);
1934 * Called to gobble the appropriate amount and type of whitespace.
1935 * Skips comments as well.
1939 S_skipspace_flags(pTHX_ char *s, U32 flags)
1943 #endif /* PERL_MAD */
1944 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1947 sv_free(PL_skipwhite);
1948 PL_skipwhite = NULL;
1950 #endif /* PERL_MAD */
1951 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1952 while (s < PL_bufend && SPACE_OR_TAB(*s))
1955 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1957 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1958 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1959 LEX_NO_NEXT_CHUNK : 0));
1961 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1962 if (PL_linestart > PL_bufptr)
1963 PL_bufptr = PL_linestart;
1968 PL_skipwhite = newSVpvn(start, s-start);
1969 #endif /* PERL_MAD */
1975 * Check the unary operators to ensure there's no ambiguity in how they're
1976 * used. An ambiguous piece of code would be:
1978 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1979 * the +5 is its argument.
1989 if (PL_oldoldbufptr != PL_last_uni)
1991 while (isSPACE(*PL_last_uni))
1994 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1996 if ((t = strchr(s, '(')) && t < PL_bufptr)
1999 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2000 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2001 (int)(s - PL_last_uni), PL_last_uni);
2005 * LOP : macro to build a list operator. Its behaviour has been replaced
2006 * with a subroutine, S_lop() for which LOP is just another name.
2009 #define LOP(f,x) return lop(f,x,s)
2013 * Build a list operator (or something that might be one). The rules:
2014 * - if we have a next token, then it's a list operator [why?]
2015 * - if the next thing is an opening paren, then it's a function
2016 * - else it's a list operator
2020 S_lop(pTHX_ I32 f, int x, char *s)
2024 PERL_ARGS_ASSERT_LOP;
2030 PL_last_lop = PL_oldbufptr;
2031 PL_last_lop_op = (OPCODE)f;
2040 return REPORT(FUNC);
2043 return REPORT(FUNC);
2046 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2047 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2048 return REPORT(LSTOP);
2055 * Sets up for an eventual force_next(). start_force(0) basically does
2056 * an unshift, while start_force(-1) does a push. yylex removes items
2061 S_start_force(pTHX_ int where)
2065 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2066 where = PL_lasttoke;
2067 assert(PL_curforce < 0 || PL_curforce == where);
2068 if (PL_curforce != where) {
2069 for (i = PL_lasttoke; i > where; --i) {
2070 PL_nexttoke[i] = PL_nexttoke[i-1];
2074 if (PL_curforce < 0) /* in case of duplicate start_force() */
2075 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2076 PL_curforce = where;
2079 curmad('^', newSVpvs(""));
2080 CURMAD('_', PL_nextwhite);
2085 S_curmad(pTHX_ char slot, SV *sv)
2091 if (PL_curforce < 0)
2092 where = &PL_thismad;
2094 where = &PL_nexttoke[PL_curforce].next_mad;
2100 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2102 else if (PL_encoding) {
2103 sv_recode_to_utf8(sv, PL_encoding);
2108 /* keep a slot open for the head of the list? */
2109 if (slot != '_' && *where && (*where)->mad_key == '^') {
2110 (*where)->mad_key = slot;
2111 sv_free(MUTABLE_SV(((*where)->mad_val)));
2112 (*where)->mad_val = (void*)sv;
2115 addmad(newMADsv(slot, sv), where, 0);
2118 # define start_force(where) NOOP
2119 # define curmad(slot, sv) NOOP
2124 * When the lexer realizes it knows the next token (for instance,
2125 * it is reordering tokens for the parser) then it can call S_force_next
2126 * to know what token to return the next time the lexer is called. Caller
2127 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2128 * and possibly PL_expect to ensure the lexer handles the token correctly.
2132 S_force_next(pTHX_ I32 type)
2137 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2138 tokereport(type, &NEXTVAL_NEXTTOKE);
2142 if (PL_curforce < 0)
2143 start_force(PL_lasttoke);
2144 PL_nexttoke[PL_curforce].next_type = type;
2145 if (PL_lex_state != LEX_KNOWNEXT)
2146 PL_lex_defer = PL_lex_state;
2147 PL_lex_state = LEX_KNOWNEXT;
2148 PL_lex_expect = PL_expect;
2151 PL_nexttype[PL_nexttoke] = type;
2153 if (PL_lex_state != LEX_KNOWNEXT) {
2154 PL_lex_defer = PL_lex_state;
2155 PL_lex_expect = PL_expect;
2156 PL_lex_state = LEX_KNOWNEXT;
2164 * This subroutine handles postfix deref syntax after the arrow has already
2165 * been emitted. @* $* etc. are emitted as two separate token right here.
2166 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2167 * only the first, leaving yylex to find the next.
2171 S_postderef(pTHX_ char const funny, char const next)
2174 assert(strchr("$@%&*", funny));
2175 assert(strchr("*[{", next));
2177 PL_expect = XOPERATOR;
2178 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2179 assert('@' == funny || '$' == funny);
2180 PL_lex_state = LEX_INTERPEND;
2181 start_force(PL_curforce);
2182 force_next(POSTJOIN);
2184 start_force(PL_curforce);
2189 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2190 && !PL_lex_brackets)
2192 PL_expect = XOPERATOR;
2201 int yyc = PL_parser->yychar;
2202 if (yyc != YYEMPTY) {
2205 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2206 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2207 PL_lex_allbrackets--;
2209 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2210 } else if (yyc == '('/*)*/) {
2211 PL_lex_allbrackets--;
2216 PL_parser->yychar = YYEMPTY;
2221 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2224 SV * const sv = newSVpvn_utf8(start, len,
2227 && !is_ascii_string((const U8*)start, len)
2228 && is_utf8_string((const U8*)start, len));
2234 * When the lexer knows the next thing is a word (for instance, it has
2235 * just seen -> and it knows that the next char is a word char, then
2236 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2240 * char *start : buffer position (must be within PL_linestr)
2241 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2242 * int check_keyword : if true, Perl checks to make sure the word isn't
2243 * a keyword (do this if the word is a label, e.g. goto FOO)
2244 * int allow_pack : if true, : characters will also be allowed (require,
2245 * use, etc. do this)
2246 * int allow_initial_tick : used by the "sub" lexer only.
2250 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2256 PERL_ARGS_ASSERT_FORCE_WORD;
2258 start = SKIPSPACE1(start);
2260 if (isIDFIRST_lazy_if(s,UTF) ||
2261 (allow_pack && *s == ':') )
2263 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2264 if (check_keyword) {
2265 char *s2 = PL_tokenbuf;
2266 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2268 if (keyword(s2, len, 0))
2271 start_force(PL_curforce);
2273 curmad('X', newSVpvn(start,s-start));
2274 if (token == METHOD) {
2279 PL_expect = XOPERATOR;
2283 curmad('g', newSVpvs( "forced" ));
2284 NEXTVAL_NEXTTOKE.opval
2285 = (OP*)newSVOP(OP_CONST,0,
2286 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2287 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2295 * Called when the lexer wants $foo *foo &foo etc, but the program
2296 * text only contains the "foo" portion. The first argument is a pointer
2297 * to the "foo", and the second argument is the type symbol to prefix.
2298 * Forces the next token to be a "WORD".
2299 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2303 S_force_ident(pTHX_ const char *s, int kind)
2307 PERL_ARGS_ASSERT_FORCE_IDENT;
2310 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2311 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2312 UTF ? SVf_UTF8 : 0));
2313 start_force(PL_curforce);
2314 NEXTVAL_NEXTTOKE.opval = o;
2317 o->op_private = OPpCONST_ENTERED;
2318 /* XXX see note in pp_entereval() for why we forgo typo
2319 warnings if the symbol must be introduced in an eval.
2321 gv_fetchpvn_flags(s, len,
2322 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2323 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2324 kind == '$' ? SVt_PV :
2325 kind == '@' ? SVt_PVAV :
2326 kind == '%' ? SVt_PVHV :
2334 S_force_ident_maybe_lex(pTHX_ char pit)
2336 start_force(PL_curforce);
2337 NEXTVAL_NEXTTOKE.ival = pit;
2342 Perl_str_to_version(pTHX_ SV *sv)
2347 const char *start = SvPV_const(sv,len);
2348 const char * const end = start + len;
2349 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2351 PERL_ARGS_ASSERT_STR_TO_VERSION;
2353 while (start < end) {
2357 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2362 retval += ((NV)n)/nshift;
2371 * Forces the next token to be a version number.
2372 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2373 * and if "guessing" is TRUE, then no new token is created (and the caller
2374 * must use an alternative parsing method).
2378 S_force_version(pTHX_ char *s, int guessing)
2384 I32 startoff = s - SvPVX(PL_linestr);
2387 PERL_ARGS_ASSERT_FORCE_VERSION;
2395 while (isDIGIT(*d) || *d == '_' || *d == '.')
2399 start_force(PL_curforce);
2400 curmad('X', newSVpvn(s,d-s));
2403 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2405 #ifdef USE_LOCALE_NUMERIC
2406 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2407 setlocale(LC_NUMERIC, "C");
2409 s = scan_num(s, &pl_yylval);
2410 #ifdef USE_LOCALE_NUMERIC
2411 setlocale(LC_NUMERIC, loc);
2414 version = pl_yylval.opval;
2415 ver = cSVOPx(version)->op_sv;
2416 if (SvPOK(ver) && !SvNIOK(ver)) {
2417 SvUPGRADE(ver, SVt_PVNV);
2418 SvNV_set(ver, str_to_version(ver));
2419 SvNOK_on(ver); /* hint that it is a version */
2422 else if (guessing) {
2425 sv_free(PL_nextwhite); /* let next token collect whitespace */
2427 s = SvPVX(PL_linestr) + startoff;
2435 if (PL_madskills && !version) {
2436 sv_free(PL_nextwhite); /* let next token collect whitespace */
2438 s = SvPVX(PL_linestr) + startoff;
2441 /* NOTE: The parser sees the package name and the VERSION swapped */
2442 start_force(PL_curforce);
2443 NEXTVAL_NEXTTOKE.opval = version;
2450 * S_force_strict_version
2451 * Forces the next token to be a version number using strict syntax rules.
2455 S_force_strict_version(pTHX_ char *s)
2460 I32 startoff = s - SvPVX(PL_linestr);
2462 const char *errstr = NULL;
2464 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2466 while (isSPACE(*s)) /* leading whitespace */
2469 if (is_STRICT_VERSION(s,&errstr)) {
2471 s = (char *)scan_version(s, ver, 0);
2472 version = newSVOP(OP_CONST, 0, ver);
2474 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2475 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2479 yyerror(errstr); /* version required */
2484 if (PL_madskills && !version) {
2485 sv_free(PL_nextwhite); /* let next token collect whitespace */
2487 s = SvPVX(PL_linestr) + startoff;
2490 /* NOTE: The parser sees the package name and the VERSION swapped */
2491 start_force(PL_curforce);
2492 NEXTVAL_NEXTTOKE.opval = version;
2500 * Tokenize a quoted string passed in as an SV. It finds the next
2501 * chunk, up to end of string or a backslash. It may make a new
2502 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2507 S_tokeq(pTHX_ SV *sv)
2516 PERL_ARGS_ASSERT_TOKEQ;
2521 s = SvPV_force(sv, len);
2522 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2525 /* This is relying on the SV being "well formed" with a trailing '\0' */
2526 while (s < send && !(*s == '\\' && s[1] == '\\'))
2531 if ( PL_hints & HINT_NEW_STRING ) {
2532 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2536 if (s + 1 < send && (s[1] == '\\'))
2537 s++; /* all that, just for this */
2542 SvCUR_set(sv, d - SvPVX_const(sv));
2544 if ( PL_hints & HINT_NEW_STRING )
2545 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2550 * Now come three functions related to double-quote context,
2551 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2552 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2553 * interact with PL_lex_state, and create fake ( ... ) argument lists
2554 * to handle functions and concatenation.
2558 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2563 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2565 * Pattern matching will set PL_lex_op to the pattern-matching op to
2566 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2568 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2570 * Everything else becomes a FUNC.
2572 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2573 * had an OP_CONST or OP_READLINE). This just sets us up for a
2574 * call to S_sublex_push().
2578 S_sublex_start(pTHX)
2581 const I32 op_type = pl_yylval.ival;
2583 if (op_type == OP_NULL) {
2584 pl_yylval.opval = PL_lex_op;
2588 if (op_type == OP_CONST || op_type == OP_READLINE) {
2589 SV *sv = tokeq(PL_lex_stuff);
2591 if (SvTYPE(sv) == SVt_PVIV) {
2592 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2594 const char * const p = SvPV_const(sv, len);
2595 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2599 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2600 PL_lex_stuff = NULL;
2601 /* Allow <FH> // "foo" */
2602 if (op_type == OP_READLINE)
2603 PL_expect = XTERMORDORDOR;
2606 else if (op_type == OP_BACKTICK && PL_lex_op) {
2607 /* readpipe() was overridden */
2608 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2609 pl_yylval.opval = PL_lex_op;
2611 PL_lex_stuff = NULL;
2615 PL_sublex_info.super_state = PL_lex_state;
2616 PL_sublex_info.sub_inwhat = (U16)op_type;
2617 PL_sublex_info.sub_op = PL_lex_op;
2618 PL_lex_state = LEX_INTERPPUSH;
2622 pl_yylval.opval = PL_lex_op;
2632 * Create a new scope to save the lexing state. The scope will be
2633 * ended in S_sublex_done. Returns a '(', starting the function arguments
2634 * to the uc, lc, etc. found before.
2635 * Sets PL_lex_state to LEX_INTERPCONCAT.
2643 const bool is_heredoc = PL_multi_close == '<';
2646 PL_lex_state = PL_sublex_info.super_state;
2647 SAVEI8(PL_lex_dojoin);
2648 SAVEI32(PL_lex_brackets);
2649 SAVEI32(PL_lex_allbrackets);
2650 SAVEI32(PL_lex_formbrack);
2651 SAVEI8(PL_lex_fakeeof);
2652 SAVEI32(PL_lex_casemods);
2653 SAVEI32(PL_lex_starts);
2654 SAVEI8(PL_lex_state);
2655 SAVESPTR(PL_lex_repl);
2656 SAVEVPTR(PL_lex_inpat);
2657 SAVEI16(PL_lex_inwhat);
2660 SAVECOPLINE(PL_curcop);
2661 SAVEI32(PL_multi_end);
2662 SAVEI32(PL_parser->herelines);
2663 PL_parser->herelines = 0;
2665 SAVEI8(PL_multi_close);
2666 SAVEPPTR(PL_bufptr);
2667 SAVEPPTR(PL_bufend);
2668 SAVEPPTR(PL_oldbufptr);
2669 SAVEPPTR(PL_oldoldbufptr);
2670 SAVEPPTR(PL_last_lop);
2671 SAVEPPTR(PL_last_uni);
2672 SAVEPPTR(PL_linestart);
2673 SAVESPTR(PL_linestr);
2674 SAVEGENERICPV(PL_lex_brackstack);
2675 SAVEGENERICPV(PL_lex_casestack);
2676 SAVEGENERICPV(PL_parser->lex_shared);
2677 SAVEBOOL(PL_parser->lex_re_reparsing);
2678 SAVEI32(PL_copline);
2680 /* The here-doc parser needs to be able to peek into outer lexing
2681 scopes to find the body of the here-doc. So we put PL_linestr and
2682 PL_bufptr into lex_shared, to ‘share’ those values.
2684 PL_parser->lex_shared->ls_linestr = PL_linestr;
2685 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2687 PL_linestr = PL_lex_stuff;
2688 PL_lex_repl = PL_sublex_info.repl;
2689 PL_lex_stuff = NULL;
2690 PL_sublex_info.repl = NULL;
2692 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2693 = SvPVX(PL_linestr);
2694 PL_bufend += SvCUR(PL_linestr);
2695 PL_last_lop = PL_last_uni = NULL;
2696 SAVEFREESV(PL_linestr);
2697 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2699 PL_lex_dojoin = FALSE;
2700 PL_lex_brackets = PL_lex_formbrack = 0;
2701 PL_lex_allbrackets = 0;
2702 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2703 Newx(PL_lex_brackstack, 120, char);
2704 Newx(PL_lex_casestack, 12, char);
2705 PL_lex_casemods = 0;
2706 *PL_lex_casestack = '\0';
2708 PL_lex_state = LEX_INTERPCONCAT;
2710 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2711 PL_copline = NOLINE;
2713 Newxz(shared, 1, LEXSHARED);
2714 shared->ls_prev = PL_parser->lex_shared;
2715 PL_parser->lex_shared = shared;
2717 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2718 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2719 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2720 PL_lex_inpat = PL_sublex_info.sub_op;
2722 PL_lex_inpat = NULL;
2724 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2725 PL_in_eval &= ~EVAL_RE_REPARSING;
2732 * Restores lexer state after a S_sublex_push.
2739 if (!PL_lex_starts++) {
2740 SV * const sv = newSVpvs("");
2741 if (SvUTF8(PL_linestr))
2743 PL_expect = XOPERATOR;
2744 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2748 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2749 PL_lex_state = LEX_INTERPCASEMOD;
2753 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2754 assert(PL_lex_inwhat != OP_TRANSR);
2755 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2756 PL_linestr = PL_lex_repl;
2758 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2759 PL_bufend += SvCUR(PL_linestr);
2760 PL_last_lop = PL_last_uni = NULL;
2761 PL_lex_dojoin = FALSE;
2762 PL_lex_brackets = 0;
2763 PL_lex_allbrackets = 0;
2764 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2765 PL_lex_casemods = 0;
2766 *PL_lex_casestack = '\0';
2768 if (SvEVALED(PL_lex_repl)) {
2769 PL_lex_state = LEX_INTERPNORMAL;
2771 /* we don't clear PL_lex_repl here, so that we can check later
2772 whether this is an evalled subst; that means we rely on the
2773 logic to ensure sublex_done() is called again only via the
2774 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2777 PL_lex_state = LEX_INTERPCONCAT;
2780 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2781 CopLINE(PL_curcop) +=
2782 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2783 + PL_parser->herelines;
2784 PL_parser->herelines = 0;
2789 const line_t l = CopLINE(PL_curcop);
2794 PL_endwhite = newSVpvs("");
2795 sv_catsv(PL_endwhite, PL_thiswhite);
2799 sv_setpvs(PL_thistoken,"");
2801 PL_realtokenstart = -1;
2805 if (PL_multi_close == '<')
2806 PL_parser->herelines += l - PL_multi_end;
2807 PL_bufend = SvPVX(PL_linestr);
2808 PL_bufend += SvCUR(PL_linestr);
2809 PL_expect = XOPERATOR;
2810 PL_sublex_info.sub_inwhat = 0;
2815 PERL_STATIC_INLINE SV*
2816 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2818 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2819 * interior, hence to the "}". Finds what the name resolves to, returning
2820 * an SV* containing it; NULL if no valid one found */
2822 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2829 const U8* first_bad_char_loc;
2830 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2832 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2834 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2836 &first_bad_char_loc))
2838 /* If warnings are on, this will print a more detailed analysis of what
2839 * is wrong than the error message below */
2840 utf8n_to_uvchr(first_bad_char_loc,
2841 e - ((char *) first_bad_char_loc),
2844 /* We deliberately don't try to print the malformed character, which
2845 * might not print very well; it also may be just the first of many
2846 * malformations, so don't print what comes after it */
2847 yyerror(Perl_form(aTHX_
2848 "Malformed UTF-8 character immediately after '%.*s'",
2849 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2853 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2854 /* include the <}> */
2855 e - backslash_ptr + 1);
2857 SvREFCNT_dec_NN(res);
2861 /* See if the charnames handler is the Perl core's, and if so, we can skip
2862 * the validation needed for a user-supplied one, as Perl's does its own
2864 table = GvHV(PL_hintgv); /* ^H */
2865 cvp = hv_fetchs(table, "charnames", FALSE);
2866 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2867 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2869 const char * const name = HvNAME(stash);
2870 if strEQ(name, "_charnames") {
2875 /* Here, it isn't Perl's charname handler. We can't rely on a
2876 * user-supplied handler to validate the input name. For non-ut8 input,
2877 * look to see that the first character is legal. Then loop through the
2878 * rest checking that each is a continuation */
2880 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2884 if (! isALPHAU(*s)) {
2889 if (! isCHARNAME_CONT(*s)) {
2892 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2893 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2894 "A sequence of multiple spaces in a charnames "
2895 "alias definition is deprecated");
2899 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2900 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2901 "Trailing white-space in a charnames alias "
2902 "definition is deprecated");
2906 /* Similarly for utf8. For invariants can check directly; for other
2907 * Latin1, can calculate their code point and check; otherwise use a
2909 if (UTF8_IS_INVARIANT(*s)) {
2910 if (! isALPHAU(*s)) {
2914 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2915 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2921 if (! PL_utf8_charname_begin) {
2922 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2923 PL_utf8_charname_begin = _core_swash_init("utf8",
2924 "_Perl_Charname_Begin",
2926 1, 0, NULL, &flags);
2928 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2935 if (UTF8_IS_INVARIANT(*s)) {
2936 if (! isCHARNAME_CONT(*s)) {
2939 if (*s == ' ' && *(s-1) == ' '
2940 && ckWARN_d(WARN_DEPRECATED)) {
2941 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2942 "A sequence of multiple spaces in a charnam"
2943 "es alias definition is deprecated");
2947 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2948 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2955 if (! PL_utf8_charname_continue) {
2956 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2957 PL_utf8_charname_continue = _core_swash_init("utf8",
2958 "_Perl_Charname_Continue",
2960 1, 0, NULL, &flags);
2962 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2968 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2969 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2970 "Trailing white-space in a charnames alias "
2971 "definition is deprecated");
2975 if (SvUTF8(res)) { /* Don't accept malformed input */
2976 const U8* first_bad_char_loc;
2978 const char* const str = SvPV_const(res, len);
2979 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2980 /* If warnings are on, this will print a more detailed analysis of
2981 * what is wrong than the error message below */
2982 utf8n_to_uvchr(first_bad_char_loc,
2983 (char *) first_bad_char_loc - str,
2986 /* We deliberately don't try to print the malformed character,
2987 * which might not print very well; it also may be just the first
2988 * of many malformations, so don't print what comes after it */
2991 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2992 (int) (e - backslash_ptr + 1), backslash_ptr,
2993 (int) ((char *) first_bad_char_loc - str), str
3003 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
3005 /* The final %.*s makes sure that should the trailing NUL be missing
3006 * that this print won't run off the end of the string */
3009 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3010 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
3011 (int)(e - s + bad_char_size), s + bad_char_size
3013 UTF ? SVf_UTF8 : 0);
3021 Extracts the next constant part of a pattern, double-quoted string,
3022 or transliteration. This is terrifying code.
3024 For example, in parsing the double-quoted string "ab\x63$d", it would
3025 stop at the '$' and return an OP_CONST containing 'abc'.
3027 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3028 processing a pattern (PL_lex_inpat is true), a transliteration
3029 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3031 Returns a pointer to the character scanned up to. If this is
3032 advanced from the start pointer supplied (i.e. if anything was
3033 successfully parsed), will leave an OP_CONST for the substring scanned
3034 in pl_yylval. Caller must intuit reason for not parsing further
3035 by looking at the next characters herself.
3039 \N{FOO} => \N{U+hex_for_character_FOO}
3040 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3043 all other \-char, including \N and \N{ apart from \N{ABC}
3046 @ and $ where it appears to be a var, but not for $ as tail anchor
3051 In transliterations:
3052 characters are VERY literal, except for - not at the start or end
3053 of the string, which indicates a range. If the range is in bytes,
3054 scan_const expands the range to the full set of intermediate
3055 characters. If the range is in utf8, the hyphen is replaced with
3056 a certain range mark which will be handled by pmtrans() in op.c.
3058 In double-quoted strings:
3060 double-quoted style: \r and \n
3061 constants: \x31, etc.
3062 deprecated backrefs: \1 (in substitution replacements)
3063 case and quoting: \U \Q \E
3066 scan_const does *not* construct ops to handle interpolated strings.
3067 It stops processing as soon as it finds an embedded $ or @ variable
3068 and leaves it to the caller to work out what's going on.
3070 embedded arrays (whether in pattern or not) could be:
3071 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3073 $ in double-quoted strings must be the symbol of an embedded scalar.
3075 $ in pattern could be $foo or could be tail anchor. Assumption:
3076 it's a tail anchor if $ is the last thing in the string, or if it's
3077 followed by one of "()| \r\n\t"
3079 \1 (backreferences) are turned into $1 in substitutions
3081 The structure of the code is
3082 while (there's a character to process) {
3083 handle transliteration ranges
3084 skip regexp comments /(?#comment)/ and codes /(?{code})/
3085 skip #-initiated comments in //x patterns
3086 check for embedded arrays
3087 check for embedded scalars
3089 deprecate \1 in substitution replacements
3090 handle string-changing backslashes \l \U \Q \E, etc.
3091 switch (what was escaped) {
3092 handle \- in a transliteration (becomes a literal -)
3093 if a pattern and not \N{, go treat as regular character
3094 handle \132 (octal characters)
3095 handle \x15 and \x{1234} (hex characters)
3096 handle \N{name} (named characters, also \N{3,5} in a pattern)
3097 handle \cV (control characters)
3098 handle printf-style backslashes (\f, \r, \n, etc)
3101 } (end if backslash)
3102 handle regular character
3103 } (end while character to read)
3108 S_scan_const(pTHX_ char *start)
3111 char *send = PL_bufend; /* end of the constant */
3112 SV *sv = newSV(send - start); /* sv for the constant. See
3113 note below on sizing. */
3114 char *s = start; /* start of the constant */
3115 char *d = SvPVX(sv); /* destination for copies */
3116 bool dorange = FALSE; /* are we in a translit range? */
3117 bool didrange = FALSE; /* did we just finish a range? */
3118 bool in_charclass = FALSE; /* within /[...]/ */
3119 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3120 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3121 to be UTF8? But, this can
3122 show as true when the source
3123 isn't utf8, as for example
3124 when it is entirely composed
3126 SV *res; /* result from charnames */
3128 /* Note on sizing: The scanned constant is placed into sv, which is
3129 * initialized by newSV() assuming one byte of output for every byte of
3130 * input. This routine expects newSV() to allocate an extra byte for a
3131 * trailing NUL, which this routine will append if it gets to the end of
3132 * the input. There may be more bytes of input than output (eg., \N{LATIN
3133 * CAPITAL LETTER A}), or more output than input if the constant ends up
3134 * recoded to utf8, but each time a construct is found that might increase
3135 * the needed size, SvGROW() is called. Its size parameter each time is
3136 * based on the best guess estimate at the time, namely the length used so
3137 * far, plus the length the current construct will occupy, plus room for
3138 * the trailing NUL, plus one byte for every input byte still unscanned */
3140 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3143 UV literal_endpoint = 0;
3144 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3147 PERL_ARGS_ASSERT_SCAN_CONST;
3149 assert(PL_lex_inwhat != OP_TRANSR);
3150 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3151 /* If we are doing a trans and we know we want UTF8 set expectation */
3152 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3153 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3156 /* Protect sv from errors and fatal warnings. */
3157 ENTER_with_name("scan_const");
3160 while (s < send || dorange) {
3162 /* get transliterations out of the way (they're most literal) */
3163 if (PL_lex_inwhat == OP_TRANS) {
3164 /* expand a range A-Z to the full set of characters. AIE! */
3166 I32 i; /* current expanded character */
3167 I32 min; /* first character in range */
3168 I32 max; /* last character in range */
3179 char * const c = (char*)utf8_hop((U8*)d, -1);
3183 *c = (char) ILLEGAL_UTF8_BYTE;
3184 /* mark the range as done, and continue */
3190 i = d - SvPVX_const(sv); /* remember current offset */
3193 SvLEN(sv) + (has_utf8 ?
3194 (512 - UTF_CONTINUATION_MARK +
3197 /* How many two-byte within 0..255: 128 in UTF-8,
3198 * 96 in UTF-8-mod. */
3200 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3202 d = SvPVX(sv) + i; /* refresh d after realloc */
3206 for (j = 0; j <= 1; j++) {
3207 char * const c = (char*)utf8_hop((U8*)d, -1);
3208 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3214 max = (U8)0xff; /* only to \xff */
3215 uvmax = uv; /* \x{100} to uvmax */
3217 d = c; /* eat endpoint chars */
3222 d -= 2; /* eat the first char and the - */
3223 min = (U8)*d; /* first char in range */
3224 max = (U8)d[1]; /* last char in range */
3231 "Invalid range \"%c-%c\" in transliteration operator",
3232 (char)min, (char)max);
3236 if (literal_endpoint == 2 &&
3237 ((isLOWER_A(min) && isLOWER_A(max)) ||
3238 (isUPPER_A(min) && isUPPER_A(max))))
3240 for (i = min; i <= max; i++) {
3247 for (i = min; i <= max; i++)
3250 append_utf8_from_native_byte(i, &d);
3258 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3260 *d++ = (char) ILLEGAL_UTF8_BYTE;
3262 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3266 /* mark the range as done, and continue */
3270 literal_endpoint = 0;
3275 /* range begins (ignore - as first or last char) */
3276 else if (*s == '-' && s+1 < send && s != start) {
3278 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3285 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3295 literal_endpoint = 0;
3296 native_range = TRUE;
3301 /* if we get here, we're not doing a transliteration */
3303 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3306 while (s1 >= start && *s1-- == '\\')
3309 in_charclass = TRUE;
3312 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3315 while (s1 >= start && *s1-- == '\\')
3318 in_charclass = FALSE;
3321 /* skip for regexp comments /(?#comment)/, except for the last
3322 * char, which will be done separately.
3323 * Stop on (?{..}) and friends */
3325 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3327 while (s+1 < send && *s != ')')
3330 else if (!PL_lex_casemods &&
3331 ( s[2] == '{' /* This should match regcomp.c */
3332 || (s[2] == '?' && s[3] == '{')))
3338 /* likewise skip #-initiated comments in //x patterns */
3339 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3340 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3341 while (s+1 < send && *s != '\n')
3345 /* no further processing of single-quoted regex */
3346 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3347 goto default_action;
3349 /* check for embedded arrays
3350 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3352 else if (*s == '@' && s[1]) {
3353 if (isWORDCHAR_lazy_if(s+1,UTF))
3355 if (strchr(":'{$", s[1]))
3357 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3358 break; /* in regexp, neither @+ nor @- are interpolated */
3361 /* check for embedded scalars. only stop if we're sure it's a
3364 else if (*s == '$') {
3365 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3367 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3369 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3370 "Possible unintended interpolation of $\\ in regex");
3372 break; /* in regexp, $ might be tail anchor */
3376 /* End of else if chain - OP_TRANS rejoin rest */
3379 if (*s == '\\' && s+1 < send) {
3380 char* e; /* Can be used for ending '}', etc. */
3384 /* warn on \1 - \9 in substitution replacements, but note that \11
3385 * is an octal; and \19 is \1 followed by '9' */
3386 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3387 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3389 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3394 /* string-change backslash escapes */
3395 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3399 /* In a pattern, process \N, but skip any other backslash escapes.
3400 * This is because we don't want to translate an escape sequence
3401 * into a meta symbol and have the regex compiler use the meta
3402 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3403 * in spite of this, we do have to process \N here while the proper
3404 * charnames handler is in scope. See bugs #56444 and #62056.
3405 * There is a complication because \N in a pattern may also stand
3406 * for 'match a non-nl', and not mean a charname, in which case its
3407 * processing should be deferred to the regex compiler. To be a
3408 * charname it must be followed immediately by a '{', and not look
3409 * like \N followed by a curly quantifier, i.e., not something like
3410 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3412 else if (PL_lex_inpat
3415 || regcurly(s + 1, FALSE)))
3418 goto default_action;
3423 /* quoted - in transliterations */
3425 if (PL_lex_inwhat == OP_TRANS) {
3432 if ((isALPHANUMERIC(*s)))
3433 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3434 "Unrecognized escape \\%c passed through",
3436 /* default action is to copy the quoted character */
3437 goto default_action;
3440 /* eg. \132 indicates the octal constant 0132 */
3441 case '0': case '1': case '2': case '3':
3442 case '4': case '5': case '6': case '7':
3444 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3446 uv = grok_oct(s, &len, &flags, NULL);
3448 if (len < 3 && s < send && isDIGIT(*s)
3449 && ckWARN(WARN_MISC))
3451 Perl_warner(aTHX_ packWARN(WARN_MISC),
3452 "%s", form_short_octal_warning(s, len));
3455 goto NUM_ESCAPE_INSERT;
3457 /* eg. \o{24} indicates the octal constant \024 */
3462 bool valid = grok_bslash_o(&s, &uv, &error,
3463 TRUE, /* Output warning */
3464 FALSE, /* Not strict */
3465 TRUE, /* Output warnings for
3472 goto NUM_ESCAPE_INSERT;
3475 /* eg. \x24 indicates the hex constant 0x24 */
3480 bool valid = grok_bslash_x(&s, &uv, &error,
3481 TRUE, /* Output warning */
3482 FALSE, /* Not strict */
3483 TRUE, /* Output warnings for
3493 /* Insert oct or hex escaped character. There will always be
3494 * enough room in sv since such escapes will be longer than any
3495 * UTF-8 sequence they can end up as, except if they force us
3496 * to recode the rest of the string into utf8 */
3498 /* Here uv is the ordinal of the next character being added */
3499 if (!UVCHR_IS_INVARIANT(uv)) {
3500 if (!has_utf8 && uv > 255) {
3501 /* Might need to recode whatever we have accumulated so
3502 * far if it contains any chars variant in utf8 or
3505 SvCUR_set(sv, d - SvPVX_const(sv));
3508 /* See Note on sizing above. */
3509 sv_utf8_upgrade_flags_grow(sv,
3510 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3511 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3512 d = SvPVX(sv) + SvCUR(sv);
3517 d = (char*)uvchr_to_utf8((U8*)d, uv);
3518 if (PL_lex_inwhat == OP_TRANS &&
3519 PL_sublex_info.sub_op) {
3520 PL_sublex_info.sub_op->op_private |=
3521 (PL_lex_repl ? OPpTRANS_FROM_UTF
3525 if (uv > 255 && !dorange)
3526 native_range = FALSE;
3539 /* In a non-pattern \N must be a named character, like \N{LATIN
3540 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3541 * mean to match a non-newline. For non-patterns, named
3542 * characters are converted to their string equivalents. In
3543 * patterns, named characters are not converted to their
3544 * ultimate forms for the same reasons that other escapes
3545 * aren't. Instead, they are converted to the \N{U+...} form
3546 * to get the value from the charnames that is in effect right
3547 * now, while preserving the fact that it was a named character
3548 * so that the regex compiler knows this */
3550 /* The structure of this section of code (besides checking for
3551 * errors and upgrading to utf8) is:
3552 * Further disambiguate between the two meanings of \N, and if
3553 * not a charname, go process it elsewhere
3554 * If of form \N{U+...}, pass it through if a pattern;
3555 * otherwise convert to utf8
3556 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3557 * pattern; otherwise convert to utf8 */
3559 /* Here, s points to the 'N'; the test below is guaranteed to
3560 * succeed if we are being called on a pattern as we already
3561 * know from a test above that the next character is a '{'.
3562 * On a non-pattern \N must mean 'named sequence, which
3563 * requires braces */
3566 yyerror("Missing braces on \\N{}");
3571 /* If there is no matching '}', it is an error. */
3572 if (! (e = strchr(s, '}'))) {
3573 if (! PL_lex_inpat) {
3574 yyerror("Missing right brace on \\N{}");
3576 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3581 /* Here it looks like a named character */
3583 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3584 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3585 | PERL_SCAN_DISALLOW_PREFIX;
3588 /* For \N{U+...}, the '...' is a unicode value even on
3589 * EBCDIC machines */
3590 s += 2; /* Skip to next char after the 'U+' */
3592 uv = grok_hex(s, &len, &flags, NULL);
3593 if (len == 0 || len != (STRLEN)(e - s)) {
3594 yyerror("Invalid hexadecimal number in \\N{U+...}");
3601 /* On non-EBCDIC platforms, pass through to the regex
3602 * compiler unchanged. The reason we evaluated the
3603 * number above is to make sure there wasn't a syntax
3604 * error. But on EBCDIC we convert to native so
3605 * downstream code can continue to assume it's native
3607 s -= 5; /* Include the '\N{U+' */
3609 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3612 (unsigned int) UNI_TO_NATIVE(uv));
3614 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3618 else { /* Not a pattern: convert the hex to string */
3620 /* If destination is not in utf8, unconditionally
3621 * recode it to be so. This is because \N{} implies
3622 * Unicode semantics, and scalars have to be in utf8
3623 * to guarantee those semantics */
3625 SvCUR_set(sv, d - SvPVX_const(sv));
3628 /* See Note on sizing above. */
3629 sv_utf8_upgrade_flags_grow(
3631 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3632 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3633 d = SvPVX(sv) + SvCUR(sv);
3637 /* Add the (Unicode) code point to the output. */
3638 if (UNI_IS_INVARIANT(uv)) {
3639 *d++ = (char) LATIN1_TO_NATIVE(uv);
3642 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3646 else /* Here is \N{NAME} but not \N{U+...}. */
3647 if ((res = get_and_check_backslash_N_name(s, e)))
3650 const char *str = SvPV_const(res, len);
3653 if (! len) { /* The name resolved to an empty string */
3654 Copy("\\N{}", d, 4, char);
3658 /* In order to not lose information for the regex
3659 * compiler, pass the result in the specially made
3660 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3661 * the code points in hex of each character
3662 * returned by charnames */
3664 const char *str_end = str + len;
3665 const STRLEN off = d - SvPVX_const(sv);
3667 if (! SvUTF8(res)) {
3668 /* For the non-UTF-8 case, we can determine the
3669 * exact length needed without having to parse
3670 * through the string. Each character takes up
3671 * 2 hex digits plus either a trailing dot or
3673 d = off + SvGROW(sv, off
3675 + 6 /* For the "\N{U+", and
3677 + (STRLEN)(send - e));
3678 Copy("\\N{U+", d, 5, char);
3680 while (str < str_end) {
3682 my_snprintf(hex_string, sizeof(hex_string),
3683 "%02X.", (U8) *str);
3684 Copy(hex_string, d, 3, char);
3688 d--; /* We will overwrite below the final
3689 dot with a right brace */
3692 STRLEN char_length; /* cur char's byte length */
3694 /* and the number of bytes after this is
3695 * translated into hex digits */
3696 STRLEN output_length;
3698 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3699 * for max('U+', '.'); and 1 for NUL */
3700 char hex_string[2 * UTF8_MAXBYTES + 5];
3702 /* Get the first character of the result. */
3703 U32 uv = utf8n_to_uvchr((U8 *) str,
3707 /* Convert first code point to hex, including
3708 * the boiler plate before it. */
3710 my_snprintf(hex_string, sizeof(hex_string),
3714 /* Make sure there is enough space to hold it */
3715 d = off + SvGROW(sv, off
3717 + (STRLEN)(send - e)
3718 + 2); /* '}' + NUL */
3720 Copy(hex_string, d, output_length, char);
3723 /* For each subsequent character, append dot and
3724 * its ordinal in hex */
3725 while ((str += char_length) < str_end) {
3726 const STRLEN off = d - SvPVX_const(sv);
3727 U32 uv = utf8n_to_uvchr((U8 *) str,
3732 my_snprintf(hex_string,
3737 d = off + SvGROW(sv, off
3739 + (STRLEN)(send - e)
3740 + 2); /* '}' + NUL */
3741 Copy(hex_string, d, output_length, char);
3746 *d++ = '}'; /* Done. Add the trailing brace */
3749 else { /* Here, not in a pattern. Convert the name to a
3752 /* If destination is not in utf8, unconditionally
3753 * recode it to be so. This is because \N{} implies
3754 * Unicode semantics, and scalars have to be in utf8
3755 * to guarantee those semantics */
3757 SvCUR_set(sv, d - SvPVX_const(sv));
3760 /* See Note on sizing above. */
3761 sv_utf8_upgrade_flags_grow(sv,
3762 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3763 len + (STRLEN)(send - s) + 1);
3764 d = SvPVX(sv) + SvCUR(sv);
3766 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3768 /* See Note on sizing above. (NOTE: SvCUR() is not
3769 * set correctly here). */
3770 const STRLEN off = d - SvPVX_const(sv);
3771 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3773 Copy(str, d, len, char);
3779 } /* End \N{NAME} */
3782 native_range = FALSE; /* \N{} is defined to be Unicode */
3784 s = e + 1; /* Point to just after the '}' */
3787 /* \c is a control character */
3791 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3794 yyerror("Missing control char name in \\c");
3798 /* printf-style backslashes, formfeeds, newlines, etc */
3815 *d++ = ASCII_TO_NATIVE('\033');
3824 } /* end if (backslash) */
3831 /* If we started with encoded form, or already know we want it,
3832 then encode the next character */
3833 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3837 /* One might think that it is wasted effort in the case of the
3838 * source being utf8 (this_utf8 == TRUE) to take the next character
3839 * in the source, convert it to an unsigned value, and then convert
3840 * it back again. But the source has not been validated here. The
3841 * routine that does the conversion checks for errors like
3844 const UV nextuv = (this_utf8)
3845 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3847 const STRLEN need = UNISKIP(nextuv);
3849 SvCUR_set(sv, d - SvPVX_const(sv));
3852 /* See Note on sizing above. */
3853 sv_utf8_upgrade_flags_grow(sv,
3854 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3855 need + (STRLEN)(send - s) + 1);
3856 d = SvPVX(sv) + SvCUR(sv);
3858 } else if (need > len) {
3859 /* encoded value larger than old, may need extra space (NOTE:
3860 * SvCUR() is not set correctly here). See Note on sizing
3862 const STRLEN off = d - SvPVX_const(sv);
3863 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3867 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3869 if (uv > 255 && !dorange)
3870 native_range = FALSE;
3876 } /* while loop to process each character */
3878 /* terminate the string and set up the sv */
3880 SvCUR_set(sv, d - SvPVX_const(sv));
3881 if (SvCUR(sv) >= SvLEN(sv))
3882 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3883 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3886 if (PL_encoding && !has_utf8) {
3887 sv_recode_to_utf8(sv, PL_encoding);
3893 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3894 PL_sublex_info.sub_op->op_private |=
3895 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3899 /* shrink the sv if we allocated more than we used */
3900 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3901 SvPV_shrink_to_cur(sv);
3904 /* return the substring (via pl_yylval) only if we parsed anything */
3907 for (; s2 < s; s2++) {
3909 COPLINE_INC_WITH_HERELINES;
3911 SvREFCNT_inc_simple_void_NN(sv);
3912 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3913 && ! PL_parser->lex_re_reparsing)
3915 const char *const key = PL_lex_inpat ? "qr" : "q";
3916 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3920 if (PL_lex_inwhat == OP_TRANS) {
3923 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3926 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3934 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3937 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3939 LEAVE_with_name("scan_const");
3944 * Returns TRUE if there's more to the expression (e.g., a subscript),
3947 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3949 * ->[ and ->{ return TRUE
3950 * ->$* ->@* ->@[ and ->@{ return TRUE if postfix_interpolate is enabled
3951 * { and [ outside a pattern are always subscripts, so return TRUE
3952 * if we're outside a pattern and it's not { or [, then return FALSE
3953 * if we're in a pattern and the first char is a {
3954 * {4,5} (any digits around the comma) returns FALSE
3955 * if we're in a pattern and the first char is a [
3957 * [SOMETHING] has a funky algorithm to decide whether it's a
3958 * character class or not. It has to deal with things like
3959 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3960 * anything else returns TRUE
3963 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3966 S_intuit_more(pTHX_ char *s)
3970 PERL_ARGS_ASSERT_INTUIT_MORE;
3972 if (PL_lex_brackets)
3974 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3976 if (*s == '-' && s[1] == '>'
3977 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3978 && ( (s[2] == '$' && s[3] == '*')
3979 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3981 if (*s != '{' && *s != '[')
3986 /* In a pattern, so maybe we have {n,m}. */
3988 if (regcurly(s, FALSE)) {
3994 /* On the other hand, maybe we have a character class */
3997 if (*s == ']' || *s == '^')
4000 /* this is terrifying, and it works */
4003 const char * const send = strchr(s,']');
4004 unsigned char un_char, last_un_char;
4005 char tmpbuf[sizeof PL_tokenbuf * 4];
4007 if (!send) /* has to be an expression */
4009 weight = 2; /* let's weigh the evidence */
4013 else if (isDIGIT(*s)) {
4015 if (isDIGIT(s[1]) && s[2] == ']')
4021 Zero(seen,256,char);
4023 for (; s < send; s++) {
4024 last_un_char = un_char;
4025 un_char = (unsigned char)*s;
4030 weight -= seen[un_char] * 10;
4031 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4033 char *tmp = PL_bufend;
4034 PL_bufend = (char*)send;
4035 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4037 len = (int)strlen(tmpbuf);
4038 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4039 UTF ? SVf_UTF8 : 0, SVt_PV))
4044 else if (*s == '$' && s[1] &&
4045 strchr("[#!%*<>()-=",s[1])) {
4046 if (/*{*/ strchr("])} =",s[2]))
4055 if (strchr("wds]",s[1]))
4057 else if (seen[(U8)'\''] || seen[(U8)'"'])
4059 else if (strchr("rnftbxcav",s[1]))
4061 else if (isDIGIT(s[1])) {
4063 while (s[1] && isDIGIT(s[1]))
4073 if (strchr("aA01! ",last_un_char))
4075 if (strchr("zZ79~",s[1]))
4077 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4078 weight -= 5; /* cope with negative subscript */
4081 if (!isWORDCHAR(last_un_char)
4082 && !(last_un_char == '$' || last_un_char == '@'
4083 || last_un_char == '&')
4084 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4089 if (keyword(tmpbuf, d - tmpbuf, 0))
4092 if (un_char == last_un_char + 1)
4094 weight -= seen[un_char];
4099 if (weight >= 0) /* probably a character class */
4109 * Does all the checking to disambiguate
4111 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4112 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4114 * First argument is the stuff after the first token, e.g. "bar".
4116 * Not a method if foo is a filehandle.
4117 * Not a method if foo is a subroutine prototyped to take a filehandle.
4118 * Not a method if it's really "Foo $bar"
4119 * Method if it's "foo $bar"
4120 * Not a method if it's really "print foo $bar"
4121 * Method if it's really "foo package::" (interpreted as package->foo)
4122 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4123 * Not a method if bar is a filehandle or package, but is quoted with
4128 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4131 char *s = start + (*start == '$');
4132 char tmpbuf[sizeof PL_tokenbuf];
4139 PERL_ARGS_ASSERT_INTUIT_METHOD;
4141 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4143 if (cv && SvPOK(cv)) {
4144 const char *proto = CvPROTO(cv);
4146 while (*proto && (isSPACE(*proto) || *proto == ';'))
4153 if (*start == '$') {
4154 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4155 isUPPER(*PL_tokenbuf))
4158 len = start - SvPVX(PL_linestr);
4162 start = SvPVX(PL_linestr) + len;
4166 return *s == '(' ? FUNCMETH : METHOD;
4169 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4170 /* start is the beginning of the possible filehandle/object,
4171 * and s is the end of it
4172 * tmpbuf is a copy of it (but with single quotes as double colons)
4175 if (!keyword(tmpbuf, len, 0)) {
4176 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4180 soff = s - SvPVX(PL_linestr);
4184 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4185 if (indirgv && GvCVu(indirgv))
4187 /* filehandle or package name makes it a method */
4188 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4190 soff = s - SvPVX(PL_linestr);
4193 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4194 return 0; /* no assumptions -- "=>" quotes bareword */
4196 start_force(PL_curforce);
4197 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4198 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4199 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4201 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4202 ( UTF ? SVf_UTF8 : 0 )));
4207 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4209 return *s == '(' ? FUNCMETH : METHOD;
4215 /* Encoded script support. filter_add() effectively inserts a
4216 * 'pre-processing' function into the current source input stream.
4217 * Note that the filter function only applies to the current source file
4218 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4220 * The datasv parameter (which may be NULL) can be used to pass
4221 * private data to this instance of the filter. The filter function
4222 * can recover the SV using the FILTER_DATA macro and use it to
4223 * store private buffers and state information.
4225 * The supplied datasv parameter is upgraded to a PVIO type
4226 * and the IoDIRP/IoANY field is used to store the function pointer,
4227 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4228 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4229 * private use must be set using malloc'd pointers.
4233 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4242 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4243 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4245 if (!PL_rsfp_filters)
4246 PL_rsfp_filters = newAV();
4249 SvUPGRADE(datasv, SVt_PVIO);
4250 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4251 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4252 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4253 FPTR2DPTR(void *, IoANY(datasv)),
4254 SvPV_nolen(datasv)));
4255 av_unshift(PL_rsfp_filters, 1);
4256 av_store(PL_rsfp_filters, 0, datasv) ;
4258 !PL_parser->filtered
4259 && PL_parser->lex_flags & LEX_EVALBYTES
4260 && PL_bufptr < PL_bufend
4262 const char *s = PL_bufptr;
4263 while (s < PL_bufend) {
4265 SV *linestr = PL_parser->linestr;
4266 char *buf = SvPVX(linestr);
4267 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4268 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4269 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4270 STRLEN const linestart_pos = PL_parser->linestart - buf;
4271 STRLEN const last_uni_pos =
4272 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4273 STRLEN const last_lop_pos =
4274 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4275 av_push(PL_rsfp_filters, linestr);
4276 PL_parser->linestr =
4277 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4278 buf = SvPVX(PL_parser->linestr);
4279 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4280 PL_parser->bufptr = buf + bufptr_pos;
4281 PL_parser->oldbufptr = buf + oldbufptr_pos;
4282 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4283 PL_parser->linestart = buf + linestart_pos;
4284 if (PL_parser->last_uni)
4285 PL_parser->last_uni = buf + last_uni_pos;
4286 if (PL_parser->last_lop)
4287 PL_parser->last_lop = buf + last_lop_pos;
4288 SvLEN(linestr) = SvCUR(linestr);
4289 SvCUR(linestr) = s-SvPVX(linestr);
4290 PL_parser->filtered = 1;
4300 /* Delete most recently added instance of this filter function. */
4302 Perl_filter_del(pTHX_ filter_t funcp)
4307 PERL_ARGS_ASSERT_FILTER_DEL;
4310 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4311 FPTR2DPTR(void*, funcp)));
4313 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4315 /* if filter is on top of stack (usual case) just pop it off */
4316 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4317 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4318 sv_free(av_pop(PL_rsfp_filters));
4322 /* we need to search for the correct entry and clear it */
4323 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4327 /* Invoke the idxth filter function for the current rsfp. */
4328 /* maxlen 0 = read one text line */
4330 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4335 /* This API is bad. It should have been using unsigned int for maxlen.
4336 Not sure if we want to change the API, but if not we should sanity
4337 check the value here. */
4338 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4340 PERL_ARGS_ASSERT_FILTER_READ;
4342 if (!PL_parser || !PL_rsfp_filters)
4344 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4345 /* Provide a default input filter to make life easy. */
4346 /* Note that we append to the line. This is handy. */
4347 DEBUG_P(PerlIO_printf(Perl_debug_log,
4348 "filter_read %d: from rsfp\n", idx));
4349 if (correct_length) {
4352 const int old_len = SvCUR(buf_sv);
4354 /* ensure buf_sv is large enough */
4355 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4356 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4357 correct_length)) <= 0) {
4358 if (PerlIO_error(PL_rsfp))
4359 return -1; /* error */
4361 return 0 ; /* end of file */
4363 SvCUR_set(buf_sv, old_len + len) ;
4364 SvPVX(buf_sv)[old_len + len] = '\0';
4367 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4368 if (PerlIO_error(PL_rsfp))
4369 return -1; /* error */
4371 return 0 ; /* end of file */
4374 return SvCUR(buf_sv);
4376 /* Skip this filter slot if filter has been deleted */
4377 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4378 DEBUG_P(PerlIO_printf(Perl_debug_log,
4379 "filter_read %d: skipped (filter deleted)\n",
4381 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4383 if (SvTYPE(datasv) != SVt_PVIO) {
4384 if (correct_length) {
4386 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4387 if (!remainder) return 0; /* eof */
4388 if (correct_length > remainder) correct_length = remainder;
4389 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4390 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4393 const char *s = SvEND(datasv);
4394 const char *send = SvPVX(datasv) + SvLEN(datasv);
4402 if (s == send) return 0; /* eof */
4403 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4404 SvCUR_set(datasv, s-SvPVX(datasv));
4406 return SvCUR(buf_sv);
4408 /* Get function pointer hidden within datasv */
4409 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4410 DEBUG_P(PerlIO_printf(Perl_debug_log,
4411 "filter_read %d: via function %p (%s)\n",
4412 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4413 /* Call function. The function is expected to */
4414 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4415 /* Return: <0:error, =0:eof, >0:not eof */
4416 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4420 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4424 PERL_ARGS_ASSERT_FILTER_GETS;
4426 #ifdef PERL_CR_FILTER
4427 if (!PL_rsfp_filters) {
4428 filter_add(S_cr_textfilter,NULL);
4431 if (PL_rsfp_filters) {
4433 SvCUR_set(sv, 0); /* start with empty line */
4434 if (FILTER_READ(0, sv, 0) > 0)
4435 return ( SvPVX(sv) ) ;
4440 return (sv_gets(sv, PL_rsfp, append));
4444 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4449 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4451 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4455 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4456 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4458 return GvHV(gv); /* Foo:: */
4461 /* use constant CLASS => 'MyClass' */
4462 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4463 if (gv && GvCV(gv)) {
4464 SV * const sv = cv_const_sv(GvCV(gv));
4466 pkgname = SvPV_const(sv, len);
4469 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4473 * S_readpipe_override
4474 * Check whether readpipe() is overridden, and generates the appropriate
4475 * optree, provided sublex_start() is called afterwards.
4478 S_readpipe_override(pTHX)
4481 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4482 pl_yylval.ival = OP_BACKTICK;
4484 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4486 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4487 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4488 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4490 COPLINE_SET_FROM_MULTI_END;
4491 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4492 op_append_elem(OP_LIST,
4493 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4494 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4501 * The intent of this yylex wrapper is to minimize the changes to the
4502 * tokener when we aren't interested in collecting madprops. It remains
4503 * to be seen how successful this strategy will be...
4510 char *s = PL_bufptr;
4512 /* make sure PL_thiswhite is initialized */
4516 /* previous token ate up our whitespace? */
4517 if (!PL_lasttoke && PL_nextwhite) {
4518 PL_thiswhite = PL_nextwhite;
4522 /* isolate the token, and figure out where it is without whitespace */
4523 PL_realtokenstart = -1;
4527 assert(PL_curforce < 0);
4529 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4530 if (!PL_thistoken) {
4531 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4532 PL_thistoken = newSVpvs("");
4534 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4535 PL_thistoken = newSVpvn(tstart, s - tstart);
4538 if (PL_thismad) /* install head */
4539 CURMAD('X', PL_thistoken);
4542 /* last whitespace of a sublex? */
4543 if (optype == ')' && PL_endwhite) {
4544 CURMAD('X', PL_endwhite);
4549 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4550 if (!PL_thiswhite && !PL_endwhite && !optype) {
4551 sv_free(PL_thistoken);
4556 /* put off final whitespace till peg */
4557 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4558 PL_nextwhite = PL_thiswhite;
4561 else if (PL_thisopen) {
4562 CURMAD('q', PL_thisopen);
4564 sv_free(PL_thistoken);
4568 /* Store actual token text as madprop X */
4569 CURMAD('X', PL_thistoken);
4573 /* add preceding whitespace as madprop _ */
4574 CURMAD('_', PL_thiswhite);
4578 /* add quoted material as madprop = */
4579 CURMAD('=', PL_thisstuff);
4583 /* add terminating quote as madprop Q */
4584 CURMAD('Q', PL_thisclose);
4588 /* special processing based on optype */
4592 /* opval doesn't need a TOKEN since it can already store mp */
4602 if (pl_yylval.opval)
4603 append_madprops(PL_thismad, pl_yylval.opval, 0);
4611 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4624 /* remember any fake bracket that lexer is about to discard */
4625 if (PL_lex_brackets == 1 &&
4626 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4629 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4632 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4633 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4636 break; /* don't bother looking for trailing comment */
4645 /* attach a trailing comment to its statement instead of next token */
4649 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4651 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4653 if (*s == '\n' || *s == '#') {
4654 while (s < PL_bufend && *s != '\n')
4658 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4659 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4672 /* Create new token struct. Note: opvals return early above. */
4673 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4680 S_tokenize_use(pTHX_ int is_use, char *s) {
4683 PERL_ARGS_ASSERT_TOKENIZE_USE;
4685 if (PL_expect != XSTATE)
4686 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4687 is_use ? "use" : "no"));
4690 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4691 s = force_version(s, TRUE);
4692 if (*s == ';' || *s == '}'
4693 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4694 start_force(PL_curforce);
4695 NEXTVAL_NEXTTOKE.opval = NULL;
4698 else if (*s == 'v') {
4699 s = force_word(s,WORD,FALSE,TRUE);
4700 s = force_version(s, FALSE);
4704 s = force_word(s,WORD,FALSE,TRUE);
4705 s = force_version(s, FALSE);
4707 pl_yylval.ival = is_use;
4711 static const char* const exp_name[] =
4712 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4713 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4717 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4719 S_word_takes_any_delimeter(char *p, STRLEN len)
4721 return (len == 1 && strchr("msyq", p[0])) ||
4723 (p[0] == 't' && p[1] == 'r') ||
4724 (p[0] == 'q' && strchr("qwxr", p[1]))));
4728 S_check_scalar_slice(pTHX_ char *s)
4731 while (*s == ' ' || *s == '\t') s++;
4732 if (*s == 'q' && s[1] == 'w'
4733 && !isWORDCHAR_lazy_if(s+2,UTF))
4735 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4736 s += UTF ? UTF8SKIP(s) : 1;
4737 if (*s == '}' || *s == ']')
4738 pl_yylval.ival = OPpSLICEWARNING;
4744 Works out what to call the token just pulled out of the input
4745 stream. The yacc parser takes care of taking the ops we return and
4746 stitching them into a tree.
4749 The type of the next token
4752 Switch based on the current state:
4753 - if we already built the token before, use it
4754 - if we have a case modifier in a string, deal with that
4755 - handle other cases of interpolation inside a string
4756 - scan the next line if we are inside a format
4757 In the normal state switch on the next character:
4759 if alphabetic, go to key lookup
4760 unrecoginized character - croak
4761 - 0/4/26: handle end-of-line or EOF
4762 - cases for whitespace
4763 - \n and #: handle comments and line numbers
4764 - various operators, brackets and sigils
4767 - 'v': vstrings (or go to key lookup)
4768 - 'x' repetition operator (or go to key lookup)
4769 - other ASCII alphanumerics (key lookup begins here):
4772 scan built-in keyword (but do nothing with it yet)
4773 check for statement label
4774 check for lexical subs
4775 goto just_a_word if there is one
4776 see whether built-in keyword is overridden
4777 switch on keyword number:
4778 - default: just_a_word:
4779 not a built-in keyword; handle bareword lookup
4780 disambiguate between method and sub call
4781 fall back to bareword
4782 - cases for built-in keywords
4790 char *s = PL_bufptr;
4794 const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
4798 /* orig_keyword, gvp, and gv are initialized here because
4799 * jump to the label just_a_word_zero can bypass their
4800 * initialization later. */
4801 I32 orig_keyword = 0;
4806 SV* tmp = newSVpvs("");
4807 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4808 (IV)CopLINE(PL_curcop),
4809 lex_state_names[PL_lex_state],
4810 exp_name[PL_expect],
4811 pv_display(tmp, s, strlen(s), 0, 60));
4815 switch (PL_lex_state) {
4817 case LEX_INTERPNORMAL:
4820 /* when we've already built the next token, just pull it out of the queue */
4824 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4826 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4827 PL_nexttoke[PL_lasttoke].next_mad = 0;
4828 if (PL_thismad && PL_thismad->mad_key == '_') {
4829 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4830 PL_thismad->mad_val = 0;
4831 mad_free(PL_thismad);
4836 PL_lex_state = PL_lex_defer;
4837 PL_expect = PL_lex_expect;
4838 PL_lex_defer = LEX_NORMAL;
4839 if (!PL_nexttoke[PL_lasttoke].next_type)
4844 pl_yylval = PL_nextval[PL_nexttoke];
4846 PL_lex_state = PL_lex_defer;
4847 PL_expect = PL_lex_expect;
4848 PL_lex_defer = LEX_NORMAL;
4854 next_type = PL_nexttoke[PL_lasttoke].next_type;
4856 next_type = PL_nexttype[PL_nexttoke];
4858 if (next_type & (7<<24)) {
4859 if (next_type & (1<<24)) {
4860 if (PL_lex_brackets > 100)
4861 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4862 PL_lex_brackstack[PL_lex_brackets++] =
4863 (char) ((next_type >> 16) & 0xff);
4865 if (next_type & (2<<24))
4866 PL_lex_allbrackets++;
4867 if (next_type & (4<<24))
4868 PL_lex_allbrackets--;
4869 next_type &= 0xffff;
4871 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4874 /* interpolated case modifiers like \L \U, including \Q and \E.
4875 when we get here, PL_bufptr is at the \
4877 case LEX_INTERPCASEMOD:
4879 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4881 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4882 PL_bufptr, PL_bufend, *PL_bufptr);
4884 /* handle \E or end of string */
4885 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4887 if (PL_lex_casemods) {
4888 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4889 PL_lex_casestack[PL_lex_casemods] = '\0';
4891 if (PL_bufptr != PL_bufend
4892 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4893 || oldmod == 'F')) {
4895 PL_lex_state = LEX_INTERPCONCAT;
4898 PL_thistoken = newSVpvs("\\E");
4901 PL_lex_allbrackets--;
4904 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4905 /* Got an unpaired \E */
4906 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4907 "Useless use of \\E");
4910 while (PL_bufptr != PL_bufend &&
4911 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4914 PL_thiswhite = newSVpvs("");
4915 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4920 if (PL_bufptr != PL_bufend)
4923 PL_lex_state = LEX_INTERPCONCAT;
4927 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4928 "### Saw case modifier\n"); });
4930 if (s[1] == '\\' && s[2] == 'E') {
4934 PL_thiswhite = newSVpvs("");
4935 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4939 PL_lex_state = LEX_INTERPCONCAT;
4944 if (!PL_madskills) /* when just compiling don't need correct */
4945 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4946 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4947 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4948 (strchr(PL_lex_casestack, 'L')
4949 || strchr(PL_lex_casestack, 'U')
4950 || strchr(PL_lex_casestack, 'F'))) {
4951 PL_lex_casestack[--PL_lex_casemods] = '\0';
4952 PL_lex_allbrackets--;
4955 if (PL_lex_casemods > 10)
4956 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4957 PL_lex_casestack[PL_lex_casemods++] = *s;
4958 PL_lex_casestack[PL_lex_casemods] = '\0';
4959 PL_lex_state = LEX_INTERPCONCAT;
4960 start_force(PL_curforce);
4961 NEXTVAL_NEXTTOKE.ival = 0;
4962 force_next((2<<24)|'(');
4963 start_force(PL_curforce);
4965 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4967 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4969 NEXTVAL_NEXTTOKE.ival = OP_LC;
4971 NEXTVAL_NEXTTOKE.ival = OP_UC;
4973 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4975 NEXTVAL_NEXTTOKE.ival = OP_FC;
4977 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4979 SV* const tmpsv = newSVpvs("\\ ");
4980 /* replace the space with the character we want to escape
4982 SvPVX(tmpsv)[1] = *s;
4988 if (PL_lex_starts) {
4994 sv_free(PL_thistoken);
4995 PL_thistoken = newSVpvs("");
4998 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4999 if (PL_lex_casemods == 1 && PL_lex_inpat)
5008 case LEX_INTERPPUSH:
5009 return REPORT(sublex_push());
5011 case LEX_INTERPSTART:
5012 if (PL_bufptr == PL_bufend)
5013 return REPORT(sublex_done());
5014 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
5015 "### Interpolated variable\n"); });
5017 /* for /@a/, we leave the joining for the regex engine to do
5018 * (unless we're within \Q etc) */
5019 PL_lex_dojoin = (*PL_bufptr == '@'
5020 && (!PL_lex_inpat || PL_lex_casemods));
5021 PL_lex_state = LEX_INTERPNORMAL;
5022 if (PL_lex_dojoin) {
5023 start_force(PL_curforce);
5024 NEXTVAL_NEXTTOKE.ival = 0;
5026 start_force(PL_curforce);
5027 force_ident("\"", '$');
5028 start_force(PL_curforce);
5029 NEXTVAL_NEXTTOKE.ival = 0;
5031 start_force(PL_curforce);
5032 NEXTVAL_NEXTTOKE.ival = 0;
5033 force_next((2<<24)|'(');
5034 start_force(PL_curforce);
5035 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5038 /* Convert (?{...}) and friends to 'do {...}' */
5039 if (PL_lex_inpat && *PL_bufptr == '(') {
5040 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5042 if (*PL_bufptr != '{')
5044 start_force(PL_curforce);
5045 /* XXX probably need a CURMAD(something) here */
5046 PL_expect = XTERMBLOCK;
5050 if (PL_lex_starts++) {
5055 sv_free(PL_thistoken);
5056 PL_thistoken = newSVpvs("");
5059 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5060 if (!PL_lex_casemods && PL_lex_inpat)
5067 case LEX_INTERPENDMAYBE:
5068 if (intuit_more(PL_bufptr)) {
5069 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5075 if (PL_lex_dojoin) {
5076 const U8 dojoin_was = PL_lex_dojoin;
5077 PL_lex_dojoin = FALSE;
5078 PL_lex_state = LEX_INTERPCONCAT;
5082 sv_free(PL_thistoken);
5083 PL_thistoken = newSVpvs("");
5086 PL_lex_allbrackets--;
5087 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5089 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5090 && SvEVALED(PL_lex_repl))
5092 if (PL_bufptr != PL_bufend)
5093 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5096 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5097 re_eval_str. If the here-doc body’s length equals the previous
5098 value of re_eval_start, re_eval_start will now be null. So
5099 check re_eval_str as well. */
5100 if (PL_parser->lex_shared->re_eval_start
5101 || PL_parser->lex_shared->re_eval_str) {
5103 if (*PL_bufptr != ')')
5104 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5106 /* having compiled a (?{..}) expression, return the original
5107 * text too, as a const */
5108 if (PL_parser->lex_shared->re_eval_str) {
5109 sv = PL_parser->lex_shared->re_eval_str;
5110 PL_parser->lex_shared->re_eval_str = NULL;
5112 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5113 SvPV_shrink_to_cur(sv);
5115 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5116 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5117 start_force(PL_curforce);
5118 /* XXX probably need a CURMAD(something) here */
5119 NEXTVAL_NEXTTOKE.opval =
5120 (OP*)newSVOP(OP_CONST, 0,
5123 PL_parser->lex_shared->re_eval_start = NULL;
5129 case LEX_INTERPCONCAT:
5131 if (PL_lex_brackets)
5132 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5133 (long) PL_lex_brackets);
5135 if (PL_bufptr == PL_bufend)
5136 return REPORT(sublex_done());
5138 /* m'foo' still needs to be parsed for possible (?{...}) */
5139 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5140 SV *sv = newSVsv(PL_linestr);
5142 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5146 s = scan_const(PL_bufptr);
5148 PL_lex_state = LEX_INTERPCASEMOD;
5150 PL_lex_state = LEX_INTERPSTART;
5153 if (s != PL_bufptr) {
5154 start_force(PL_curforce);
5156 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5158 NEXTVAL_NEXTTOKE = pl_yylval;
5161 if (PL_lex_starts++) {
5165 sv_free(PL_thistoken);
5166 PL_thistoken = newSVpvs("");
5169 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5170 if (!PL_lex_casemods && PL_lex_inpat)
5183 s = scan_formline(PL_bufptr);
5184 if (!PL_lex_formbrack)
5193 /* We really do *not* want PL_linestr ever becoming a COW. */
5194 assert (!SvIsCOW(PL_linestr));
5196 PL_oldoldbufptr = PL_oldbufptr;
5198 PL_parser->saw_infix_sigil = 0;
5203 sv_free(PL_thistoken);
5206 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5210 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5213 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5214 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5216 SVs_TEMP | SVf_UTF8),
5217 10, UNI_DISPLAY_ISPRINT)
5218 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5219 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5220 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5221 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5225 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5226 UTF8fARG(UTF, (s - d), d),
5231 goto fake_eof; /* emulate EOF on ^D or ^Z */
5237 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5240 if (PL_lex_brackets &&
5241 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5242 yyerror((const char *)
5244 ? "Format not terminated"
5245 : "Missing right curly or square bracket"));
5247 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5248 "### Tokener got EOF\n");
5252 if (s++ < PL_bufend)
5253 goto retry; /* ignore stray nulls */
5256 if (!PL_in_eval && !PL_preambled) {
5257 PL_preambled = TRUE;
5263 /* Generate a string of Perl code to load the debugger.
5264 * If PERL5DB is set, it will return the contents of that,
5265 * otherwise a compile-time require of perl5db.pl. */
5267 const char * const pdb = PerlEnv_getenv("PERL5DB");
5270 sv_setpv(PL_linestr, pdb);
5271 sv_catpvs(PL_linestr,";");
5273 SETERRNO(0,SS_NORMAL);
5274 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5276 PL_parser->preambling = CopLINE(PL_curcop);
5278 sv_setpvs(PL_linestr,"");
5279 if (PL_preambleav) {
5280 SV **svp = AvARRAY(PL_preambleav);
5281 SV **const end = svp + AvFILLp(PL_preambleav);
5283 sv_catsv(PL_linestr, *svp);
5285 sv_catpvs(PL_linestr, ";");
5287 sv_free(MUTABLE_SV(PL_preambleav));
5288 PL_preambleav = NULL;
5291 sv_catpvs(PL_linestr,
5292 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5293 if (PL_minus_n || PL_minus_p) {
5294 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5296 sv_catpvs(PL_linestr,"chomp;");
5299 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5300 || *PL_splitstr == '"')
5301 && strchr(PL_splitstr + 1, *PL_splitstr))
5302 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5304 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5305 bytes can be used as quoting characters. :-) */
5306 const char *splits = PL_splitstr;
5307 sv_catpvs(PL_linestr, "our @F=split(q\0");
5310 if (*splits == '\\')
5311 sv_catpvn(PL_linestr, splits, 1);
5312 sv_catpvn(PL_linestr, splits, 1);
5313 } while (*splits++);
5314 /* This loop will embed the trailing NUL of
5315 PL_linestr as the last thing it does before
5317 sv_catpvs(PL_linestr, ");");
5321 sv_catpvs(PL_linestr,"our @F=split(' ');");
5324 sv_catpvs(PL_linestr, "\n");
5325 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5326 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5327 PL_last_lop = PL_last_uni = NULL;
5328 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5329 update_debugger_info(PL_linestr, NULL, 0);
5334 bof = PL_rsfp ? TRUE : FALSE;
5337 fake_eof = LEX_FAKE_EOF;
5339 PL_bufptr = PL_bufend;
5340 COPLINE_INC_WITH_HERELINES;
5341 if (!lex_next_chunk(fake_eof)) {
5342 CopLINE_dec(PL_curcop);
5344 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5346 CopLINE_dec(PL_curcop);
5349 PL_realtokenstart = -1;
5352 /* If it looks like the start of a BOM or raw UTF-16,
5353 * check if it in fact is. */
5354 if (bof && PL_rsfp &&
5356 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5359 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5360 bof = (offset == (Off_t)SvCUR(PL_linestr));
5361 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5362 /* offset may include swallowed CR */
5364 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5367 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5368 s = swallow_bom((U8*)s);
5371 if (PL_parser->in_pod) {
5372 /* Incest with pod. */
5375 sv_catsv(PL_thiswhite, PL_linestr);
5377 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5378 sv_setpvs(PL_linestr, "");
5379 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5380 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5381 PL_last_lop = PL_last_uni = NULL;
5382 PL_parser->in_pod = 0;
5385 if (PL_rsfp || PL_parser->filtered)
5387 } while (PL_parser->in_pod);
5388 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5389 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5390 PL_last_lop = PL_last_uni = NULL;
5391 if (CopLINE(PL_curcop) == 1) {
5392 while (s < PL_bufend && isSPACE(*s))
5394 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5398 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5402 if (*s == '#' && *(s+1) == '!')
5404 #ifdef ALTERNATE_SHEBANG
5406 static char const as[] = ALTERNATE_SHEBANG;
5407 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5408 d = s + (sizeof(as) - 1);
5410 #endif /* ALTERNATE_SHEBANG */
5419 while (*d && !isSPACE(*d))
5423 #ifdef ARG_ZERO_IS_SCRIPT
5424 if (ipathend > ipath) {
5426 * HP-UX (at least) sets argv[0] to the script name,
5427 * which makes $^X incorrect. And Digital UNIX and Linux,
5428 * at least, set argv[0] to the basename of the Perl
5429 * interpreter. So, having found "#!", we'll set it right.
5431 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5433 assert(SvPOK(x) || SvGMAGICAL(x));
5434 if (sv_eq(x, CopFILESV(PL_curcop))) {
5435 sv_setpvn(x, ipath, ipathend - ipath);
5441 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5442 const char * const lstart = SvPV_const(x,llen);
5444 bstart += blen - llen;
5445 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5446 sv_setpvn(x, ipath, ipathend - ipath);
5451 TAINT_NOT; /* $^X is always tainted, but that's OK */
5453 #endif /* ARG_ZERO_IS_SCRIPT */
5458 d = instr(s,"perl -");
5460 d = instr(s,"perl");
5462 /* avoid getting into infinite loops when shebang
5463 * line contains "Perl" rather than "perl" */
5465 for (d = ipathend-4; d >= ipath; --d) {
5466 if ((*d == 'p' || *d == 'P')
5467 && !ibcmp(d, "perl", 4))
5477 #ifdef ALTERNATE_SHEBANG
5479 * If the ALTERNATE_SHEBANG on this system starts with a
5480 * character that can be part of a Perl expression, then if
5481 * we see it but not "perl", we're probably looking at the
5482 * start of Perl code, not a request to hand off to some
5483 * other interpreter. Similarly, if "perl" is there, but
5484 * not in the first 'word' of the line, we assume the line
5485 * contains the start of the Perl program.
5487 if (d && *s != '#') {
5488 const char *c = ipath;
5489 while (*c && !strchr("; \t\r\n\f\v#", *c))
5492 d = NULL; /* "perl" not in first word; ignore */
5494 *s = '#'; /* Don't try to parse shebang line */
5496 #endif /* ALTERNATE_SHEBANG */
5501 !instr(s,"indir") &&
5502 instr(PL_origargv[0],"perl"))
5509 while (s < PL_bufend && isSPACE(*s))
5511 if (s < PL_bufend) {
5512 Newx(newargv,PL_origargc+3,char*);
5514 while (s < PL_bufend && !isSPACE(*s))
5517 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5520 newargv = PL_origargv;
5523 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5525 Perl_croak(aTHX_ "Can't exec %s", ipath);
5528 while (*d && !isSPACE(*d))
5530 while (SPACE_OR_TAB(*d))
5534 const bool switches_done = PL_doswitches;
5535 const U32 oldpdb = PL_perldb;
5536 const bool oldn = PL_minus_n;
5537 const bool oldp = PL_minus_p;
5541 bool baduni = FALSE;
5543 const char *d2 = d1 + 1;
5544 if (parse_unicode_opts((const char **)&d2)
5548 if (baduni || *d1 == 'M' || *d1 == 'm') {
5549 const char * const m = d1;
5550 while (*d1 && !isSPACE(*d1))
5552 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5555 d1 = moreswitches(d1);
5557 if (PL_doswitches && !switches_done) {
5558 int argc = PL_origargc;
5559 char **argv = PL_origargv;
5562 } while (argc && argv[0][0] == '-' && argv[0][1]);
5563 init_argv_symbols(argc,argv);
5565 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5566 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5567 /* if we have already added "LINE: while (<>) {",
5568 we must not do it again */
5570 sv_setpvs(PL_linestr, "");
5571 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5572 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5573 PL_last_lop = PL_last_uni = NULL;
5574 PL_preambled = FALSE;
5575 if (PERLDB_LINE || PERLDB_SAVESRC)
5576 (void)gv_fetchfile(PL_origfilename);
5583 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5584 PL_lex_state = LEX_FORMLINE;
5585 start_force(PL_curforce);
5586 NEXTVAL_NEXTTOKE.ival = 0;
5587 force_next(FORMRBRACK);
5592 #ifdef PERL_STRICT_CR
5593 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5595 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5597 case ' ': case '\t': case '\f': case 013:
5599 PL_realtokenstart = -1;
5602 PL_thiswhite = newSVpvs("");
5603 sv_catpvn(PL_thiswhite, s, 1);
5611 PL_realtokenstart = -1;
5615 if (PL_lex_state != LEX_NORMAL ||
5616 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5617 if (*s == '#' && s == PL_linestart && PL_in_eval
5618 && !PL_rsfp && !PL_parser->filtered) {
5619 /* handle eval qq[#line 1 "foo"\n ...] */
5620 CopLINE_dec(PL_curcop);
5623 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5625 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5629 const bool in_comment = *s == '#';
5631 while (d < PL_bufend && *d != '\n')
5635 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5636 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5640 PL_thiswhite = newSVpvn(s, d - s);
5643 if (in_comment && d == PL_bufend
5644 && PL_lex_state == LEX_INTERPNORMAL
5645 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5646 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5649 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5650 PL_lex_state = LEX_FORMLINE;
5651 start_force(PL_curforce);
5652 NEXTVAL_NEXTTOKE.ival = 0;
5653 force_next(FORMRBRACK);
5659 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5660 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5663 TOKEN(PEG); /* make sure any #! line is accessible */
5669 if (PL_madskills) d = s;
5670 while (s < PL_bufend && *s != '\n')
5678 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5679 Perl_croak(aTHX_ "panic: input overflow");
5681 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5683 PL_thiswhite = newSVpvs("");
5684 if (CopLINE(PL_curcop) == 1) {
5685 sv_setpvs(PL_thiswhite, "");
5688 sv_catpvn(PL_thiswhite, d, s - d);
5695 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5703 while (s < PL_bufend && SPACE_OR_TAB(*s))
5706 if (strnEQ(s,"=>",2)) {
5707 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5708 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5709 OPERATOR('-'); /* unary minus */
5711 PL_last_uni = PL_oldbufptr;
5713 case 'r': ftst = OP_FTEREAD; break;
5714 case 'w': ftst = OP_FTEWRITE; break;
5715 case 'x': ftst = OP_FTEEXEC; break;
5716 case 'o': ftst = OP_FTEOWNED; break;
5717 case 'R': ftst = OP_FTRREAD; break;
5718 case 'W': ftst = OP_FTRWRITE; break;
5719 case 'X': ftst = OP_FTREXEC; break;
5720 case 'O': ftst = OP_FTROWNED; break;
5721 case 'e': ftst = OP_FTIS; break;
5722 case 'z': ftst = OP_FTZERO; break;
5723 case 's': ftst = OP_FTSIZE; break;
5724 case 'f': ftst = OP_FTFILE; break;
5725 case 'd': ftst = OP_FTDIR; break;
5726 case 'l': ftst = OP_FTLINK; break;
5727 case 'p': ftst = OP_FTPIPE; break;
5728 case 'S': ftst = OP_FTSOCK; break;
5729 case 'u': ftst = OP_FTSUID; break;
5730 case 'g': ftst = OP_FTSGID; break;
5731 case 'k': ftst = OP_FTSVTX; break;
5732 case 'b': ftst = OP_FTBLK; break;
5733 case 'c': ftst = OP_FTCHR; break;
5734 case 't': ftst = OP_FTTTY; break;
5735 case 'T': ftst = OP_FTTEXT; break;
5736 case 'B': ftst = OP_FTBINARY; break;
5737 case 'M': case 'A': case 'C':
5738 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5740 case 'M': ftst = OP_FTMTIME; break;
5741 case 'A': ftst = OP_FTATIME; break;
5742 case 'C': ftst = OP_FTCTIME; break;
5750 PL_last_lop_op = (OPCODE)ftst;
5751 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5752 "### Saw file test %c\n", (int)tmp);
5757 /* Assume it was a minus followed by a one-letter named
5758 * subroutine call (or a -bareword), then. */
5759 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5760 "### '-%c' looked like a file test but was not\n",
5767 const char tmp = *s++;
5770 if (PL_expect == XOPERATOR)
5775 else if (*s == '>') {
5778 if (FEATURE_POSTDEREF_IS_ENABLED && (
5779 ((*s == '$' || *s == '&') && s[1] == '*')
5780 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5781 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5784 Perl_ck_warner_d(aTHX_
5785 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5786 "Postfix dereference is experimental"
5788 PL_expect = XPOSTDEREF;
5791 if (isIDFIRST_lazy_if(s,UTF)) {
5792 s = force_word(s,METHOD,FALSE,TRUE);
5800 if (PL_expect == XOPERATOR) {
5801 if (*s == '=' && !PL_lex_allbrackets &&
5802 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5809 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5811 OPERATOR('-'); /* unary minus */
5817 const char tmp = *s++;
5820 if (PL_expect == XOPERATOR)
5825 if (PL_expect == XOPERATOR) {
5826 if (*s == '=' && !PL_lex_allbrackets &&
5827 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5834 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5841 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5842 if (PL_expect != XOPERATOR) {
5843 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5844 PL_expect = XOPERATOR;
5845 force_ident(PL_tokenbuf, '*');
5853 if (*s == '=' && !PL_lex_allbrackets &&
5854 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5860 if (*s == '=' && !PL_lex_allbrackets &&
5861 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5865 PL_parser->saw_infix_sigil = 1;
5870 if (PL_expect == XOPERATOR) {
5871 if (s[1] == '=' && !PL_lex_allbrackets &&
5872 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5875 PL_parser->saw_infix_sigil = 1;
5878 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5879 PL_tokenbuf[0] = '%';
5880 s = scan_ident(s, PL_tokenbuf + 1,
5881 sizeof PL_tokenbuf - 1, FALSE);
5883 if (!PL_tokenbuf[1]) {
5886 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5888 PL_tokenbuf[0] = '@';
5890 /* Warn about % where they meant $. */
5891 if (*s == '[' || *s == '{') {
5892 if (ckWARN(WARN_SYNTAX)) {
5893 S_check_scalar_slice(aTHX_ s);
5897 PL_expect = XOPERATOR;
5898 force_ident_maybe_lex('%');
5902 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5903 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5908 if (PL_lex_brackets > 100)
5909 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5910 PL_lex_brackstack[PL_lex_brackets++] = 0;
5911 PL_lex_allbrackets++;
5913 const char tmp = *s++;
5918 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5920 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5923 Perl_ck_warner_d(aTHX_
5924 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5925 "Smartmatch is experimental");
5931 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5938 goto just_a_word_zero_gv;
5941 switch (PL_expect) {
5947 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5949 PL_bufptr = s; /* update in case we back off */
5952 "Use of := for an empty attribute list is not allowed");
5959 PL_expect = XTERMBLOCK;
5962 stuffstart = s - SvPVX(PL_linestr) - 1;
5966 while (isIDFIRST_lazy_if(s,UTF)) {
5969 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5970 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5971 if (tmp < 0) tmp = -tmp;
5986 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5988 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5989 COPLINE_SET_FROM_MULTI_END;
5991 /* MUST advance bufptr here to avoid bogus
5992 "at end of line" context messages from yyerror().
5994 PL_bufptr = s + len;
5995 yyerror("Unterminated attribute parameter in attribute list");
5999 return REPORT(0); /* EOF indicator */
6003 sv_catsv(sv, PL_lex_stuff);
6004 attrs = op_append_elem(OP_LIST, attrs,
6005 newSVOP(OP_CONST, 0, sv));
6006 SvREFCNT_dec(PL_lex_stuff);
6007 PL_lex_stuff = NULL;
6010 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
6012 if (PL_in_my == KEY_our) {
6013 deprecate(":unique");
6016 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
6019 /* NOTE: any CV attrs applied here need to be part of
6020 the CVf_BUILTIN_ATTRS define in cv.h! */
6021 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
6023 CvLVALUE_on(PL_compcv);
6025 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
6027 deprecate(":locked");
6029 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
6031 CvMETHOD_on(PL_compcv);
6033 /* After we've set the flags, it could be argued that
6034 we don't need to do the attributes.pm-based setting
6035 process, and shouldn't bother appending recognized
6036 flags. To experiment with that, uncomment the
6037 following "else". (Note that's already been
6038 uncommented. That keeps the above-applied built-in
6039 attributes from being intercepted (and possibly
6040 rejected) by a package's attribute routines, but is
6041 justified by the performance win for the common case
6042 of applying only built-in attributes.) */
6044 attrs = op_append_elem(OP_LIST, attrs,
6045 newSVOP(OP_CONST, 0,
6049 if (*s == ':' && s[1] != ':')
6052 break; /* require real whitespace or :'s */
6053 /* XXX losing whitespace on sequential attributes here */
6057 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
6058 if (*s != ';' && *s != '}' && *s != tmp
6059 && (tmp != '=' || *s != ')')) {
6060 const char q = ((*s == '\'') ? '"' : '\'');
6061 /* If here for an expression, and parsed no attrs, back
6063 if (tmp == '=' && !attrs) {
6067 /* MUST advance bufptr here to avoid bogus "at end of line"
6068 context messages from yyerror().
6071 yyerror( (const char *)
6073 ? Perl_form(aTHX_ "Invalid separator character "
6074 "%c%c%c in attribute list", q, *s, q)
6075 : "Unterminated attribute list" ) );
6083 start_force(PL_curforce);
6084 NEXTVAL_NEXTTOKE.opval = attrs;
6085 CURMAD('_', PL_nextwhite);
6090 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6091 (s - SvPVX(PL_linestr)) - stuffstart);
6096 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6100 PL_lex_allbrackets--;
6104 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6105 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6109 PL_lex_allbrackets++;
6112 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6118 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6121 PL_lex_allbrackets--;
6127 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6130 if (PL_lex_brackets <= 0)
6131 yyerror("Unmatched right square bracket");
6134 PL_lex_allbrackets--;
6135 if (PL_lex_state == LEX_INTERPNORMAL) {
6136 if (PL_lex_brackets == 0) {
6137 if (*s == '-' && s[1] == '>')
6138 PL_lex_state = LEX_INTERPENDMAYBE;
6139 else if (*s != '[' && *s != '{')
6140 PL_lex_state = LEX_INTERPEND;
6147 if (PL_lex_brackets > 100) {
6148 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6150 switch (PL_expect) {
6152 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153 PL_lex_allbrackets++;
6154 OPERATOR(HASHBRACK);
6156 while (s < PL_bufend && SPACE_OR_TAB(*s))
6159 PL_tokenbuf[0] = '\0';
6160 if (d < PL_bufend && *d == '-') {
6161 PL_tokenbuf[0] = '-';
6163 while (d < PL_bufend && SPACE_OR_TAB(*d))
6166 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6167 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6169 while (d < PL_bufend && SPACE_OR_TAB(*d))
6172 const char minus = (PL_tokenbuf[0] == '-');
6173 s = force_word(s + minus, WORD, FALSE, TRUE);
6181 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6182 PL_lex_allbrackets++;
6187 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6188 PL_lex_allbrackets++;
6193 if (PL_oldoldbufptr == PL_last_lop)
6194 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6196 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6197 PL_lex_allbrackets++;
6200 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6202 /* This hack is to get the ${} in the message. */
6204 yyerror("syntax error");
6207 OPERATOR(HASHBRACK);
6209 /* This hack serves to disambiguate a pair of curlies
6210 * as being a block or an anon hash. Normally, expectation
6211 * determines that, but in cases where we're not in a
6212 * position to expect anything in particular (like inside
6213 * eval"") we have to resolve the ambiguity. This code
6214 * covers the case where the first term in the curlies is a
6215 * quoted string. Most other cases need to be explicitly
6216 * disambiguated by prepending a "+" before the opening
6217 * curly in order to force resolution as an anon hash.
6219 * XXX should probably propagate the outer expectation
6220 * into eval"" to rely less on this hack, but that could
6221 * potentially break current behavior of eval"".
6225 if (*s == '\'' || *s == '"' || *s == '`') {
6226 /* common case: get past first string, handling escapes */
6227 for (t++; t < PL_bufend && *t != *s;)
6228 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6232 else if (*s == 'q') {
6235 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6236 && !isWORDCHAR(*t))))
6238 /* skip q//-like construct */
6240 char open, close, term;
6243 while (t < PL_bufend && isSPACE(*t))
6245 /* check for q => */
6246 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6247 OPERATOR(HASHBRACK);
6251 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6255 for (t++; t < PL_bufend; t++) {
6256 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6258 else if (*t == open)
6262 for (t++; t < PL_bufend; t++) {
6263 if (*t == '\\' && t+1 < PL_bufend)
6265 else if (*t == close && --brackets <= 0)
6267 else if (*t == open)
6274 /* skip plain q word */
6275 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6278 else if (isWORDCHAR_lazy_if(t,UTF)) {
6280 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6283 while (t < PL_bufend && isSPACE(*t))
6285 /* if comma follows first term, call it an anon hash */
6286 /* XXX it could be a comma expression with loop modifiers */
6287 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6288 || (*t == '=' && t[1] == '>')))
6289 OPERATOR(HASHBRACK);
6290 if (PL_expect == XREF)
6293 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6299 pl_yylval.ival = CopLINE(PL_curcop);
6300 if (isSPACE(*s) || *s == '#')
6301 PL_copline = NOLINE; /* invalidate current command line number */
6302 TOKEN(formbrack ? '=' : '{');
6304 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6308 if (PL_lex_brackets <= 0)
6309 yyerror("Unmatched right curly bracket");
6311 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6312 PL_lex_allbrackets--;
6313 if (PL_lex_state == LEX_INTERPNORMAL) {
6314 if (PL_lex_brackets == 0) {
6315 if (PL_expect & XFAKEBRACK) {
6316 PL_expect &= XENUMMASK;
6317 PL_lex_state = LEX_INTERPEND;
6322 PL_thiswhite = newSVpvs("");
6323 sv_catpvs(PL_thiswhite,"}");
6326 return yylex(); /* ignore fake brackets */
6328 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6329 && SvEVALED(PL_lex_repl))
6330 PL_lex_state = LEX_INTERPEND;
6331 else if (*s == '-' && s[1] == '>')
6332 PL_lex_state = LEX_INTERPENDMAYBE;
6333 else if (*s != '[' && *s != '{')
6334 PL_lex_state = LEX_INTERPEND;
6337 if (PL_expect & XFAKEBRACK) {
6338 PL_expect &= XENUMMASK;
6340 return yylex(); /* ignore fake brackets */
6342 start_force(PL_curforce);
6344 curmad('X', newSVpvn(s-1,1));
6345 CURMAD('_', PL_thiswhite);
6347 force_next(formbrack ? '.' : '}');
6348 if (formbrack) LEAVE;
6350 if (PL_madskills && !PL_thistoken)
6351 PL_thistoken = newSVpvs("");
6353 if (formbrack == 2) { /* means . where arguments were expected */
6354 start_force(PL_curforce);
6360 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6363 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6364 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6371 if (PL_expect == XOPERATOR) {
6372 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6373 && isIDFIRST_lazy_if(s,UTF))
6375 CopLINE_dec(PL_curcop);
6376 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6377 CopLINE_inc(PL_curcop);
6379 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6380 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6384 PL_parser->saw_infix_sigil = 1;
6388 PL_tokenbuf[0] = '&';
6389 s = scan_ident(s - 1, PL_tokenbuf + 1,
6390 sizeof PL_tokenbuf - 1, TRUE);
6391 if (PL_tokenbuf[1]) {
6392 PL_expect = XOPERATOR;
6393 force_ident_maybe_lex('&');
6397 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6403 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6404 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6411 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6412 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6420 const char tmp = *s++;
6422 if (!PL_lex_allbrackets &&
6423 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6430 if (!PL_lex_allbrackets &&
6431 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6439 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6440 && strchr("+-*/%.^&|<",tmp))
6441 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6442 "Reversed %c= operator",(int)tmp);
6444 if (PL_expect == XSTATE && isALPHA(tmp) &&
6445 (s == PL_linestart+1 || s[-2] == '\n') )
6447 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6448 || PL_lex_state != LEX_NORMAL) {
6453 if (strnEQ(s,"=cut",4)) {
6469 PL_thiswhite = newSVpvs("");
6470 sv_catpvn(PL_thiswhite, PL_linestart,
6471 PL_bufend - PL_linestart);
6475 PL_parser->in_pod = 1;
6479 if (PL_expect == XBLOCK) {
6481 #ifdef PERL_STRICT_CR
6482 while (SPACE_OR_TAB(*t))
6484 while (SPACE_OR_TAB(*t) || *t == '\r')
6487 if (*t == '\n' || *t == '#') {
6490 SAVEI8(PL_parser->form_lex_state);
6491 SAVEI32(PL_lex_formbrack);
6492 PL_parser->form_lex_state = PL_lex_state;
6493 PL_lex_formbrack = PL_lex_brackets + 1;
6497 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6506 const char tmp = *s++;
6508 /* was this !=~ where !~ was meant?
6509 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6511 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6512 const char *t = s+1;
6514 while (t < PL_bufend && isSPACE(*t))
6517 if (*t == '/' || *t == '?' ||
6518 ((*t == 'm' || *t == 's' || *t == 'y')
6519 && !isWORDCHAR(t[1])) ||
6520 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6521 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6522 "!=~ should be !~");
6524 if (!PL_lex_allbrackets &&
6525 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6537 if (PL_expect != XOPERATOR) {
6538 if (s[1] != '<' && !strchr(s,'>'))
6541 s = scan_heredoc(s);
6543 s = scan_inputsymbol(s);
6544 PL_expect = XOPERATOR;
6545 TOKEN(sublex_start());
6551 if (*s == '=' && !PL_lex_allbrackets &&
6552 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6556 SHop(OP_LEFT_SHIFT);
6561 if (!PL_lex_allbrackets &&
6562 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6569 if (!PL_lex_allbrackets &&
6570 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6578 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6586 const char tmp = *s++;
6588 if (*s == '=' && !PL_lex_allbrackets &&
6589 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6593 SHop(OP_RIGHT_SHIFT);
6595 else if (tmp == '=') {
6596 if (!PL_lex_allbrackets &&
6597 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6605 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6614 if (PL_expect == XOPERATOR) {
6615 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6616 return deprecate_commaless_var_list();
6619 else if (PL_expect == XPOSTDEREF) POSTDEREF('$');
6621 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6622 PL_tokenbuf[0] = '@';
6623 s = scan_ident(s + 1, PL_tokenbuf + 1,
6624 sizeof PL_tokenbuf - 1, FALSE);
6625 if (PL_expect == XOPERATOR)
6626 no_op("Array length", s);
6627 if (!PL_tokenbuf[1])
6629 PL_expect = XOPERATOR;
6630 force_ident_maybe_lex('#');
6634 PL_tokenbuf[0] = '$';
6635 s = scan_ident(s, PL_tokenbuf + 1,
6636 sizeof PL_tokenbuf - 1, FALSE);
6637 if (PL_expect == XOPERATOR)
6639 if (!PL_tokenbuf[1]) {
6641 yyerror("Final $ should be \\$ or $name");
6647 const char tmp = *s;
6648 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6651 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6652 && intuit_more(s)) {
6654 PL_tokenbuf[0] = '@';
6655 if (ckWARN(WARN_SYNTAX)) {
6658 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6661 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6662 while (t < PL_bufend && *t != ']')
6664 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6665 "Multidimensional syntax %.*s not supported",
6666 (int)((t - PL_bufptr) + 1), PL_bufptr);
6670 else if (*s == '{') {
6672 PL_tokenbuf[0] = '%';
6673 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6674 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6676 char tmpbuf[sizeof PL_tokenbuf];
6679 } while (isSPACE(*t));
6680 if (isIDFIRST_lazy_if(t,UTF)) {
6682 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6687 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6688 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6689 "You need to quote \"%"UTF8f"\"",
6690 UTF8fARG(UTF, len, tmpbuf));
6696 PL_expect = XOPERATOR;
6697 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6698 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6699 if (!islop || PL_last_lop_op == OP_GREPSTART)
6700 PL_expect = XOPERATOR;
6701 else if (strchr("$@\"'`q", *s))
6702 PL_expect = XTERM; /* e.g. print $fh "foo" */
6703 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6704 PL_expect = XTERM; /* e.g. print $fh &sub */
6705 else if (isIDFIRST_lazy_if(s,UTF)) {
6706 char tmpbuf[sizeof PL_tokenbuf];
6708 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6709 if ((t2 = keyword(tmpbuf, len, 0))) {
6710 /* binary operators exclude handle interpretations */
6722 PL_expect = XTERM; /* e.g. print $fh length() */
6727 PL_expect = XTERM; /* e.g. print $fh subr() */
6730 else if (isDIGIT(*s))
6731 PL_expect = XTERM; /* e.g. print $fh 3 */
6732 else if (*s == '.' && isDIGIT(s[1]))
6733 PL_expect = XTERM; /* e.g. print $fh .3 */
6734 else if ((*s == '?' || *s == '-' || *s == '+')
6735 && !isSPACE(s[1]) && s[1] != '=')
6736 PL_expect = XTERM; /* e.g. print $fh -1 */
6737 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6739 PL_expect = XTERM; /* e.g. print $fh /.../
6740 XXX except DORDOR operator
6742 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6744 PL_expect = XTERM; /* print $fh <<"EOF" */
6747 force_ident_maybe_lex('$');
6751 if (PL_expect == XOPERATOR)
6753 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6754 PL_tokenbuf[0] = '@';
6755 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6757 if (!PL_tokenbuf[1]) {
6760 if (PL_lex_state == LEX_NORMAL)
6762 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6764 PL_tokenbuf[0] = '%';
6766 /* Warn about @ where they meant $. */
6767 if (*s == '[' || *s == '{') {
6768 if (ckWARN(WARN_SYNTAX)) {
6769 S_check_scalar_slice(aTHX_ s);
6773 PL_expect = XOPERATOR;
6774 force_ident_maybe_lex('@');
6777 case '/': /* may be division, defined-or, or pattern */
6778 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6779 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6780 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6785 case '?': /* may either be conditional or pattern */
6786 if (PL_expect == XOPERATOR) {
6789 if (!PL_lex_allbrackets &&
6790 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6794 PL_lex_allbrackets++;
6800 /* A // operator. */
6801 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6802 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6803 LEX_FAKEEOF_LOGIC)) {
6811 if (*s == '=' && !PL_lex_allbrackets &&
6812 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6821 /* Disable warning on "study /blah/" */
6822 if (PL_oldoldbufptr == PL_last_uni
6823 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6824 || memNE(PL_last_uni, "study", 5)
6825 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6829 deprecate("?PATTERN? without explicit operator");
6830 s = scan_pat(s,OP_MATCH);
6831 TERM(sublex_start());
6835 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6836 #ifdef PERL_STRICT_CR
6839 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6841 && (s == PL_linestart || s[-1] == '\n') )
6844 formbrack = 2; /* dot seen where arguments expected */
6847 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6851 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6854 if (!PL_lex_allbrackets &&
6855 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6862 pl_yylval.ival = OPf_SPECIAL;
6868 if (*s == '=' && !PL_lex_allbrackets &&
6869 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6876 case '0': case '1': case '2': case '3': case '4':
6877 case '5': case '6': case '7': case '8': case '9':
6878 s = scan_num(s, &pl_yylval);
6879 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6880 if (PL_expect == XOPERATOR)
6885 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6886 COPLINE_SET_FROM_MULTI_END;
6887 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6888 if (PL_expect == XOPERATOR) {
6889 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6890 return deprecate_commaless_var_list();
6897 pl_yylval.ival = OP_CONST;
6898 TERM(sublex_start());
6901 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6904 printbuf("### Saw string before %s\n", s);
6906 PerlIO_printf(Perl_debug_log,
6907 "### Saw unterminated string\n");
6909 if (PL_expect == XOPERATOR) {
6910 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6911 return deprecate_commaless_var_list();
6918 pl_yylval.ival = OP_CONST;
6919 /* FIXME. I think that this can be const if char *d is replaced by
6920 more localised variables. */
6921 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6922 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6923 pl_yylval.ival = OP_STRINGIFY;
6927 if (pl_yylval.ival == OP_CONST)
6928 COPLINE_SET_FROM_MULTI_END;
6929 TERM(sublex_start());
6932 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6933 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6934 if (PL_expect == XOPERATOR)
6935 no_op("Backticks",s);
6938 readpipe_override();
6939 TERM(sublex_start());
6943 if (PL_lex_inwhat && isDIGIT(*s))
6944 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6946 if (PL_expect == XOPERATOR)
6947 no_op("Backslash",s);
6951 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6952 char *start = s + 2;
6953 while (isDIGIT(*start) || *start == '_')
6955 if (*start == '.' && isDIGIT(start[1])) {
6956 s = scan_num(s, &pl_yylval);
6959 else if ((*start == ':' && start[1] == ':')
6960 || (PL_expect == XSTATE && *start == ':'))
6962 else if (PL_expect == XSTATE) {
6964 while (d < PL_bufend && isSPACE(*d)) d++;
6965 if (*d == ':') goto keylookup;
6967 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6968 if (!isALPHA(*start) && (PL_expect == XTERM
6969 || PL_expect == XREF || PL_expect == XSTATE
6970 || PL_expect == XTERMORDORDOR)) {
6971 GV *const gv = gv_fetchpvn_flags(s, start - s,
6972 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6974 s = scan_num(s, &pl_yylval);
6981 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7034 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7036 /* Some keywords can be followed by any delimiter, including ':' */
7037 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7039 /* x::* is just a word, unless x is "CORE" */
7040 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
7044 while (d < PL_bufend && isSPACE(*d))
7045 d++; /* no comments skipped here, or s### is misparsed */
7047 /* Is this a word before a => operator? */
7048 if (*d == '=' && d[1] == '>') {
7052 = (OP*)newSVOP(OP_CONST, 0,
7053 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7054 pl_yylval.opval->op_private = OPpCONST_BARE;
7058 /* Check for plugged-in keyword */
7062 char *saved_bufptr = PL_bufptr;
7064 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7066 if (result == KEYWORD_PLUGIN_DECLINE) {
7067 /* not a plugged-in keyword */
7068 PL_bufptr = saved_bufptr;
7069 } else if (result == KEYWORD_PLUGIN_STMT) {
7070 pl_yylval.opval = o;
7073 return REPORT(PLUGSTMT);
7074 } else if (result == KEYWORD_PLUGIN_EXPR) {
7075 pl_yylval.opval = o;
7077 PL_expect = XOPERATOR;
7078 return REPORT(PLUGEXPR);
7080 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7085 /* Check for built-in keyword */
7086 tmp = keyword(PL_tokenbuf, len, 0);
7088 /* Is this a label? */
7089 if (!anydelim && PL_expect == XSTATE
7090 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7092 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7093 pl_yylval.pval[len] = '\0';
7094 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7099 /* Check for lexical sub */
7100 if (PL_expect != XOPERATOR) {
7101 char tmpbuf[sizeof PL_tokenbuf + 1];
7103 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7104 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7105 if (off != NOT_IN_PAD) {
7106 assert(off); /* we assume this is boolean-true below */
7107 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7108 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7109 HEK * const stashname = HvNAME_HEK(stash);
7110 sv = newSVhek(stashname);
7111 sv_catpvs(sv, "::");
7112 sv_catpvn_flags(sv, PL_tokenbuf, len,
7113 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7114 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7124 rv2cv_op = newOP(OP_PADANY, 0);
7125 rv2cv_op->op_targ = off;
7126 cv = find_lexical_cv(off);
7134 if (tmp < 0) { /* second-class keyword? */
7135 GV *ogv = NULL; /* override (winner) */
7136 GV *hgv = NULL; /* hidden (loser) */
7137 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7139 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7140 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7143 if (GvIMPORTED_CV(gv))
7145 else if (! CvMETHOD(cv))
7149 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7150 UTF ? -(I32)len : (I32)len, FALSE)) &&
7151 (gv = *gvp) && isGV_with_GP(gv) &&
7152 GvCVu(gv) && GvIMPORTED_CV(gv))
7159 tmp = 0; /* overridden by import or by GLOBAL */
7162 && -tmp==KEY_lock /* XXX generalizable kludge */
7165 tmp = 0; /* any sub overrides "weak" keyword */
7167 else { /* no override */
7169 if (tmp == KEY_dump) {
7170 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7171 "dump() better written as CORE::dump()");
7175 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
7176 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7177 "Ambiguous call resolved as CORE::%s(), "
7178 "qualify as such or use &",
7183 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7184 && (!anydelim || *s != '#')) {
7185 /* no override, and not s### either; skipspace is safe here
7186 * check for => on following line */
7188 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7189 STRLEN soff = s - SvPVX(PL_linestr);
7190 s = skipspace_flags(s, LEX_NO_INCLINE);
7191 arrow = *s == '=' && s[1] == '>';
7192 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7193 s = SvPVX(PL_linestr) + soff;
7201 default: /* not a keyword */
7202 /* Trade off - by using this evil construction we can pull the
7203 variable gv into the block labelled keylookup. If not, then
7204 we have to give it function scope so that the goto from the
7205 earlier ':' case doesn't bypass the initialisation. */
7207 just_a_word_zero_gv:
7219 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7220 const char penultchar =
7221 lastchar && PL_bufptr - 2 >= PL_linestart
7225 SV *nextPL_nextwhite = 0;
7229 /* Get the rest if it looks like a package qualifier */
7231 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7233 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7236 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7237 UTF8fARG(UTF, len, PL_tokenbuf),
7238 *s == '\'' ? "'" : "::");
7243 if (PL_expect == XOPERATOR) {
7244 if (PL_bufptr == PL_linestart) {
7245 CopLINE_dec(PL_curcop);
7246 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7247 CopLINE_inc(PL_curcop);
7250 no_op("Bareword",s);
7253 /* Look for a subroutine with this name in current package,
7254 unless this is a lexical sub, or name is "Foo::",
7255 in which case Foo is a bareword
7256 (and a package name). */
7258 if (len > 2 && !PL_madskills &&
7259 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7261 if (ckWARN(WARN_BAREWORD)
7262 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7263 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7264 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7265 UTF8fARG(UTF, len, PL_tokenbuf));
7267 PL_tokenbuf[len] = '\0';
7273 /* Mustn't actually add anything to a symbol table.
7274 But also don't want to "initialise" any placeholder
7275 constants that might already be there into full
7276 blown PVGVs with attached PVCV. */
7277 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7278 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7284 /* if we saw a global override before, get the right name */
7287 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7288 len ? len : strlen(PL_tokenbuf));
7290 SV * const tmp_sv = sv;
7291 sv = newSVpvs("CORE::GLOBAL::");
7292 sv_catsv(sv, tmp_sv);
7293 SvREFCNT_dec(tmp_sv);
7297 if (PL_madskills && !PL_thistoken) {
7298 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7299 PL_thistoken = newSVpvn(start,s - start);
7300 PL_realtokenstart = s - SvPVX(PL_linestr);
7304 /* Presume this is going to be a bareword of some sort. */
7306 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7307 pl_yylval.opval->op_private = OPpCONST_BARE;
7309 /* And if "Foo::", then that's what it certainly is. */
7315 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7316 const_op->op_private = OPpCONST_BARE;
7317 rv2cv_op = newCVREF(0, const_op);
7318 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7321 /* See if it's the indirect object for a list operator. */
7323 if (PL_oldoldbufptr &&
7324 PL_oldoldbufptr < PL_bufptr &&
7325 (PL_oldoldbufptr == PL_last_lop
7326 || PL_oldoldbufptr == PL_last_uni) &&
7327 /* NO SKIPSPACE BEFORE HERE! */
7328 (PL_expect == XREF ||
7329 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7331 bool immediate_paren = *s == '(';
7333 /* (Now we can afford to cross potential line boundary.) */
7334 s = SKIPSPACE2(s,nextPL_nextwhite);
7336 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7339 /* Two barewords in a row may indicate method call. */
7341 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7342 (tmp = intuit_method(s, gv, cv))) {
7344 if (tmp == METHOD && !PL_lex_allbrackets &&
7345 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7346 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7350 /* If not a declared subroutine, it's an indirect object. */
7351 /* (But it's an indir obj regardless for sort.) */
7352 /* Also, if "_" follows a filetest operator, it's a bareword */
7355 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7357 (PL_last_lop_op != OP_MAPSTART &&
7358 PL_last_lop_op != OP_GREPSTART))))
7359 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7360 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7363 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7368 PL_expect = XOPERATOR;
7371 s = SKIPSPACE2(s,nextPL_nextwhite);
7372 PL_nextwhite = nextPL_nextwhite;
7377 /* Is this a word before a => operator? */
7378 if (*s == '=' && s[1] == '>' && !pkgname) {
7381 /* This is our own scalar, created a few lines above,
7383 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7384 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7385 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7386 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7387 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7391 /* If followed by a paren, it's certainly a subroutine. */
7396 while (SPACE_OR_TAB(*d))
7398 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7405 PL_nextwhite = PL_thiswhite;
7408 start_force(PL_curforce);
7410 NEXTVAL_NEXTTOKE.opval =
7411 off ? rv2cv_op : pl_yylval.opval;
7412 PL_expect = XOPERATOR;
7415 PL_nextwhite = nextPL_nextwhite;
7416 curmad('X', PL_thistoken);
7417 PL_thistoken = newSVpvs("");
7421 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7422 else op_free(rv2cv_op), force_next(WORD);
7427 /* If followed by var or block, call it a method (unless sub) */
7429 if ((*s == '$' || *s == '{') && !cv) {
7431 PL_last_lop = PL_oldbufptr;
7432 PL_last_lop_op = OP_METHOD;
7433 if (!PL_lex_allbrackets &&
7434 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7435 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7439 /* If followed by a bareword, see if it looks like indir obj. */
7442 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7443 && (tmp = intuit_method(s, gv, cv))) {
7445 if (tmp == METHOD && !PL_lex_allbrackets &&
7446 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7447 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7451 /* Not a method, so call it a subroutine (if defined) */
7454 if (lastchar == '-' && penultchar != '-') {
7455 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7456 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7457 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7458 UTF8fARG(UTF, l, PL_tokenbuf),
7459 UTF8fARG(UTF, l, PL_tokenbuf));
7461 /* Check for a constant sub */
7462 if ((sv = cv_const_sv_or_av(cv))) {
7465 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7466 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7467 if (SvTYPE(sv) == SVt_PVAV)
7468 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7471 pl_yylval.opval->op_private = 0;
7472 pl_yylval.opval->op_folded = 1;
7473 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7478 op_free(pl_yylval.opval);
7480 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7481 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7482 PL_last_lop = PL_oldbufptr;
7483 PL_last_lop_op = OP_ENTERSUB;
7484 /* Is there a prototype? */
7491 STRLEN protolen = CvPROTOLEN(cv);
7492 const char *proto = CvPROTO(cv);
7494 proto = S_strip_spaces(aTHX_ proto, &protolen);
7497 if ((optional = *proto == ';'))
7500 while (*proto == ';');
7504 *proto == '$' || *proto == '_'
7505 || *proto == '*' || *proto == '+'
7510 *proto == '\\' && proto[1] && proto[2] == '\0'
7513 UNIPROTO(UNIOPSUB,optional);
7514 if (*proto == '\\' && proto[1] == '[') {
7515 const char *p = proto + 2;
7516 while(*p && *p != ']')
7518 if(*p == ']' && !p[1])
7519 UNIPROTO(UNIOPSUB,optional);
7521 if (*proto == '&' && *s == '{') {
7523 sv_setpvs(PL_subname, "__ANON__");
7525 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7526 if (!PL_lex_allbrackets &&
7527 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7528 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7535 PL_nextwhite = PL_thiswhite;
7538 start_force(PL_curforce);
7539 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7542 PL_nextwhite = nextPL_nextwhite;
7543 curmad('X', PL_thistoken);
7544 PL_thistoken = newSVpvs("");
7546 force_next(off ? PRIVATEREF : WORD);
7547 if (!PL_lex_allbrackets &&
7548 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7549 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7554 /* Guess harder when madskills require "best effort". */
7555 if (PL_madskills && (!gv || !GvCVu(gv))) {
7556 int probable_sub = 0;
7557 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7559 else if (isALPHA(*s)) {
7563 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7564 if (!keyword(tmpbuf, tmplen, 0))
7567 while (d < PL_bufend && isSPACE(*d))
7569 if (*d == '=' && d[1] == '>')
7574 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7576 op_free(pl_yylval.opval);
7578 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7579 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7580 PL_last_lop = PL_oldbufptr;
7581 PL_last_lop_op = OP_ENTERSUB;
7582 PL_nextwhite = PL_thiswhite;
7584 start_force(PL_curforce);
7585 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7587 PL_nextwhite = nextPL_nextwhite;
7588 curmad('X', PL_thistoken);
7589 PL_thistoken = newSVpvs("");
7590 force_next(off ? PRIVATEREF : WORD);
7591 if (!PL_lex_allbrackets &&
7592 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7593 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7597 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7599 force_next(off ? PRIVATEREF : WORD);
7600 if (!PL_lex_allbrackets &&
7601 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7602 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7607 /* Call it a bare word */
7609 if (PL_hints & HINT_STRICT_SUBS)
7610 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7613 /* after "print" and similar functions (corresponding to
7614 * "F? L" in opcode.pl), whatever wasn't already parsed as
7615 * a filehandle should be subject to "strict subs".
7616 * Likewise for the optional indirect-object argument to system
7617 * or exec, which can't be a bareword */
7618 if ((PL_last_lop_op == OP_PRINT
7619 || PL_last_lop_op == OP_PRTF
7620 || PL_last_lop_op == OP_SAY
7621 || PL_last_lop_op == OP_SYSTEM
7622 || PL_last_lop_op == OP_EXEC)
7623 && (PL_hints & HINT_STRICT_SUBS))
7624 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7625 if (lastchar != '-') {
7626 if (ckWARN(WARN_RESERVED)) {
7630 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7631 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7639 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7640 && saw_infix_sigil) {
7641 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7642 "Operator or semicolon missing before %c%"UTF8f,
7644 UTF8fARG(UTF, strlen(PL_tokenbuf),
7646 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7647 "Ambiguous use of %c resolved as operator %c",
7648 lastchar, lastchar);
7655 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7660 (OP*)newSVOP(OP_CONST, 0,
7661 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7664 case KEY___PACKAGE__:
7666 (OP*)newSVOP(OP_CONST, 0,
7668 ? newSVhek(HvNAME_HEK(PL_curstash))
7675 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7676 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7679 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7681 gv_init(gv,stash,"DATA",4,0);
7684 GvIOp(gv) = newIO();
7685 IoIFP(GvIOp(gv)) = PL_rsfp;
7686 #if defined(HAS_FCNTL) && defined(F_SETFD)
7688 const int fd = PerlIO_fileno(PL_rsfp);
7689 fcntl(fd,F_SETFD,fd >= 3);
7692 /* Mark this internal pseudo-handle as clean */
7693 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7694 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7695 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7697 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7698 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7699 /* if the script was opened in binmode, we need to revert
7700 * it to text mode for compatibility; but only iff it has CRs
7701 * XXX this is a questionable hack at best. */
7702 if (PL_bufend-PL_bufptr > 2
7703 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7706 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7707 loc = PerlIO_tell(PL_rsfp);
7708 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7711 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7713 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7714 #endif /* NETWARE */
7716 PerlIO_seek(PL_rsfp, loc, 0);
7720 #ifdef PERLIO_LAYERS
7723 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7724 else if (PL_encoding) {
7731 XPUSHs(PL_encoding);
7733 call_method("name", G_SCALAR);
7737 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7738 Perl_form(aTHX_ ":encoding(%"SVf")",
7747 if (PL_realtokenstart >= 0) {
7748 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7750 PL_endwhite = newSVpvs("");
7751 sv_catsv(PL_endwhite, PL_thiswhite);
7753 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7754 PL_realtokenstart = -1;
7756 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7766 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7775 if (PL_expect == XSTATE) {
7782 if (*s == ':' && s[1] == ':') {
7786 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7787 if ((*s == ':' && s[1] == ':')
7788 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7792 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7796 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7797 UTF8fARG(UTF, len, PL_tokenbuf));
7800 else if (tmp == KEY_require || tmp == KEY_do
7802 /* that's a way to remember we saw "CORE::" */
7815 LOP(OP_ACCEPT,XTERM);
7818 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7823 LOP(OP_ATAN2,XTERM);
7829 LOP(OP_BINMODE,XTERM);
7832 LOP(OP_BLESS,XTERM);
7841 /* We have to disambiguate the two senses of
7842 "continue". If the next token is a '{' then
7843 treat it as the start of a continue block;
7844 otherwise treat it as a control operator.
7854 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7864 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7873 if (!PL_cryptseen) {
7874 PL_cryptseen = TRUE;
7878 LOP(OP_CRYPT,XTERM);
7881 LOP(OP_CHMOD,XTERM);
7884 LOP(OP_CHOWN,XTERM);
7887 LOP(OP_CONNECT,XTERM);
7907 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7909 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7912 force_ident_maybe_lex('&');
7917 if (orig_keyword == KEY_do) {
7926 PL_hints |= HINT_BLOCK_SCOPE;
7936 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7937 STR_WITH_LEN("NDBM_File::"),
7938 STR_WITH_LEN("DB_File::"),
7939 STR_WITH_LEN("GDBM_File::"),
7940 STR_WITH_LEN("SDBM_File::"),
7941 STR_WITH_LEN("ODBM_File::"),
7943 LOP(OP_DBMOPEN,XTERM);
7949 PL_expect = XOPERATOR;
7950 s = force_word(s,WORD,TRUE,FALSE);
7957 pl_yylval.ival = CopLINE(PL_curcop);
7961 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7975 if (*s == '{') { /* block eval */
7976 PL_expect = XTERMBLOCK;
7977 UNIBRACK(OP_ENTERTRY);
7979 else { /* string eval */
7981 UNIBRACK(OP_ENTEREVAL);
7986 UNIBRACK(-OP_ENTEREVAL);
8000 case KEY_endhostent:
8006 case KEY_endservent:
8009 case KEY_endprotoent:
8020 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8022 pl_yylval.ival = CopLINE(PL_curcop);
8024 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8027 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8030 if ((PL_bufend - p) >= 3 &&
8031 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8033 else if ((PL_bufend - p) >= 4 &&
8034 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8037 if (isIDFIRST_lazy_if(p,UTF)) {
8038 p = scan_ident(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
8042 Perl_croak(aTHX_ "Missing $ on loop variable");
8044 s = SvPVX(PL_linestr) + soff;
8050 LOP(OP_FORMLINE,XTERM);
8059 LOP(OP_FCNTL,XTERM);
8065 LOP(OP_FLOCK,XTERM);
8068 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8073 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8078 LOP(OP_GREPSTART, XREF);
8081 PL_expect = XOPERATOR;
8082 s = force_word(s,WORD,TRUE,FALSE);
8097 case KEY_getpriority:
8098 LOP(OP_GETPRIORITY,XTERM);
8100 case KEY_getprotobyname:
8103 case KEY_getprotobynumber:
8104 LOP(OP_GPBYNUMBER,XTERM);
8106 case KEY_getprotoent:
8118 case KEY_getpeername:
8119 UNI(OP_GETPEERNAME);
8121 case KEY_gethostbyname:
8124 case KEY_gethostbyaddr:
8125 LOP(OP_GHBYADDR,XTERM);
8127 case KEY_gethostent:
8130 case KEY_getnetbyname:
8133 case KEY_getnetbyaddr:
8134 LOP(OP_GNBYADDR,XTERM);
8139 case KEY_getservbyname:
8140 LOP(OP_GSBYNAME,XTERM);
8142 case KEY_getservbyport:
8143 LOP(OP_GSBYPORT,XTERM);
8145 case KEY_getservent:
8148 case KEY_getsockname:
8149 UNI(OP_GETSOCKNAME);
8151 case KEY_getsockopt:
8152 LOP(OP_GSOCKOPT,XTERM);
8167 pl_yylval.ival = CopLINE(PL_curcop);
8168 Perl_ck_warner_d(aTHX_
8169 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8170 "given is experimental");
8175 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8183 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8185 pl_yylval.ival = CopLINE(PL_curcop);
8189 LOP(OP_INDEX,XTERM);
8195 LOP(OP_IOCTL,XTERM);
8207 PL_expect = XOPERATOR;
8208 s = force_word(s,WORD,TRUE,FALSE);
8225 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8230 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8244 LOP(OP_LISTEN,XTERM);
8253 s = scan_pat(s,OP_MATCH);
8254 TERM(sublex_start());
8257 LOP(OP_MAPSTART, XREF);
8260 LOP(OP_MKDIR,XTERM);
8263 LOP(OP_MSGCTL,XTERM);
8266 LOP(OP_MSGGET,XTERM);
8269 LOP(OP_MSGRCV,XTERM);
8272 LOP(OP_MSGSND,XTERM);
8277 PL_in_my = (U16)tmp;
8279 if (isIDFIRST_lazy_if(s,UTF)) {
8283 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8284 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8286 if (!FEATURE_LEXSUBS_IS_ENABLED)
8288 "Experimental \"%s\" subs not enabled",
8289 tmp == KEY_my ? "my" :
8290 tmp == KEY_state ? "state" : "our");
8291 Perl_ck_warner_d(aTHX_
8292 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8293 "The lexical_subs feature is experimental");
8296 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8297 if (!PL_in_my_stash) {
8300 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8301 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8304 if (PL_madskills) { /* just add type to declarator token */
8305 sv_catsv(PL_thistoken, PL_nextwhite);
8307 sv_catpvn(PL_thistoken, start, s - start);
8315 PL_expect = XOPERATOR;
8316 s = force_word(s,WORD,TRUE,FALSE);
8320 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8325 s = tokenize_use(0, s);
8329 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8332 if (!PL_lex_allbrackets &&
8333 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8334 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8340 if (isIDFIRST_lazy_if(s,UTF)) {
8342 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8344 for (t=d; isSPACE(*t);)
8346 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8348 && !(t[0] == '=' && t[1] == '>')
8349 && !(t[0] == ':' && t[1] == ':')
8350 && !keyword(s, d-s, 0)
8352 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8353 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8354 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8360 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8362 pl_yylval.ival = OP_OR;
8372 LOP(OP_OPEN_DIR,XTERM);
8375 checkcomma(s,PL_tokenbuf,"filehandle");
8379 checkcomma(s,PL_tokenbuf,"filehandle");
8398 s = force_word(s,WORD,FALSE,TRUE);
8400 s = force_strict_version(s);
8401 PL_lex_expect = XBLOCK;
8405 LOP(OP_PIPE_OP,XTERM);
8408 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8409 COPLINE_SET_FROM_MULTI_END;
8412 pl_yylval.ival = OP_CONST;
8413 TERM(sublex_start());
8420 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8421 COPLINE_SET_FROM_MULTI_END;
8424 PL_expect = XOPERATOR;
8425 if (SvCUR(PL_lex_stuff)) {
8426 int warned_comma = !ckWARN(WARN_QW);
8427 int warned_comment = warned_comma;
8428 d = SvPV_force(PL_lex_stuff, len);
8430 for (; isSPACE(*d) && len; --len, ++d)
8435 if (!warned_comma || !warned_comment) {
8436 for (; !isSPACE(*d) && len; --len, ++d) {
8437 if (!warned_comma && *d == ',') {
8438 Perl_warner(aTHX_ packWARN(WARN_QW),
8439 "Possible attempt to separate words with commas");
8442 else if (!warned_comment && *d == '#') {
8443 Perl_warner(aTHX_ packWARN(WARN_QW),
8444 "Possible attempt to put comments in qw() list");
8450 for (; !isSPACE(*d) && len; --len, ++d)
8453 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8454 words = op_append_elem(OP_LIST, words,
8455 newSVOP(OP_CONST, 0, tokeq(sv)));
8460 words = newNULLLIST();
8462 SvREFCNT_dec(PL_lex_stuff);
8463 PL_lex_stuff = NULL;
8465 PL_expect = XOPERATOR;
8466 pl_yylval.opval = sawparens(words);
8471 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8474 pl_yylval.ival = OP_STRINGIFY;
8475 if (SvIVX(PL_lex_stuff) == '\'')
8476 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8477 TERM(sublex_start());
8480 s = scan_pat(s,OP_QR);
8481 TERM(sublex_start());
8484 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8487 readpipe_override();
8488 TERM(sublex_start());
8495 PL_expect = XOPERATOR;
8497 s = force_version(s, FALSE);
8499 else if (*s != 'v' || !isDIGIT(s[1])
8500 || (s = force_version(s, TRUE), *s == 'v'))
8502 *PL_tokenbuf = '\0';
8503 s = force_word(s,WORD,TRUE,TRUE);
8504 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8505 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8506 GV_ADD | (UTF ? SVf_UTF8 : 0));
8508 yyerror("<> should be quotes");
8510 if (orig_keyword == KEY_require) {
8518 PL_last_uni = PL_oldbufptr;
8519 PL_last_lop_op = OP_REQUIRE;
8521 return REPORT( (int)REQUIRE );
8527 PL_expect = XOPERATOR;
8528 s = force_word(s,WORD,TRUE,FALSE);
8532 LOP(OP_RENAME,XTERM);
8541 LOP(OP_RINDEX,XTERM);
8550 UNIDOR(OP_READLINE);
8553 UNIDOR(OP_BACKTICK);
8562 LOP(OP_REVERSE,XTERM);
8565 UNIDOR(OP_READLINK);
8572 if (pl_yylval.opval)
8573 TERM(sublex_start());
8575 TOKEN(1); /* force error */
8578 checkcomma(s,PL_tokenbuf,"filehandle");
8588 LOP(OP_SELECT,XTERM);
8594 LOP(OP_SEMCTL,XTERM);
8597 LOP(OP_SEMGET,XTERM);
8600 LOP(OP_SEMOP,XTERM);
8606 LOP(OP_SETPGRP,XTERM);
8608 case KEY_setpriority:
8609 LOP(OP_SETPRIORITY,XTERM);
8611 case KEY_sethostent:
8617 case KEY_setservent:
8620 case KEY_setprotoent:
8630 LOP(OP_SEEKDIR,XTERM);
8632 case KEY_setsockopt:
8633 LOP(OP_SSOCKOPT,XTERM);
8639 LOP(OP_SHMCTL,XTERM);
8642 LOP(OP_SHMGET,XTERM);
8645 LOP(OP_SHMREAD,XTERM);
8648 LOP(OP_SHMWRITE,XTERM);
8651 LOP(OP_SHUTDOWN,XTERM);
8660 LOP(OP_SOCKET,XTERM);
8662 case KEY_socketpair:
8663 LOP(OP_SOCKPAIR,XTERM);
8666 checkcomma(s,PL_tokenbuf,"subroutine name");
8669 s = force_word(s,WORD,TRUE,TRUE);
8673 LOP(OP_SPLIT,XTERM);
8676 LOP(OP_SPRINTF,XTERM);
8679 LOP(OP_SPLICE,XTERM);
8694 LOP(OP_SUBSTR,XTERM);
8700 char * const tmpbuf = PL_tokenbuf + 1;
8701 expectation attrful;
8702 bool have_name, have_proto;
8703 const int key = tmp;
8705 SV *format_name = NULL;
8711 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8712 SV *subtoken = PL_madskills
8713 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8718 s = SKIPSPACE2(s,tmpwhite);
8724 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8725 (*s == ':' && s[1] == ':'))
8728 SV *nametoke = NULL;
8732 attrful = XATTRBLOCK;
8733 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8737 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8739 if (key == KEY_format)
8740 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8743 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8745 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8747 sv_setpvn(PL_subname, tmpbuf, len);
8749 sv_setsv(PL_subname,PL_curstname);
8750 sv_catpvs(PL_subname,"::");
8751 sv_catpvn(PL_subname,tmpbuf,len);
8753 if (SvUTF8(PL_linestr))
8754 SvUTF8_on(PL_subname);
8760 CURMAD('X', nametoke);
8761 CURMAD('_', tmpwhite);
8762 force_ident_maybe_lex('&');
8764 s = SKIPSPACE2(d,tmpwhite);
8770 if (key == KEY_my || key == KEY_our || key==KEY_state)
8773 /* diag_listed_as: Missing name in "%s sub" */
8775 "Missing name in \"%s\"", PL_bufptr);
8777 PL_expect = XTERMBLOCK;
8778 attrful = XATTRTERM;
8779 sv_setpvs(PL_subname,"?");
8783 if (key == KEY_format) {
8785 PL_thistoken = subtoken;
8789 start_force(PL_curforce);
8790 NEXTVAL_NEXTTOKE.opval
8791 = (OP*)newSVOP(OP_CONST,0, format_name);
8792 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8799 /* Look for a prototype */
8801 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8802 COPLINE_SET_FROM_MULTI_END;
8804 Perl_croak(aTHX_ "Prototype not terminated");
8805 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8810 CURMAD('q', PL_thisopen);
8811 CURMAD('_', tmpwhite);
8812 CURMAD('=', PL_thisstuff);
8813 CURMAD('Q', PL_thisclose);
8814 NEXTVAL_NEXTTOKE.opval =
8815 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8816 PL_lex_stuff = NULL;
8819 s = SKIPSPACE2(s,tmpwhite);
8827 if (*s == ':' && s[1] != ':')
8828 PL_expect = attrful;
8829 else if (*s != '{' && key == KEY_sub) {
8831 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8832 else if (*s != ';' && *s != '}')
8833 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8840 curmad('^', newSVpvs(""));
8841 CURMAD('_', tmpwhite);
8845 PL_thistoken = subtoken;
8846 PERL_UNUSED_VAR(have_proto);
8849 NEXTVAL_NEXTTOKE.opval =
8850 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8851 PL_lex_stuff = NULL;
8857 sv_setpvs(PL_subname, "__ANON__");
8859 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8863 force_ident_maybe_lex('&');
8869 LOP(OP_SYSTEM,XREF);
8872 LOP(OP_SYMLINK,XTERM);
8875 LOP(OP_SYSCALL,XTERM);
8878 LOP(OP_SYSOPEN,XTERM);
8881 LOP(OP_SYSSEEK,XTERM);
8884 LOP(OP_SYSREAD,XTERM);
8887 LOP(OP_SYSWRITE,XTERM);
8892 TERM(sublex_start());
8913 LOP(OP_TRUNCATE,XTERM);
8925 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8927 pl_yylval.ival = CopLINE(PL_curcop);
8931 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8933 pl_yylval.ival = CopLINE(PL_curcop);
8937 LOP(OP_UNLINK,XTERM);
8943 LOP(OP_UNPACK,XTERM);
8946 LOP(OP_UTIME,XTERM);
8952 LOP(OP_UNSHIFT,XTERM);
8955 s = tokenize_use(1, s);
8965 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8967 pl_yylval.ival = CopLINE(PL_curcop);
8968 Perl_ck_warner_d(aTHX_
8969 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8970 "when is experimental");
8974 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8976 pl_yylval.ival = CopLINE(PL_curcop);
8980 PL_hints |= HINT_BLOCK_SCOPE;
8987 LOP(OP_WAITPID,XTERM);
8993 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8994 * we use the same number on EBCDIC */
8995 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8999 if (PL_expect == XOPERATOR) {
9000 if (*s == '=' && !PL_lex_allbrackets &&
9001 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9009 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
9011 pl_yylval.ival = OP_XOR;
9020 Looks up an identifier in the pad or in a package
9023 PRIVATEREF if this is a lexical name.
9024 WORD if this belongs to a package.
9027 if we're in a my declaration
9028 croak if they tried to say my($foo::bar)
9029 build the ops for a my() declaration
9030 if it's an access to a my() variable
9031 build ops for access to a my() variable
9032 if in a dq string, and they've said @foo and we can't find @foo
9034 build ops for a bareword
9038 S_pending_ident(pTHX)
9042 const char pit = (char)pl_yylval.ival;
9043 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9044 /* All routes through this function want to know if there is a colon. */
9045 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9047 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9048 "### Pending identifier '%s'\n", PL_tokenbuf); });
9050 /* if we're in a my(), we can't allow dynamics here.
9051 $foo'bar has already been turned into $foo::bar, so
9052 just check for colons.
9054 if it's a legal name, the OP is a PADANY.
9057 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9059 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9060 "variable %s in \"our\"",
9061 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9062 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9066 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9067 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9068 UTF ? SVf_UTF8 : 0);
9070 pl_yylval.opval = newOP(OP_PADANY, 0);
9071 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9072 UTF ? SVf_UTF8 : 0);
9078 build the ops for accesses to a my() variable.
9083 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9084 UTF ? SVf_UTF8 : 0);
9085 if (tmp != NOT_IN_PAD) {
9086 /* might be an "our" variable" */
9087 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9088 /* build ops for a bareword */
9089 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9090 HEK * const stashname = HvNAME_HEK(stash);
9091 SV * const sym = newSVhek(stashname);
9092 sv_catpvs(sym, "::");
9093 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9094 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9095 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9099 ? (GV_ADDMULTI | GV_ADDINEVAL)
9102 ((PL_tokenbuf[0] == '$') ? SVt_PV
9103 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9108 pl_yylval.opval = newOP(OP_PADANY, 0);
9109 pl_yylval.opval->op_targ = tmp;
9115 Whine if they've said @foo in a doublequoted string,
9116 and @foo isn't a variable we can find in the symbol
9119 if (ckWARN(WARN_AMBIGUOUS) &&
9120 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9121 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9122 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9123 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9124 /* DO NOT warn for @- and @+ */
9125 && !( PL_tokenbuf[2] == '\0' &&
9126 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9129 /* Downgraded from fatal to warning 20000522 mjd */
9130 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9131 "Possible unintended interpolation of %"UTF8f
9133 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9137 /* build ops for a bareword */
9138 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9139 newSVpvn_flags(PL_tokenbuf + 1,
9141 UTF ? SVf_UTF8 : 0 ));
9142 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9144 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9145 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9146 | ( UTF ? SVf_UTF8 : 0 ),
9147 ((PL_tokenbuf[0] == '$') ? SVt_PV
9148 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9154 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9158 PERL_ARGS_ASSERT_CHECKCOMMA;
9160 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9161 if (ckWARN(WARN_SYNTAX)) {
9164 for (w = s+2; *w && level; w++) {
9172 /* the list of chars below is for end of statements or
9173 * block / parens, boolean operators (&&, ||, //) and branch
9174 * constructs (or, and, if, until, unless, while, err, for).
9175 * Not a very solid hack... */
9176 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9177 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9178 "%s (...) interpreted as function",name);
9181 while (s < PL_bufend && isSPACE(*s))
9185 while (s < PL_bufend && isSPACE(*s))
9187 if (isIDFIRST_lazy_if(s,UTF)) {
9188 const char * const w = s;
9189 s += UTF ? UTF8SKIP(s) : 1;
9190 while (isWORDCHAR_lazy_if(s,UTF))
9191 s += UTF ? UTF8SKIP(s) : 1;
9192 while (s < PL_bufend && isSPACE(*s))
9196 if (keyword(w, s - w, 0))
9199 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9200 if (gv && GvCVu(gv))
9202 Perl_croak(aTHX_ "No comma allowed after %s", what);
9207 /* S_new_constant(): do any overload::constant lookup.
9209 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9210 Best used as sv=new_constant(..., sv, ...).
9211 If s, pv are NULL, calls subroutine with one argument,
9212 and <type> is used with error messages only.
9213 <type> is assumed to be well formed UTF-8 */
9216 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9217 SV *sv, SV *pv, const char *type, STRLEN typelen)
9220 HV * table = GvHV(PL_hintgv); /* ^H */
9225 const char *why1 = "", *why2 = "", *why3 = "";
9227 PERL_ARGS_ASSERT_NEW_CONSTANT;
9228 /* We assume that this is true: */
9229 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9232 /* charnames doesn't work well if there have been errors found */
9233 if (PL_error_count > 0 && *key == 'c')
9235 SvREFCNT_dec_NN(sv);
9236 return &PL_sv_undef;
9239 sv_2mortal(sv); /* Parent created it permanently */
9241 || ! (PL_hints & HINT_LOCALIZE_HH)
9242 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9247 /* Here haven't found what we're looking for. If it is charnames,
9248 * perhaps it needs to be loaded. Try doing that before giving up */
9250 Perl_load_module(aTHX_
9252 newSVpvs("_charnames"),
9253 /* version parameter; no need to specify it, as if
9254 * we get too early a version, will fail anyway,
9255 * not being able to find '_charnames' */
9261 table = GvHV(PL_hintgv);
9263 && (PL_hints & HINT_LOCALIZE_HH)
9264 && (cvp = hv_fetch(table, key, keylen, FALSE))
9270 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9271 msg = Perl_form(aTHX_
9272 "Constant(%.*s) unknown",
9273 (int)(type ? typelen : len),
9279 why3 = "} is not defined";
9282 msg = Perl_form(aTHX_
9283 /* The +3 is for '\N{'; -4 for that, plus '}' */
9284 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9288 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9289 (int)(type ? typelen : len),
9290 (type ? type: s), why1, why2, why3);
9293 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9294 return SvREFCNT_inc_simple_NN(sv);
9299 pv = newSVpvn_flags(s, len, SVs_TEMP);
9301 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9303 typesv = &PL_sv_undef;
9305 PUSHSTACKi(PERLSI_OVERLOAD);
9317 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9321 /* Check the eval first */
9322 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9324 const char * errstr;
9325 sv_catpvs(errsv, "Propagated");
9326 errstr = SvPV_const(errsv, errlen);
9327 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9329 res = SvREFCNT_inc_simple_NN(sv);
9333 SvREFCNT_inc_simple_void_NN(res);
9342 why1 = "Call to &{$^H{";
9344 why3 = "}} did not return a defined value";
9346 (void)sv_2mortal(sv);
9353 PERL_STATIC_INLINE void
9354 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9356 PERL_ARGS_ASSERT_PARSE_IDENT;
9360 Perl_croak(aTHX_ "%s", ident_too_long);
9361 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9362 /* The UTF-8 case must come first, otherwise things
9363 * like c\N{COMBINING TILDE} would start failing, as the
9364 * isWORDCHAR_A case below would gobble the 'c' up.
9367 char *t = *s + UTF8SKIP(*s);
9368 while (isIDCONT_utf8((U8*)t))
9370 if (*d + (t - *s) > e)
9371 Perl_croak(aTHX_ "%s", ident_too_long);
9372 Copy(*s, *d, t - *s, char);
9376 else if ( isWORDCHAR_A(**s) ) {
9379 } while (isWORDCHAR_A(**s) && *d < e);
9381 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9386 else if (allow_package && **s == ':' && (*s)[1] == ':'
9387 /* Disallow things like Foo::$bar. For the curious, this is
9388 * the code path that triggers the "Bad name after" warning
9389 * when looking for barewords.
9391 && (*s)[2] != '$') {
9401 /* Returns a NUL terminated string, with the length of the string written to
9405 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9409 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9410 bool is_utf8 = cBOOL(UTF);
9412 PERL_ARGS_ASSERT_SCAN_WORD;
9414 parse_ident(&s, &d, e, allow_package, is_utf8);
9421 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9424 I32 herelines = PL_parser->herelines;
9425 SSize_t bracket = -1;
9428 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9429 bool is_utf8 = cBOOL(UTF);
9430 I32 orig_copline, tmp_copline = 0;
9432 PERL_ARGS_ASSERT_SCAN_IDENT;
9437 while (isDIGIT(*s)) {
9439 Perl_croak(aTHX_ "%s", ident_too_long);
9444 parse_ident(&s, &d, e, 1, is_utf8);
9449 /* Either a digit variable, or parse_ident() found an identifier
9450 (anything valid as a bareword), so job done and return. */
9451 if (PL_lex_state != LEX_NORMAL)
9452 PL_lex_state = LEX_INTERPENDMAYBE;
9455 if (*s == '$' && s[1] &&
9456 (isIDFIRST_lazy_if(s+1,is_utf8)
9457 || isDIGIT_A((U8)s[1])
9460 || strnEQ(s+1,"::",2)) )
9462 /* Dereferencing a value in a scalar variable.
9463 The alternatives are different syntaxes for a scalar variable.
9464 Using ' as a leading package separator isn't allowed. :: is. */
9467 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9469 bracket = s - SvPVX(PL_linestr);
9471 orig_copline = CopLINE(PL_curcop);
9472 if (s < PL_bufend && isSPACE(*s)) {
9477 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9478 * iff Unicode semantics are to be used. The legal ones are any of:
9480 * b) ASCII punctuation
9481 * c) When not under Unicode rules, any upper Latin1 character
9482 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9483 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
9485 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9486 || isDIGIT_A((U8)(d)) \
9487 || (!(u) && !isASCII((U8)(d))) \
9488 || ((((U8)(d)) < 32) \
9489 && (((((U8)(d)) >= 14) \
9490 || (((U8)(d)) <= 8 && (d) != 0) \
9491 || (((U8)(d)) == 13)))) \
9492 || (((U8)(d)) == toCTRL('?')))
9494 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9496 if ( isCNTRL_A((U8)*s) ) {
9497 deprecate("literal control characters in variable names");
9501 const STRLEN skip = UTF8SKIP(s);
9504 for ( i = 0; i < skip; i++ )
9512 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9513 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9517 /* Warn about ambiguous code after unary operators if {...} notation isn't
9518 used. There's no difference in ambiguity; it's merely a heuristic
9519 about when not to warn. */
9520 else if (ck_uni && bracket == -1)
9522 if (bracket != -1) {
9523 /* If we were processing {...} notation then... */
9524 if (isIDFIRST_lazy_if(d,is_utf8)) {
9525 /* if it starts as a valid identifier, assume that it is one.
9526 (the later check for } being at the expected point will trap
9527 cases where this doesn't pan out.) */
9528 d += is_utf8 ? UTF8SKIP(d) : 1;
9529 parse_ident(&s, &d, e, 1, is_utf8);
9531 tmp_copline = CopLINE(PL_curcop);
9532 if (s < PL_bufend && isSPACE(*s)) {
9535 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9536 /* ${foo[0]} and ${foo{bar}} notation. */
9537 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9538 const char * const brack =
9540 ((*s == '[') ? "[...]" : "{...}");
9541 orig_copline = CopLINE(PL_curcop);
9542 CopLINE_set(PL_curcop, tmp_copline);
9543 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9544 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9545 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9546 funny, dest, brack, funny, dest, brack);
9547 CopLINE_set(PL_curcop, orig_copline);
9550 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9551 PL_lex_allbrackets++;
9555 /* Handle extended ${^Foo} variables
9556 * 1999-02-27 mjd-perl-patch@plover.com */
9557 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9561 while (isWORDCHAR(*s) && d < e) {
9565 Perl_croak(aTHX_ "%s", ident_too_long);
9570 tmp_copline = CopLINE(PL_curcop);
9571 if (s < PL_bufend && isSPACE(*s)) {
9575 /* Expect to find a closing } after consuming any trailing whitespace.
9579 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9580 PL_lex_state = LEX_INTERPEND;
9583 if (PL_lex_state == LEX_NORMAL) {
9584 if (ckWARN(WARN_AMBIGUOUS) &&
9585 (keyword(dest, d - dest, 0)
9586 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9588 SV *tmp = newSVpvn_flags( dest, d - dest,
9589 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9592 orig_copline = CopLINE(PL_curcop);
9593 CopLINE_set(PL_curcop, tmp_copline);
9594 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9595 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9596 funny, tmp, funny, tmp);
9597 CopLINE_set(PL_curcop, orig_copline);
9602 /* Didn't find the closing } at the point we expected, so restore
9603 state such that the next thing to process is the opening { and */
9604 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9605 CopLINE_set(PL_curcop, orig_copline);
9606 PL_parser->herelines = herelines;
9610 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9611 PL_lex_state = LEX_INTERPEND;
9616 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9618 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9619 * the parse starting at 's', based on the subset that are valid in this
9620 * context input to this routine in 'valid_flags'. Advances s. Returns
9621 * TRUE if the input should be treated as a valid flag, so the next char
9622 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9623 * first call on the current regex. This routine will set it to any
9624 * charset modifier found. The caller shouldn't change it. This way,
9625 * another charset modifier encountered in the parse can be detected as an
9626 * error, as we have decided to allow only one */
9629 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9631 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9632 if (isWORDCHAR_lazy_if(*s, UTF)) {
9633 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9634 UTF ? SVf_UTF8 : 0);
9636 /* Pretend that it worked, so will continue processing before
9645 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9646 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9647 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9648 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9649 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9650 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9651 case LOCALE_PAT_MOD:
9653 goto multiple_charsets;
9655 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9658 case UNICODE_PAT_MOD:
9660 goto multiple_charsets;
9662 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9665 case ASCII_RESTRICT_PAT_MOD:
9667 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9671 /* Error if previous modifier wasn't an 'a', but if it was, see
9672 * if, and accept, a second occurrence (only) */
9674 || get_regex_charset(*pmfl)
9675 != REGEX_ASCII_RESTRICTED_CHARSET)
9677 goto multiple_charsets;
9679 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9683 case DEPENDS_PAT_MOD:
9685 goto multiple_charsets;
9687 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9696 if (*charset != c) {
9697 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9699 else if (c == 'a') {
9700 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9703 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9706 /* Pretend that it worked, so will continue processing before dieing */
9712 S_scan_pat(pTHX_ char *start, I32 type)
9717 const char * const valid_flags =
9718 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9719 char charset = '\0'; /* character set modifier */
9724 PERL_ARGS_ASSERT_SCAN_PAT;
9726 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9727 TRUE /* look for escaped bracketed metas */ );
9730 const char * const delimiter = skipspace(start);
9734 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9735 : "Search pattern not terminated" ));
9738 pm = (PMOP*)newPMOP(type, 0);
9739 if (PL_multi_open == '?') {
9740 /* This is the only point in the code that sets PMf_ONCE: */
9741 pm->op_pmflags |= PMf_ONCE;
9743 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9744 allows us to restrict the list needed by reset to just the ??
9746 assert(type != OP_TRANS);
9748 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9751 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9754 elements = mg->mg_len / sizeof(PMOP**);
9755 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9756 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9757 mg->mg_len = elements * sizeof(PMOP**);
9758 PmopSTASH_set(pm,PL_curstash);
9765 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9766 * anon CV. False positives like qr/[(?{]/ are harmless */
9768 if (type == OP_QR) {
9770 char *e, *p = SvPV(PL_lex_stuff, len);
9772 for (; p < e; p++) {
9773 if (p[0] == '(' && p[1] == '?'
9774 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9776 pm->op_pmflags |= PMf_HAS_CV;
9780 pm->op_pmflags |= PMf_IS_QR;
9783 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9785 if (PL_madskills && modstart != s) {
9786 SV* tmptoken = newSVpvn(modstart, s - modstart);
9787 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9790 /* issue a warning if /c is specified,but /g is not */
9791 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9793 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9794 "Use of /c modifier is meaningless without /g" );
9797 PL_lex_op = (OP*)pm;
9798 pl_yylval.ival = OP_MATCH;
9803 S_scan_subst(pTHX_ char *start)
9811 char charset = '\0'; /* character set modifier */
9816 PERL_ARGS_ASSERT_SCAN_SUBST;
9818 pl_yylval.ival = OP_NULL;
9820 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9821 TRUE /* look for escaped bracketed metas */ );
9824 Perl_croak(aTHX_ "Substitution pattern not terminated");
9826 if (s[-1] == PL_multi_open)
9830 CURMAD('q', PL_thisopen);
9831 CURMAD('_', PL_thiswhite);
9832 CURMAD('E', PL_thisstuff);
9833 CURMAD('Q', PL_thisclose);
9834 PL_realtokenstart = s - SvPVX(PL_linestr);
9838 first_start = PL_multi_start;
9839 first_line = CopLINE(PL_curcop);
9840 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9843 SvREFCNT_dec(PL_lex_stuff);
9844 PL_lex_stuff = NULL;
9846 Perl_croak(aTHX_ "Substitution replacement not terminated");
9848 PL_multi_start = first_start; /* so whole substitution is taken together */
9850 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9854 CURMAD('z', PL_thisopen);
9855 CURMAD('R', PL_thisstuff);
9856 CURMAD('Z', PL_thisclose);
9862 if (*s == EXEC_PAT_MOD) {
9866 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9875 curmad('m', newSVpvn(modstart, s - modstart));
9876 append_madprops(PL_thismad, (OP*)pm, 0);
9880 if ((pm->op_pmflags & PMf_CONTINUE)) {
9881 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9885 SV * const repl = newSVpvs("");
9888 pm->op_pmflags |= PMf_EVAL;
9891 sv_catpvs(repl, "eval ");
9893 sv_catpvs(repl, "do ");
9895 sv_catpvs(repl, "{");
9896 sv_catsv(repl, PL_sublex_info.repl);
9897 sv_catpvs(repl, "}");
9899 SvREFCNT_dec(PL_sublex_info.repl);
9900 PL_sublex_info.repl = repl;
9902 if (CopLINE(PL_curcop) != first_line) {
9903 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9904 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9905 CopLINE(PL_curcop) - first_line;
9906 CopLINE_set(PL_curcop, first_line);
9909 PL_lex_op = (OP*)pm;
9910 pl_yylval.ival = OP_SUBST;
9915 S_scan_trans(pTHX_ char *start)
9923 bool nondestruct = 0;
9928 PERL_ARGS_ASSERT_SCAN_TRANS;
9930 pl_yylval.ival = OP_NULL;
9932 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9934 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9936 if (s[-1] == PL_multi_open)
9940 CURMAD('q', PL_thisopen);
9941 CURMAD('_', PL_thiswhite);
9942 CURMAD('E', PL_thisstuff);
9943 CURMAD('Q', PL_thisclose);
9944 PL_realtokenstart = s - SvPVX(PL_linestr);
9948 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9951 SvREFCNT_dec(PL_lex_stuff);
9952 PL_lex_stuff = NULL;
9954 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9957 CURMAD('z', PL_thisopen);
9958 CURMAD('R', PL_thisstuff);
9959 CURMAD('Z', PL_thisclose);
9962 complement = del = squash = 0;
9969 complement = OPpTRANS_COMPLEMENT;
9972 del = OPpTRANS_DELETE;
9975 squash = OPpTRANS_SQUASH;
9987 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9988 o->op_private &= ~OPpTRANS_ALL;
9989 o->op_private |= del|squash|complement|
9990 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9991 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9994 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9999 curmad('m', newSVpvn(modstart, s - modstart));
10000 append_madprops(PL_thismad, o, 0);
10009 Takes a pointer to the first < in <<FOO.
10010 Returns a pointer to the byte following <<FOO.
10012 This function scans a heredoc, which involves different methods
10013 depending on whether we are in a string eval, quoted construct, etc.
10014 This is because PL_linestr could containing a single line of input, or
10015 a whole string being evalled, or the contents of the current quote-
10018 The two basic methods are:
10019 - Steal lines from the input stream
10020 - Scan the heredoc in PL_linestr and remove it therefrom
10022 In a file scope or filtered eval, the first method is used; in a
10023 string eval, the second.
10025 In a quote-like operator, we have to choose between the two,
10026 depending on where we can find a newline. We peek into outer lex-
10027 ing scopes until we find one with a newline in it. If we reach the
10028 outermost lexing scope and it is a file, we use the stream method.
10029 Otherwise it is treated as an eval.
10033 S_scan_heredoc(pTHX_ char *s)
10036 I32 op_type = OP_SCALAR;
10043 const bool infile = PL_rsfp || PL_parser->filtered;
10044 const line_t origline = CopLINE(PL_curcop);
10045 LEXSHARED *shared = PL_parser->lex_shared;
10047 I32 stuffstart = s - SvPVX(PL_linestr);
10050 PL_realtokenstart = -1;
10053 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10056 d = PL_tokenbuf + 1;
10057 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10058 *PL_tokenbuf = '\n';
10060 while (SPACE_OR_TAB(*peek))
10062 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10065 s = delimcpy(d, e, s, PL_bufend, term, &len);
10066 if (s == PL_bufend)
10067 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10073 /* <<\FOO is equivalent to <<'FOO' */
10077 if (!isWORDCHAR_lazy_if(s,UTF))
10078 deprecate("bare << to mean <<\"\"");
10079 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10084 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10085 Perl_croak(aTHX_ "Delimiter for here document is too long");
10088 len = d - PL_tokenbuf;
10091 if (PL_madskills) {
10092 tstart = PL_tokenbuf + 1;
10093 PL_thisclose = newSVpvn(tstart, len - 1);
10094 tstart = SvPVX(PL_linestr) + stuffstart;
10095 PL_thisopen = newSVpvn(tstart, s - tstart);
10096 stuffstart = s - SvPVX(PL_linestr);
10099 #ifndef PERL_STRICT_CR
10100 d = strchr(s, '\r');
10102 char * const olds = s;
10104 while (s < PL_bufend) {
10110 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10119 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10124 if (PL_madskills) {
10125 tstart = SvPVX(PL_linestr) + stuffstart;
10127 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10129 PL_thisstuff = newSVpvn(tstart, s - tstart);
10132 stuffstart = s - SvPVX(PL_linestr);
10135 tmpstr = newSV_type(SVt_PVIV);
10136 SvGROW(tmpstr, 80);
10137 if (term == '\'') {
10138 op_type = OP_CONST;
10139 SvIV_set(tmpstr, -1);
10141 else if (term == '`') {
10142 op_type = OP_BACKTICK;
10143 SvIV_set(tmpstr, '\\');
10146 PL_multi_start = origline + 1 + PL_parser->herelines;
10147 PL_multi_open = PL_multi_close = '<';
10148 /* inside a string eval or quote-like operator */
10149 if (!infile || PL_lex_inwhat) {
10152 char * const olds = s;
10153 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10154 /* These two fields are not set until an inner lexing scope is
10155 entered. But we need them set here. */
10156 shared->ls_bufptr = s;
10157 shared->ls_linestr = PL_linestr;
10159 /* Look for a newline. If the current buffer does not have one,
10160 peek into the line buffer of the parent lexing scope, going
10161 up as many levels as necessary to find one with a newline
10164 while (!(s = (char *)memchr(
10165 (void *)shared->ls_bufptr, '\n',
10166 SvEND(shared->ls_linestr)-shared->ls_bufptr
10168 shared = shared->ls_prev;
10169 /* shared is only null if we have gone beyond the outermost
10170 lexing scope. In a file, we will have broken out of the
10171 loop in the previous iteration. In an eval, the string buf-
10172 fer ends with "\n;", so the while condition above will have
10173 evaluated to false. So shared can never be null. */
10175 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10176 most lexing scope. In a file, shared->ls_linestr at that
10177 level is just one line, so there is no body to steal. */
10178 if (infile && !shared->ls_prev) {
10184 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10187 linestr = shared->ls_linestr;
10188 bufend = SvEND(linestr);
10190 while (s < bufend - len + 1 &&
10191 memNE(s,PL_tokenbuf,len) ) {
10193 ++PL_parser->herelines;
10195 if (s >= bufend - len + 1) {
10198 sv_setpvn(tmpstr,d+1,s-d);
10200 if (PL_madskills) {
10202 sv_catpvn(PL_thisstuff, d + 1, s - d);
10204 PL_thisstuff = newSVpvn(d + 1, s - d);
10205 stuffstart = s - SvPVX(PL_linestr);
10209 /* the preceding stmt passes a newline */
10210 PL_parser->herelines++;
10212 /* s now points to the newline after the heredoc terminator.
10213 d points to the newline before the body of the heredoc.
10216 /* We are going to modify linestr in place here, so set
10217 aside copies of the string if necessary for re-evals or
10219 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10220 check shared->re_eval_str. */
10221 if (shared->re_eval_start || shared->re_eval_str) {
10222 /* Set aside the rest of the regexp */
10223 if (!shared->re_eval_str)
10224 shared->re_eval_str =
10225 newSVpvn(shared->re_eval_start,
10226 bufend - shared->re_eval_start);
10227 shared->re_eval_start -= s-d;
10229 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10230 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10231 cx->blk_eval.cur_text == linestr)
10233 cx->blk_eval.cur_text = newSVsv(linestr);
10234 SvSCREAM_on(cx->blk_eval.cur_text);
10236 /* Copy everything from s onwards back to d. */
10237 Move(s,d,bufend-s + 1,char);
10238 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10239 /* Setting PL_bufend only applies when we have not dug deeper
10240 into other scopes, because sublex_done sets PL_bufend to
10241 SvEND(PL_linestr). */
10242 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10249 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10250 term = PL_tokenbuf[1];
10252 linestr_save = PL_linestr; /* must restore this afterwards */
10253 d = s; /* and this */
10254 PL_linestr = newSVpvs("");
10255 PL_bufend = SvPVX(PL_linestr);
10258 if (PL_madskills) {
10259 tstart = SvPVX(PL_linestr) + stuffstart;
10261 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10263 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10266 PL_bufptr = PL_bufend;
10267 CopLINE_set(PL_curcop,
10268 origline + 1 + PL_parser->herelines);
10269 if (!lex_next_chunk(LEX_NO_TERM)
10270 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10271 SvREFCNT_dec(linestr_save);
10274 CopLINE_set(PL_curcop, origline);
10275 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10276 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10277 /* ^That should be enough to avoid this needing to grow: */
10278 sv_catpvs(PL_linestr, "\n\0");
10279 assert(s == SvPVX(PL_linestr));
10280 PL_bufend = SvEND(PL_linestr);
10284 stuffstart = s - SvPVX(PL_linestr);
10286 PL_parser->herelines++;
10287 PL_last_lop = PL_last_uni = NULL;
10288 #ifndef PERL_STRICT_CR
10289 if (PL_bufend - PL_linestart >= 2) {
10290 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10291 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10293 PL_bufend[-2] = '\n';
10295 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10297 else if (PL_bufend[-1] == '\r')
10298 PL_bufend[-1] = '\n';
10300 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10301 PL_bufend[-1] = '\n';
10303 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10304 SvREFCNT_dec(PL_linestr);
10305 PL_linestr = linestr_save;
10306 PL_linestart = SvPVX(linestr_save);
10307 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10312 sv_catsv(tmpstr,PL_linestr);
10316 PL_multi_end = origline + PL_parser->herelines;
10317 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10318 SvPV_shrink_to_cur(tmpstr);
10321 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10323 else if (PL_encoding)
10324 sv_recode_to_utf8(tmpstr, PL_encoding);
10326 PL_lex_stuff = tmpstr;
10327 pl_yylval.ival = op_type;
10331 SvREFCNT_dec(tmpstr);
10332 CopLINE_set(PL_curcop, origline);
10333 missingterm(PL_tokenbuf + 1);
10336 /* scan_inputsymbol
10337 takes: current position in input buffer
10338 returns: new position in input buffer
10339 side-effects: pl_yylval and lex_op are set.
10344 <FH> read from filehandle
10345 <pkg::FH> read from package qualified filehandle
10346 <pkg'FH> read from package qualified filehandle
10347 <$fh> read from filehandle in $fh
10348 <*.h> filename glob
10353 S_scan_inputsymbol(pTHX_ char *start)
10356 char *s = start; /* current position in buffer */
10359 char *d = PL_tokenbuf; /* start of temp holding space */
10360 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10362 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10364 end = strchr(s, '\n');
10367 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10369 /* die if we didn't have space for the contents of the <>,
10370 or if it didn't end, or if we see a newline
10373 if (len >= (I32)sizeof PL_tokenbuf)
10374 Perl_croak(aTHX_ "Excessively long <> operator");
10376 Perl_croak(aTHX_ "Unterminated <> operator");
10381 Remember, only scalar variables are interpreted as filehandles by
10382 this code. Anything more complex (e.g., <$fh{$num}>) will be
10383 treated as a glob() call.
10384 This code makes use of the fact that except for the $ at the front,
10385 a scalar variable and a filehandle look the same.
10387 if (*d == '$' && d[1]) d++;
10389 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10390 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10391 d += UTF ? UTF8SKIP(d) : 1;
10393 /* If we've tried to read what we allow filehandles to look like, and
10394 there's still text left, then it must be a glob() and not a getline.
10395 Use scan_str to pull out the stuff between the <> and treat it
10396 as nothing more than a string.
10399 if (d - PL_tokenbuf != len) {
10400 pl_yylval.ival = OP_GLOB;
10401 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10403 Perl_croak(aTHX_ "Glob not terminated");
10407 bool readline_overriden = FALSE;
10410 /* we're in a filehandle read situation */
10413 /* turn <> into <ARGV> */
10415 Copy("ARGV",d,5,char);
10417 /* Check whether readline() is overriden */
10418 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10420 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10422 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10423 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10424 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10425 readline_overriden = TRUE;
10427 /* if <$fh>, create the ops to turn the variable into a
10431 /* try to find it in the pad for this block, otherwise find
10432 add symbol table ops
10434 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10435 if (tmp != NOT_IN_PAD) {
10436 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10437 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10438 HEK * const stashname = HvNAME_HEK(stash);
10439 SV * const sym = sv_2mortal(newSVhek(stashname));
10440 sv_catpvs(sym, "::");
10441 sv_catpv(sym, d+1);
10446 OP * const o = newOP(OP_PADSV, 0);
10448 PL_lex_op = readline_overriden
10449 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10450 op_append_elem(OP_LIST, o,
10451 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10452 : (OP*)newUNOP(OP_READLINE, 0, o);
10461 ? (GV_ADDMULTI | GV_ADDINEVAL)
10462 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10464 PL_lex_op = readline_overriden
10465 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10466 op_append_elem(OP_LIST,
10467 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10468 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10469 : (OP*)newUNOP(OP_READLINE, 0,
10470 newUNOP(OP_RV2SV, 0,
10471 newGVOP(OP_GV, 0, gv)));
10473 if (!readline_overriden)
10474 PL_lex_op->op_flags |= OPf_SPECIAL;
10475 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10476 pl_yylval.ival = OP_NULL;
10479 /* If it's none of the above, it must be a literal filehandle
10480 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10482 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10483 PL_lex_op = readline_overriden
10484 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10485 op_append_elem(OP_LIST,
10486 newGVOP(OP_GV, 0, gv),
10487 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10488 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10489 pl_yylval.ival = OP_NULL;
10499 start position in buffer
10500 keep_quoted preserve \ on the embedded delimiter(s)
10501 keep_delims preserve the delimiters around the string
10502 re_reparse compiling a run-time /(?{})/:
10503 collapse // to /, and skip encoding src
10504 deprecate_escaped_meta issue a deprecation warning for cer-
10505 tain paired metacharacters that appear
10507 returns: position to continue reading from buffer
10508 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10509 updates the read buffer.
10511 This subroutine pulls a string out of the input. It is called for:
10512 q single quotes q(literal text)
10513 ' single quotes 'literal text'
10514 qq double quotes qq(interpolate $here please)
10515 " double quotes "interpolate $here please"
10516 qx backticks qx(/bin/ls -l)
10517 ` backticks `/bin/ls -l`
10518 qw quote words @EXPORT_OK = qw( func() $spam )
10519 m// regexp match m/this/
10520 s/// regexp substitute s/this/that/
10521 tr/// string transliterate tr/this/that/
10522 y/// string transliterate y/this/that/
10523 ($*@) sub prototypes sub foo ($)
10524 (stuff) sub attr parameters sub foo : attr(stuff)
10525 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10527 In most of these cases (all but <>, patterns and transliterate)
10528 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10529 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10530 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10533 It skips whitespace before the string starts, and treats the first
10534 character as the delimiter. If the delimiter is one of ([{< then
10535 the corresponding "close" character )]}> is used as the closing
10536 delimiter. It allows quoting of delimiters, and if the string has
10537 balanced delimiters ([{<>}]) it allows nesting.
10539 On success, the SV with the resulting string is put into lex_stuff or,
10540 if that is already non-NULL, into lex_repl. The second case occurs only
10541 when parsing the RHS of the special constructs s/// and tr/// (y///).
10542 For convenience, the terminating delimiter character is stuffed into
10547 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10548 bool deprecate_escaped_meta
10552 SV *sv; /* scalar value: string */
10553 const char *tmps; /* temp string, used for delimiter matching */
10554 char *s = start; /* current position in the buffer */
10555 char term; /* terminating character */
10556 char *to; /* current position in the sv's data */
10557 I32 brackets = 1; /* bracket nesting level */
10558 bool has_utf8 = FALSE; /* is there any utf8 content? */
10559 I32 termcode; /* terminating char. code */
10560 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10561 STRLEN termlen; /* length of terminating string */
10562 int last_off = 0; /* last position for nesting bracket */
10563 char *escaped_open = NULL;
10570 PERL_ARGS_ASSERT_SCAN_STR;
10572 /* skip space before the delimiter */
10578 if (PL_realtokenstart >= 0) {
10579 stuffstart = PL_realtokenstart;
10580 PL_realtokenstart = -1;
10583 stuffstart = start - SvPVX(PL_linestr);
10585 /* mark where we are, in case we need to report errors */
10588 /* after skipping whitespace, the next character is the terminator */
10591 termcode = termstr[0] = term;
10595 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10596 Copy(s, termstr, termlen, U8);
10597 if (!UTF8_IS_INVARIANT(term))
10601 /* mark where we are */
10602 PL_multi_start = CopLINE(PL_curcop);
10603 PL_multi_open = term;
10604 herelines = PL_parser->herelines;
10606 /* find corresponding closing delimiter */
10607 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10608 termcode = termstr[0] = term = tmps[5];
10610 PL_multi_close = term;
10612 /* A warning is raised if the input parameter requires it for escaped (by a
10613 * backslash) paired metacharacters {} [] and () when the delimiters are
10614 * those same characters, and the backslash is ineffective. This doesn't
10615 * happen for <>, as they aren't metas. */
10616 if (deprecate_escaped_meta
10617 && (PL_multi_open == PL_multi_close
10618 || PL_multi_open == '<'
10619 || ! ckWARN_d(WARN_DEPRECATED)))
10621 deprecate_escaped_meta = FALSE;
10624 /* create a new SV to hold the contents. 79 is the SV's initial length.
10625 What a random number. */
10626 sv = newSV_type(SVt_PVIV);
10628 SvIV_set(sv, termcode);
10629 (void)SvPOK_only(sv); /* validate pointer */
10631 /* move past delimiter and try to read a complete string */
10633 sv_catpvn(sv, s, termlen);
10636 tstart = SvPVX(PL_linestr) + stuffstart;
10637 if (PL_madskills && !PL_thisopen && !keep_delims) {
10638 PL_thisopen = newSVpvn(tstart, s - tstart);
10639 stuffstart = s - SvPVX(PL_linestr);
10643 if (PL_encoding && !UTF && !re_reparse) {
10647 int offset = s - SvPVX_const(PL_linestr);
10648 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10649 &offset, (char*)termstr, termlen);
10653 if (SvIsCOW(PL_linestr)) {
10654 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10655 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10656 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10657 char *buf = SvPVX(PL_linestr);
10658 bufend_pos = PL_parser->bufend - buf;
10659 bufptr_pos = PL_parser->bufptr - buf;
10660 oldbufptr_pos = PL_parser->oldbufptr - buf;
10661 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10662 linestart_pos = PL_parser->linestart - buf;
10663 last_uni_pos = PL_parser->last_uni
10664 ? PL_parser->last_uni - buf
10666 last_lop_pos = PL_parser->last_lop
10667 ? PL_parser->last_lop - buf
10669 re_eval_start_pos =
10670 PL_parser->lex_shared->re_eval_start ?
10671 PL_parser->lex_shared->re_eval_start - buf : 0;
10674 sv_force_normal(PL_linestr);
10676 buf = SvPVX(PL_linestr);
10677 PL_parser->bufend = buf + bufend_pos;
10678 PL_parser->bufptr = buf + bufptr_pos;
10679 PL_parser->oldbufptr = buf + oldbufptr_pos;
10680 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10681 PL_parser->linestart = buf + linestart_pos;
10682 if (PL_parser->last_uni)
10683 PL_parser->last_uni = buf + last_uni_pos;
10684 if (PL_parser->last_lop)
10685 PL_parser->last_lop = buf + last_lop_pos;
10686 if (PL_parser->lex_shared->re_eval_start)
10687 PL_parser->lex_shared->re_eval_start =
10688 buf + re_eval_start_pos;
10691 ns = SvPVX_const(PL_linestr) + offset;
10692 svlast = SvEND(sv) - 1;
10694 for (; s < ns; s++) {
10695 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10696 COPLINE_INC_WITH_HERELINES;
10699 goto read_more_line;
10701 /* handle quoted delimiters */
10702 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10704 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10706 if ((svlast-1 - t) % 2) {
10707 if (!keep_quoted) {
10708 *(svlast-1) = term;
10710 SvCUR_set(sv, SvCUR(sv) - 1);
10715 if (PL_multi_open == PL_multi_close) {
10721 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10722 /* At here, all closes are "was quoted" one,
10723 so we don't check PL_multi_close. */
10725 if (!keep_quoted && *(t+1) == PL_multi_open)
10730 else if (*t == PL_multi_open)
10738 SvCUR_set(sv, w - SvPVX_const(sv));
10740 last_off = w - SvPVX(sv);
10741 if (--brackets <= 0)
10746 if (!keep_delims) {
10747 SvCUR_set(sv, SvCUR(sv) - 1);
10753 /* extend sv if need be */
10754 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10755 /* set 'to' to the next character in the sv's string */
10756 to = SvPVX(sv)+SvCUR(sv);
10758 /* if open delimiter is the close delimiter read unbridle */
10759 if (PL_multi_open == PL_multi_close) {
10760 for (; s < PL_bufend; s++,to++) {
10761 /* embedded newlines increment the current line number */
10762 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10763 COPLINE_INC_WITH_HERELINES;
10764 /* handle quoted delimiters */
10765 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10768 || (re_reparse && s[1] == '\\'))
10771 /* any other quotes are simply copied straight through */
10775 /* terminate when run out of buffer (the for() condition), or
10776 have found the terminator */
10777 else if (*s == term) {
10780 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10783 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10789 /* if the terminator isn't the same as the start character (e.g.,
10790 matched brackets), we have to allow more in the quoting, and
10791 be prepared for nested brackets.
10794 /* read until we run out of string, or we find the terminator */
10795 for (; s < PL_bufend; s++,to++) {
10796 /* embedded newlines increment the line count */
10797 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10798 COPLINE_INC_WITH_HERELINES;
10799 /* backslashes can escape the open or closing characters */
10800 if (*s == '\\' && s+1 < PL_bufend) {
10801 if (!keep_quoted &&
10802 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10806 /* Here, 'deprecate_escaped_meta' is true iff the
10807 * delimiters are paired metacharacters, and 's' points
10808 * to an occurrence of one of them within the string,
10809 * which was preceded by a backslash. If this is a
10810 * context where the delimiter is also a metacharacter,
10811 * the backslash is useless, and deprecated. () and []
10812 * are meta in any context. {} are meta only when
10813 * appearing in a quantifier or in things like '\p{'
10814 * (but '\\p{' isn't meta). They also aren't meta
10815 * unless there is a matching closed, escaped char
10816 * later on within the string. If 's' points to an
10817 * open, set a flag; if to a close, test that flag, and
10818 * raise a warning if it was set */
10820 if (deprecate_escaped_meta) {
10821 if (*s == PL_multi_open) {
10825 /* Look for a closing '\}' */
10826 else if (regcurly(s, TRUE)) {
10829 /* Look for e.g. '\x{' */
10830 else if (s - start > 2
10831 && _generic_isCC(*(s-2),
10832 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10833 { /* Exclude '\\x', '\\\\x', etc. */
10834 char *lookbehind = s - 4;
10835 bool is_meta = TRUE;
10836 while (lookbehind >= start
10837 && *lookbehind == '\\')
10839 is_meta = ! is_meta;
10847 else if (escaped_open) {
10848 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10849 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10850 escaped_open = NULL;
10857 /* allow nested opens and closes */
10858 else if (*s == PL_multi_close && --brackets <= 0)
10860 else if (*s == PL_multi_open)
10862 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10867 /* terminate the copied string and update the sv's end-of-string */
10869 SvCUR_set(sv, to - SvPVX_const(sv));
10872 * this next chunk reads more into the buffer if we're not done yet
10876 break; /* handle case where we are done yet :-) */
10878 #ifndef PERL_STRICT_CR
10879 if (to - SvPVX_const(sv) >= 2) {
10880 if ((to[-2] == '\r' && to[-1] == '\n') ||
10881 (to[-2] == '\n' && to[-1] == '\r'))
10885 SvCUR_set(sv, to - SvPVX_const(sv));
10887 else if (to[-1] == '\r')
10890 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10895 /* if we're out of file, or a read fails, bail and reset the current
10896 line marker so we can report where the unterminated string began
10899 if (PL_madskills) {
10900 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10902 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10904 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10907 COPLINE_INC_WITH_HERELINES;
10908 PL_bufptr = PL_bufend;
10909 if (!lex_next_chunk(0)) {
10911 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10920 /* at this point, we have successfully read the delimited string */
10922 if (!PL_encoding || UTF || re_reparse) {
10924 if (PL_madskills) {
10925 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10926 const int len = s - tstart;
10928 sv_catpvn(PL_thisstuff, tstart, len);
10930 PL_thisstuff = newSVpvn(tstart, len);
10931 if (!PL_thisclose && !keep_delims)
10932 PL_thisclose = newSVpvn(s,termlen);
10937 sv_catpvn(sv, s, termlen);
10942 if (PL_madskills) {
10943 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10944 const int len = s - tstart - termlen;
10946 sv_catpvn(PL_thisstuff, tstart, len);
10948 PL_thisstuff = newSVpvn(tstart, len);
10949 if (!PL_thisclose && !keep_delims)
10950 PL_thisclose = newSVpvn(s - termlen,termlen);
10954 if (has_utf8 || (PL_encoding && !re_reparse))
10957 PL_multi_end = CopLINE(PL_curcop);
10958 CopLINE_set(PL_curcop, PL_multi_start);
10959 PL_parser->herelines = herelines;
10961 /* if we allocated too much space, give some back */
10962 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10963 SvLEN_set(sv, SvCUR(sv) + 1);
10964 SvPV_renew(sv, SvLEN(sv));
10967 /* decide whether this is the first or second quoted string we've read
10972 PL_sublex_info.repl = sv;
10980 takes: pointer to position in buffer
10981 returns: pointer to new position in buffer
10982 side-effects: builds ops for the constant in pl_yylval.op
10984 Read a number in any of the formats that Perl accepts:
10986 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10987 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10990 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10992 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10995 If it reads a number without a decimal point or an exponent, it will
10996 try converting the number to an integer and see if it can do so
10997 without loss of precision.
11001 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11004 const char *s = start; /* current position in buffer */
11005 char *d; /* destination in temp buffer */
11006 char *e; /* end of temp buffer */
11007 NV nv; /* number read, as a double */
11008 SV *sv = NULL; /* place to put the converted number */
11009 bool floatit; /* boolean: int or float? */
11010 const char *lastub = NULL; /* position of last underbar */
11011 static const char* const number_too_long = "Number too long";
11013 PERL_ARGS_ASSERT_SCAN_NUM;
11015 /* We use the first character to decide what type of number this is */
11019 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11021 /* if it starts with a 0, it could be an octal number, a decimal in
11022 0.13 disguise, or a hexadecimal number, or a binary number. */
11026 u holds the "number so far"
11027 shift the power of 2 of the base
11028 (hex == 4, octal == 3, binary == 1)
11029 overflowed was the number more than we can hold?
11031 Shift is used when we add a digit. It also serves as an "are
11032 we in octal/hex/binary?" indicator to disallow hex characters
11033 when in octal mode.
11038 bool overflowed = FALSE;
11039 bool just_zero = TRUE; /* just plain 0 or binary number? */
11040 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11041 static const char* const bases[5] =
11042 { "", "binary", "", "octal", "hexadecimal" };
11043 static const char* const Bases[5] =
11044 { "", "Binary", "", "Octal", "Hexadecimal" };
11045 static const char* const maxima[5] =
11047 "0b11111111111111111111111111111111",
11051 const char *base, *Base, *max;
11053 /* check for hex */
11054 if (s[1] == 'x' || s[1] == 'X') {
11058 } else if (s[1] == 'b' || s[1] == 'B') {
11063 /* check for a decimal in disguise */
11064 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11066 /* so it must be octal */
11073 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11074 "Misplaced _ in number");
11078 base = bases[shift];
11079 Base = Bases[shift];
11080 max = maxima[shift];
11082 /* read the rest of the number */
11084 /* x is used in the overflow test,
11085 b is the digit we're adding on. */
11090 /* if we don't mention it, we're done */
11094 /* _ are ignored -- but warned about if consecutive */
11096 if (lastub && s == lastub + 1)
11097 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11098 "Misplaced _ in number");
11102 /* 8 and 9 are not octal */
11103 case '8': case '9':
11105 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11109 case '2': case '3': case '4':
11110 case '5': case '6': case '7':
11112 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11115 case '0': case '1':
11116 b = *s++ & 15; /* ASCII digit -> value of digit */
11120 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11121 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11122 /* make sure they said 0x */
11125 b = (*s++ & 7) + 9;
11127 /* Prepare to put the digit we have onto the end
11128 of the number so far. We check for overflows.
11134 x = u << shift; /* make room for the digit */
11136 if ((x >> shift) != u
11137 && !(PL_hints & HINT_NEW_BINARY)) {
11140 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11141 "Integer overflow in %s number",
11144 u = x | b; /* add the digit to the end */
11147 n *= nvshift[shift];
11148 /* If an NV has not enough bits in its
11149 * mantissa to represent an UV this summing of
11150 * small low-order numbers is a waste of time
11151 * (because the NV cannot preserve the
11152 * low-order bits anyway): we could just
11153 * remember when did we overflow and in the
11154 * end just multiply n by the right
11162 /* if we get here, we had success: make a scalar value from
11167 /* final misplaced underbar check */
11168 if (s[-1] == '_') {
11169 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11173 if (n > 4294967295.0)
11174 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11175 "%s number > %s non-portable",
11181 if (u > 0xffffffff)
11182 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11183 "%s number > %s non-portable",
11188 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11189 sv = new_constant(start, s - start, "integer",
11190 sv, NULL, NULL, 0);
11191 else if (PL_hints & HINT_NEW_BINARY)
11192 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11197 handle decimal numbers.
11198 we're also sent here when we read a 0 as the first digit
11200 case '1': case '2': case '3': case '4': case '5':
11201 case '6': case '7': case '8': case '9': case '.':
11204 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11207 /* read next group of digits and _ and copy into d */
11208 while (isDIGIT(*s) || *s == '_') {
11209 /* skip underscores, checking for misplaced ones
11213 if (lastub && s == lastub + 1)
11214 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11215 "Misplaced _ in number");
11219 /* check for end of fixed-length buffer */
11221 Perl_croak(aTHX_ "%s", number_too_long);
11222 /* if we're ok, copy the character */
11227 /* final misplaced underbar check */
11228 if (lastub && s == lastub + 1) {
11229 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11232 /* read a decimal portion if there is one. avoid
11233 3..5 being interpreted as the number 3. followed
11236 if (*s == '.' && s[1] != '.') {
11241 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11242 "Misplaced _ in number");
11246 /* copy, ignoring underbars, until we run out of digits.
11248 for (; isDIGIT(*s) || *s == '_'; s++) {
11249 /* fixed length buffer check */
11251 Perl_croak(aTHX_ "%s", number_too_long);
11253 if (lastub && s == lastub + 1)
11254 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11255 "Misplaced _ in number");
11261 /* fractional part ending in underbar? */
11262 if (s[-1] == '_') {
11263 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11264 "Misplaced _ in number");
11266 if (*s == '.' && isDIGIT(s[1])) {
11267 /* oops, it's really a v-string, but without the "v" */
11273 /* read exponent part, if present */
11274 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11278 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11279 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11281 /* stray preinitial _ */
11283 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11284 "Misplaced _ in number");
11288 /* allow positive or negative exponent */
11289 if (*s == '+' || *s == '-')
11292 /* stray initial _ */
11294 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11295 "Misplaced _ in number");
11299 /* read digits of exponent */
11300 while (isDIGIT(*s) || *s == '_') {
11303 Perl_croak(aTHX_ "%s", number_too_long);
11307 if (((lastub && s == lastub + 1) ||
11308 (!isDIGIT(s[1]) && s[1] != '_')))
11309 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11310 "Misplaced _ in number");
11318 We try to do an integer conversion first if no characters
11319 indicating "float" have been found.
11324 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11326 if (flags == IS_NUMBER_IN_UV) {
11328 sv = newSViv(uv); /* Prefer IVs over UVs. */
11331 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11332 if (uv <= (UV) IV_MIN)
11333 sv = newSViv(-(IV)uv);
11340 /* terminate the string */
11342 nv = Atof(PL_tokenbuf);
11347 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11348 const char *const key = floatit ? "float" : "integer";
11349 const STRLEN keylen = floatit ? 5 : 7;
11350 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11351 key, keylen, sv, NULL, NULL, 0);
11355 /* if it starts with a v, it could be a v-string */
11358 sv = newSV(5); /* preallocate storage space */
11359 ENTER_with_name("scan_vstring");
11361 s = scan_vstring(s, PL_bufend, sv);
11362 SvREFCNT_inc_simple_void_NN(sv);
11363 LEAVE_with_name("scan_vstring");
11367 /* make the op for the constant and return */
11370 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11372 lvalp->opval = NULL;
11378 S_scan_formline(pTHX_ char *s)
11383 SV * const stuff = newSVpvs("");
11384 bool needargs = FALSE;
11385 bool eofmt = FALSE;
11387 char *tokenstart = s;
11388 SV* savewhite = NULL;
11390 if (PL_madskills) {
11391 savewhite = PL_thiswhite;
11396 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11398 while (!needargs) {
11401 #ifdef PERL_STRICT_CR
11402 while (SPACE_OR_TAB(*t))
11405 while (SPACE_OR_TAB(*t) || *t == '\r')
11408 if (*t == '\n' || t == PL_bufend) {
11413 eol = (char *) memchr(s,'\n',PL_bufend-s);
11417 for (t = s; t < eol; t++) {
11418 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11420 goto enough; /* ~~ must be first line in formline */
11422 if (*t == '@' || *t == '^')
11426 sv_catpvn(stuff, s, eol-s);
11427 #ifndef PERL_STRICT_CR
11428 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11429 char *end = SvPVX(stuff) + SvCUR(stuff);
11432 SvCUR_set(stuff, SvCUR(stuff) - 1);
11440 if ((PL_rsfp || PL_parser->filtered)
11441 && PL_parser->form_lex_state == LEX_NORMAL) {
11444 if (PL_madskills) {
11446 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11448 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11451 PL_bufptr = PL_bufend;
11452 COPLINE_INC_WITH_HERELINES;
11453 got_some = lex_next_chunk(0);
11454 CopLINE_dec(PL_curcop);
11457 tokenstart = PL_bufptr;
11465 if (!SvCUR(stuff) || needargs)
11466 PL_lex_state = PL_parser->form_lex_state;
11467 if (SvCUR(stuff)) {
11468 PL_expect = XSTATE;
11470 start_force(PL_curforce);
11471 NEXTVAL_NEXTTOKE.ival = 0;
11472 force_next(FORMLBRACK);
11475 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11477 else if (PL_encoding)
11478 sv_recode_to_utf8(stuff, PL_encoding);
11480 start_force(PL_curforce);
11481 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11485 SvREFCNT_dec(stuff);
11487 PL_lex_formbrack = 0;
11490 if (PL_madskills) {
11492 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11494 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11495 PL_thiswhite = savewhite;
11502 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11505 const I32 oldsavestack_ix = PL_savestack_ix;
11506 CV* const outsidecv = PL_compcv;
11508 SAVEI32(PL_subline);
11509 save_item(PL_subname);
11510 SAVESPTR(PL_compcv);
11512 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11513 CvFLAGS(PL_compcv) |= flags;
11515 PL_subline = CopLINE(PL_curcop);
11516 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11517 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11518 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11519 if (outsidecv && CvPADLIST(outsidecv))
11520 CvPADLIST(PL_compcv)->xpadl_outid =
11521 PadlistNAMES(CvPADLIST(outsidecv));
11523 return oldsavestack_ix;
11527 S_yywarn(pTHX_ const char *const s, U32 flags)
11531 PERL_ARGS_ASSERT_YYWARN;
11533 PL_in_eval |= EVAL_WARNONLY;
11534 yyerror_pv(s, flags);
11535 PL_in_eval &= ~EVAL_WARNONLY;
11540 Perl_yyerror(pTHX_ const char *const s)
11542 PERL_ARGS_ASSERT_YYERROR;
11543 return yyerror_pvn(s, strlen(s), 0);
11547 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11549 PERL_ARGS_ASSERT_YYERROR_PV;
11550 return yyerror_pvn(s, strlen(s), flags);
11554 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11557 const char *context = NULL;
11560 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11561 int yychar = PL_parser->yychar;
11563 PERL_ARGS_ASSERT_YYERROR_PVN;
11565 if (!yychar || (yychar == ';' && !PL_rsfp))
11566 sv_catpvs(where_sv, "at EOF");
11567 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11568 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11569 PL_oldbufptr != PL_bufptr) {
11572 The code below is removed for NetWare because it abends/crashes on NetWare
11573 when the script has error such as not having the closing quotes like:
11574 if ($var eq "value)
11575 Checking of white spaces is anyway done in NetWare code.
11578 while (isSPACE(*PL_oldoldbufptr))
11581 context = PL_oldoldbufptr;
11582 contlen = PL_bufptr - PL_oldoldbufptr;
11584 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11585 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11588 The code below is removed for NetWare because it abends/crashes on NetWare
11589 when the script has error such as not having the closing quotes like:
11590 if ($var eq "value)
11591 Checking of white spaces is anyway done in NetWare code.
11594 while (isSPACE(*PL_oldbufptr))
11597 context = PL_oldbufptr;
11598 contlen = PL_bufptr - PL_oldbufptr;
11600 else if (yychar > 255)
11601 sv_catpvs(where_sv, "next token ???");
11602 else if (yychar == -2) { /* YYEMPTY */
11603 if (PL_lex_state == LEX_NORMAL ||
11604 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11605 sv_catpvs(where_sv, "at end of line");
11606 else if (PL_lex_inpat)
11607 sv_catpvs(where_sv, "within pattern");
11609 sv_catpvs(where_sv, "within string");
11612 sv_catpvs(where_sv, "next char ");
11614 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11615 else if (isPRINT_LC(yychar)) {
11616 const char string = yychar;
11617 sv_catpvn(where_sv, &string, 1);
11620 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11622 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11623 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11624 OutCopFILE(PL_curcop),
11625 (IV)(PL_parser->preambling == NOLINE
11626 ? CopLINE(PL_curcop)
11627 : PL_parser->preambling));
11629 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11630 UTF8fARG(UTF, contlen, context));
11632 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11633 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11634 Perl_sv_catpvf(aTHX_ msg,
11635 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11636 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11639 if (PL_in_eval & EVAL_WARNONLY) {
11640 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11644 if (PL_error_count >= 10) {
11646 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11647 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11648 SVfARG(errsv), OutCopFILE(PL_curcop));
11650 Perl_croak(aTHX_ "%s has too many errors.\n",
11651 OutCopFILE(PL_curcop));
11654 PL_in_my_stash = NULL;
11659 S_swallow_bom(pTHX_ U8 *s)
11662 const STRLEN slen = SvCUR(PL_linestr);
11664 PERL_ARGS_ASSERT_SWALLOW_BOM;
11668 if (s[1] == 0xFE) {
11669 /* UTF-16 little-endian? (or UTF-32LE?) */
11670 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11671 /* diag_listed_as: Unsupported script encoding %s */
11672 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11673 #ifndef PERL_NO_UTF16_FILTER
11674 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11676 if (PL_bufend > (char*)s) {
11677 s = add_utf16_textfilter(s, TRUE);
11680 /* diag_listed_as: Unsupported script encoding %s */
11681 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11686 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11687 #ifndef PERL_NO_UTF16_FILTER
11688 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11690 if (PL_bufend > (char *)s) {
11691 s = add_utf16_textfilter(s, FALSE);
11694 /* diag_listed_as: Unsupported script encoding %s */
11695 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11699 case BOM_UTF8_FIRST_BYTE: {
11700 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11701 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11702 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11703 s += len + 1; /* UTF-8 */
11710 if (s[2] == 0xFE && s[3] == 0xFF) {
11711 /* UTF-32 big-endian */
11712 /* diag_listed_as: Unsupported script encoding %s */
11713 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11716 else if (s[2] == 0 && s[3] != 0) {
11719 * are a good indicator of UTF-16BE. */
11720 #ifndef PERL_NO_UTF16_FILTER
11721 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11722 s = add_utf16_textfilter(s, FALSE);
11724 /* diag_listed_as: Unsupported script encoding %s */
11725 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11731 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11734 * are a good indicator of UTF-16LE. */
11735 #ifndef PERL_NO_UTF16_FILTER
11736 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11737 s = add_utf16_textfilter(s, TRUE);
11739 /* diag_listed_as: Unsupported script encoding %s */
11740 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11748 #ifndef PERL_NO_UTF16_FILTER
11750 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11753 SV *const filter = FILTER_DATA(idx);
11754 /* We re-use this each time round, throwing the contents away before we
11756 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11757 SV *const utf8_buffer = filter;
11758 IV status = IoPAGE(filter);
11759 const bool reverse = cBOOL(IoLINES(filter));
11762 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11764 /* As we're automatically added, at the lowest level, and hence only called
11765 from this file, we can be sure that we're not called in block mode. Hence
11766 don't bother writing code to deal with block mode. */
11768 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11771 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11773 DEBUG_P(PerlIO_printf(Perl_debug_log,
11774 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11775 FPTR2DPTR(void *, S_utf16_textfilter),
11776 reverse ? 'l' : 'b', idx, maxlen, status,
11777 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11784 /* First, look in our buffer of existing UTF-8 data: */
11785 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11789 } else if (status == 0) {
11791 IoPAGE(filter) = 0;
11792 nl = SvEND(utf8_buffer);
11795 STRLEN got = nl - SvPVX(utf8_buffer);
11796 /* Did we have anything to append? */
11798 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11799 /* Everything else in this code works just fine if SVp_POK isn't
11800 set. This, however, needs it, and we need it to work, else
11801 we loop infinitely because the buffer is never consumed. */
11802 sv_chop(utf8_buffer, nl);
11806 /* OK, not a complete line there, so need to read some more UTF-16.
11807 Read an extra octect if the buffer currently has an odd number. */
11811 if (SvCUR(utf16_buffer) >= 2) {
11812 /* Location of the high octet of the last complete code point.
11813 Gosh, UTF-16 is a pain. All the benefits of variable length,
11814 *coupled* with all the benefits of partial reads and
11816 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11817 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11819 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11823 /* We have the first half of a surrogate. Read more. */
11824 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11827 status = FILTER_READ(idx + 1, utf16_buffer,
11828 160 + (SvCUR(utf16_buffer) & 1));
11829 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11830 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11833 IoPAGE(filter) = status;
11838 chars = SvCUR(utf16_buffer) >> 1;
11839 have = SvCUR(utf8_buffer);
11840 SvGROW(utf8_buffer, have + chars * 3 + 1);
11843 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11844 (U8*)SvPVX_const(utf8_buffer) + have,
11845 chars * 2, &newlen);
11847 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11848 (U8*)SvPVX_const(utf8_buffer) + have,
11849 chars * 2, &newlen);
11851 SvCUR_set(utf8_buffer, have + newlen);
11854 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11855 it's private to us, and utf16_to_utf8{,reversed} take a
11856 (pointer,length) pair, rather than a NUL-terminated string. */
11857 if(SvCUR(utf16_buffer) & 1) {
11858 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11859 SvCUR_set(utf16_buffer, 1);
11861 SvCUR_set(utf16_buffer, 0);
11864 DEBUG_P(PerlIO_printf(Perl_debug_log,
11865 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11867 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11868 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11873 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11875 SV *filter = filter_add(S_utf16_textfilter, NULL);
11877 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11879 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11880 sv_setpvs(filter, "");
11881 IoLINES(filter) = reversed;
11882 IoPAGE(filter) = 1; /* Not EOF */
11884 /* Sadly, we have to return a valid pointer, come what may, so we have to
11885 ignore any error return from this. */
11886 SvCUR_set(PL_linestr, 0);
11887 if (FILTER_READ(0, PL_linestr, 0)) {
11888 SvUTF8_on(PL_linestr);
11890 SvUTF8_on(PL_linestr);
11892 PL_bufend = SvEND(PL_linestr);
11893 return (U8*)SvPVX(PL_linestr);
11898 Returns a pointer to the next character after the parsed
11899 vstring, as well as updating the passed in sv.
11901 Function must be called like
11903 sv = sv_2mortal(newSV(5));
11904 s = scan_vstring(s,e,sv);
11906 where s and e are the start and end of the string.
11907 The sv should already be large enough to store the vstring
11908 passed in, for performance reasons.
11910 This function may croak if fatal warnings are enabled in the
11911 calling scope, hence the sv_2mortal in the example (to prevent
11912 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11918 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11921 const char *pos = s;
11922 const char *start = s;
11924 PERL_ARGS_ASSERT_SCAN_VSTRING;
11926 if (*pos == 'v') pos++; /* get past 'v' */
11927 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11929 if ( *pos != '.') {
11930 /* this may not be a v-string if followed by => */
11931 const char *next = pos;
11932 while (next < e && isSPACE(*next))
11934 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11935 /* return string not v-string */
11936 sv_setpvn(sv,(char *)s,pos-s);
11937 return (char *)pos;
11941 if (!isALPHA(*pos)) {
11942 U8 tmpbuf[UTF8_MAXBYTES+1];
11945 s++; /* get past 'v' */
11950 /* this is atoi() that tolerates underscores */
11953 const char *end = pos;
11955 while (--end >= s) {
11957 const UV orev = rev;
11958 rev += (*end - '0') * mult;
11961 /* diag_listed_as: Integer overflow in %s number */
11962 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11963 "Integer overflow in decimal number");
11967 if (rev > 0x7FFFFFFF)
11968 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11970 /* Append native character for the rev point */
11971 tmpend = uvchr_to_utf8(tmpbuf, rev);
11972 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11973 if (!UVCHR_IS_INVARIANT(rev))
11975 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11981 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11985 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11992 Perl_keyword_plugin_standard(pTHX_
11993 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11995 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11996 PERL_UNUSED_CONTEXT;
11997 PERL_UNUSED_ARG(keyword_ptr);
11998 PERL_UNUSED_ARG(keyword_len);
11999 PERL_UNUSED_ARG(op_ptr);
12000 return KEYWORD_PLUGIN_DECLINE;
12003 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12005 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12007 SAVEI32(PL_lex_brackets);
12008 if (PL_lex_brackets > 100)
12009 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12010 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12011 SAVEI32(PL_lex_allbrackets);
12012 PL_lex_allbrackets = 0;
12013 SAVEI8(PL_lex_fakeeof);
12014 PL_lex_fakeeof = (U8)fakeeof;
12015 if(yyparse(gramtype) && !PL_parser->error_count)
12016 qerror(Perl_mess(aTHX_ "Parse error"));
12019 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12021 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12025 SAVEVPTR(PL_eval_root);
12026 PL_eval_root = NULL;
12027 parse_recdescent(gramtype, fakeeof);
12033 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12035 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12038 if (flags & ~PARSE_OPTIONAL)
12039 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12040 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12041 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12042 if (!PL_parser->error_count)
12043 qerror(Perl_mess(aTHX_ "Parse error"));
12044 exprop = newOP(OP_NULL, 0);
12050 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12052 Parse a Perl arithmetic expression. This may contain operators of precedence
12053 down to the bit shift operators. The expression must be followed (and thus
12054 terminated) either by a comparison or lower-precedence operator or by
12055 something that would normally terminate an expression such as semicolon.
12056 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12057 otherwise it is mandatory. It is up to the caller to ensure that the
12058 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12059 the source of the code to be parsed and the lexical context for the
12062 The op tree representing the expression is returned. If an optional
12063 expression is absent, a null pointer is returned, otherwise the pointer
12066 If an error occurs in parsing or compilation, in most cases a valid op
12067 tree is returned anyway. The error is reflected in the parser state,
12068 normally resulting in a single exception at the top level of parsing
12069 which covers all the compilation errors that occurred. Some compilation
12070 errors, however, will throw an exception immediately.
12076 Perl_parse_arithexpr(pTHX_ U32 flags)
12078 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12082 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12084 Parse a Perl term expression. This may contain operators of precedence
12085 down to the assignment operators. The expression must be followed (and thus
12086 terminated) either by a comma or lower-precedence operator or by
12087 something that would normally terminate an expression such as semicolon.
12088 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12089 otherwise it is mandatory. It is up to the caller to ensure that the
12090 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12091 the source of the code to be parsed and the lexical context for the
12094 The op tree representing the expression is returned. If an optional
12095 expression is absent, a null pointer is returned, otherwise the pointer
12098 If an error occurs in parsing or compilation, in most cases a valid op
12099 tree is returned anyway. The error is reflected in the parser state,
12100 normally resulting in a single exception at the top level of parsing
12101 which covers all the compilation errors that occurred. Some compilation
12102 errors, however, will throw an exception immediately.
12108 Perl_parse_termexpr(pTHX_ U32 flags)
12110 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12114 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12116 Parse a Perl list expression. This may contain operators of precedence
12117 down to the comma operator. The expression must be followed (and thus
12118 terminated) either by a low-precedence logic operator such as C<or> or by
12119 something that would normally terminate an expression such as semicolon.
12120 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12121 otherwise it is mandatory. It is up to the caller to ensure that the
12122 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12123 the source of the code to be parsed and the lexical context for the
12126 The op tree representing the expression is returned. If an optional
12127 expression is absent, a null pointer is returned, otherwise the pointer
12130 If an error occurs in parsing or compilation, in most cases a valid op
12131 tree is returned anyway. The error is reflected in the parser state,
12132 normally resulting in a single exception at the top level of parsing
12133 which covers all the compilation errors that occurred. Some compilation
12134 errors, however, will throw an exception immediately.
12140 Perl_parse_listexpr(pTHX_ U32 flags)
12142 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12146 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12148 Parse a single complete Perl expression. This allows the full
12149 expression grammar, including the lowest-precedence operators such
12150 as C<or>. The expression must be followed (and thus terminated) by a
12151 token that an expression would normally be terminated by: end-of-file,
12152 closing bracketing punctuation, semicolon, or one of the keywords that
12153 signals a postfix expression-statement modifier. If I<flags> includes
12154 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12155 mandatory. It is up to the caller to ensure that the dynamic parser
12156 state (L</PL_parser> et al) is correctly set to reflect the source of
12157 the code to be parsed and the lexical context for the expression.
12159 The op tree representing the expression is returned. If an optional
12160 expression is absent, a null pointer is returned, otherwise the pointer
12163 If an error occurs in parsing or compilation, in most cases a valid op
12164 tree is returned anyway. The error is reflected in the parser state,
12165 normally resulting in a single exception at the top level of parsing
12166 which covers all the compilation errors that occurred. Some compilation
12167 errors, however, will throw an exception immediately.
12173 Perl_parse_fullexpr(pTHX_ U32 flags)
12175 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12179 =for apidoc Amx|OP *|parse_block|U32 flags
12181 Parse a single complete Perl code block. This consists of an opening
12182 brace, a sequence of statements, and a closing brace. The block
12183 constitutes a lexical scope, so C<my> variables and various compile-time
12184 effects can be contained within it. It is up to the caller to ensure
12185 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12186 reflect the source of the code to be parsed and the lexical context for
12189 The op tree representing the code block is returned. This is always a
12190 real op, never a null pointer. It will normally be a C<lineseq> list,
12191 including C<nextstate> or equivalent ops. No ops to construct any kind
12192 of runtime scope are included by virtue of it being a block.
12194 If an error occurs in parsing or compilation, in most cases a valid op
12195 tree (most likely null) is returned anyway. The error is reflected in
12196 the parser state, normally resulting in a single exception at the top
12197 level of parsing which covers all the compilation errors that occurred.
12198 Some compilation errors, however, will throw an exception immediately.
12200 The I<flags> parameter is reserved for future use, and must always
12207 Perl_parse_block(pTHX_ U32 flags)
12210 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12211 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12215 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12217 Parse a single unadorned Perl statement. This may be a normal imperative
12218 statement or a declaration that has compile-time effect. It does not
12219 include any label or other affixture. It is up to the caller to ensure
12220 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12221 reflect the source of the code to be parsed and the lexical context for
12224 The op tree representing the statement is returned. This may be a
12225 null pointer if the statement is null, for example if it was actually
12226 a subroutine definition (which has compile-time side effects). If not
12227 null, it will be ops directly implementing the statement, suitable to
12228 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12229 equivalent op (except for those embedded in a scope contained entirely
12230 within the statement).
12232 If an error occurs in parsing or compilation, in most cases a valid op
12233 tree (most likely null) is returned anyway. The error is reflected in
12234 the parser state, normally resulting in a single exception at the top
12235 level of parsing which covers all the compilation errors that occurred.
12236 Some compilation errors, however, will throw an exception immediately.
12238 The I<flags> parameter is reserved for future use, and must always
12245 Perl_parse_barestmt(pTHX_ U32 flags)
12248 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12249 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12253 =for apidoc Amx|SV *|parse_label|U32 flags
12255 Parse a single label, possibly optional, of the type that may prefix a
12256 Perl statement. It is up to the caller to ensure that the dynamic parser
12257 state (L</PL_parser> et al) is correctly set to reflect the source of
12258 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12259 label is optional, otherwise it is mandatory.
12261 The name of the label is returned in the form of a fresh scalar. If an
12262 optional label is absent, a null pointer is returned.
12264 If an error occurs in parsing, which can only occur if the label is
12265 mandatory, a valid label is returned anyway. The error is reflected in
12266 the parser state, normally resulting in a single exception at the top
12267 level of parsing which covers all the compilation errors that occurred.
12273 Perl_parse_label(pTHX_ U32 flags)
12275 if (flags & ~PARSE_OPTIONAL)
12276 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12277 if (PL_lex_state == LEX_KNOWNEXT) {
12278 PL_parser->yychar = yylex();
12279 if (PL_parser->yychar == LABEL) {
12280 char * const lpv = pl_yylval.pval;
12281 STRLEN llen = strlen(lpv);
12282 PL_parser->yychar = YYEMPTY;
12283 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12290 STRLEN wlen, bufptr_pos;
12293 if (!isIDFIRST_lazy_if(s, UTF))
12295 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12296 if (word_takes_any_delimeter(s, wlen))
12298 bufptr_pos = s - SvPVX(PL_linestr);
12300 lex_read_space(LEX_KEEP_PREVIOUS);
12302 s = SvPVX(PL_linestr) + bufptr_pos;
12303 if (t[0] == ':' && t[1] != ':') {
12304 PL_oldoldbufptr = PL_oldbufptr;
12307 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12311 if (flags & PARSE_OPTIONAL) {
12314 qerror(Perl_mess(aTHX_ "Parse error"));
12315 return newSVpvs("x");
12322 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12324 Parse a single complete Perl statement. This may be a normal imperative
12325 statement or a declaration that has compile-time effect, and may include
12326 optional labels. It is up to the caller to ensure that the dynamic
12327 parser state (L</PL_parser> et al) is correctly set to reflect the source
12328 of the code to be parsed and the lexical context for the statement.
12330 The op tree representing the statement is returned. This may be a
12331 null pointer if the statement is null, for example if it was actually
12332 a subroutine definition (which has compile-time side effects). If not
12333 null, it will be the result of a L</newSTATEOP> call, normally including
12334 a C<nextstate> or equivalent op.
12336 If an error occurs in parsing or compilation, in most cases a valid op
12337 tree (most likely null) is returned anyway. The error is reflected in
12338 the parser state, normally resulting in a single exception at the top
12339 level of parsing which covers all the compilation errors that occurred.
12340 Some compilation errors, however, will throw an exception immediately.
12342 The I<flags> parameter is reserved for future use, and must always
12349 Perl_parse_fullstmt(pTHX_ U32 flags)
12352 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12353 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12357 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12359 Parse a sequence of zero or more Perl statements. These may be normal
12360 imperative statements, including optional labels, or declarations
12361 that have compile-time effect, or any mixture thereof. The statement
12362 sequence ends when a closing brace or end-of-file is encountered in a
12363 place where a new statement could have validly started. It is up to
12364 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12365 is correctly set to reflect the source of the code to be parsed and the
12366 lexical context for the statements.
12368 The op tree representing the statement sequence is returned. This may
12369 be a null pointer if the statements were all null, for example if there
12370 were no statements or if there were only subroutine definitions (which
12371 have compile-time side effects). If not null, it will be a C<lineseq>
12372 list, normally including C<nextstate> or equivalent ops.
12374 If an error occurs in parsing or compilation, in most cases a valid op
12375 tree is returned anyway. The error is reflected in the parser state,
12376 normally resulting in a single exception at the top level of parsing
12377 which covers all the compilation errors that occurred. Some compilation
12378 errors, however, will throw an exception immediately.
12380 The I<flags> parameter is reserved for future use, and must always
12387 Perl_parse_stmtseq(pTHX_ U32 flags)
12392 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12393 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12394 c = lex_peek_unichar(0);
12395 if (c != -1 && c != /*{*/'}')
12396 qerror(Perl_mess(aTHX_ "Parse error"));
12402 * c-indentation-style: bsd
12403 * c-basic-offset: 4
12404 * indent-tabs-mode: nil
12407 * ex: set ts=8 sts=4 sw=4 et: