3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN0OP : zero-argument function, with its op created in this file
225 * FUN1 : not used, except for not, which isn't a UNIOP
226 * BOop : bitwise or or xor
228 * SHop : shift operator
229 * PWop : power operator
230 * PMop : pattern-matching operator
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
234 * Rop : relational operator <= != gt
236 * Also see LOP and lop() below.
239 #ifdef DEBUGGING /* Serve -DT. */
240 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
242 # define REPORT(retval) (retval)
245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
267 /* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
272 #define UNI3(f,x,have_x) { \
273 pl_yylval.ival = f; \
274 if (have_x) PL_expect = x; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 #define UNI(f) UNI3(f,XTERM,1)
284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285 #define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
290 #define UNIBRACK(f) UNI3(f,0,0)
292 /* grandfather return to old style */
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
303 #define COPLINE_INC_WITH_HERELINES \
305 CopLINE_inc(PL_curcop); \
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
314 /* how to interpret the pl_yylval associated with the token */
318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
323 static struct debug_tokens {
325 enum token_type type;
327 } const debug_tokens[] =
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
375 { PEG, TOKENTYPE_NONE, "PEG" },
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
402 { 0, TOKENTYPE_NONE, NULL }
405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
412 PERL_ARGS_ASSERT_TOKEREPORT;
415 const char *name = NULL;
416 enum token_type type = TOKENTYPE_NONE;
417 const struct debug_tokens *p;
418 SV* const report = newSVpvs("<== ");
420 for (p = debug_tokens; p->token; p++) {
421 if (p->token == (int)rv) {
428 Perl_sv_catpv(aTHX_ report, name);
429 else if ((char)rv > ' ' && (char)rv <= '~')
431 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
433 sv_catpvs(report, " (pending identifier)");
436 sv_catpvs(report, "EOF");
438 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
443 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
445 case TOKENTYPE_OPNUM:
446 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
447 PL_op_name[lvalp->ival]);
450 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
452 case TOKENTYPE_OPVAL:
454 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
455 PL_op_name[lvalp->opval->op_type]);
456 if (lvalp->opval->op_type == OP_CONST) {
457 Perl_sv_catpvf(aTHX_ report, " %s",
458 SvPEEK(cSVOPx_sv(lvalp->opval)));
463 sv_catpvs(report, "(opval=null)");
466 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
472 /* print the buffer with suitable escapes */
475 S_printbuf(pTHX_ const char *const fmt, const char *const s)
477 SV* const tmp = newSVpvs("");
479 PERL_ARGS_ASSERT_PRINTBUF;
481 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
488 S_deprecate_commaless_var_list(pTHX) {
490 deprecate("comma-less variable list");
491 return REPORT(','); /* grandfather non-comma-format format */
497 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
498 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
502 S_ao(pTHX_ int toketype)
505 if (*PL_bufptr == '=') {
507 if (toketype == ANDAND)
508 pl_yylval.ival = OP_ANDASSIGN;
509 else if (toketype == OROR)
510 pl_yylval.ival = OP_ORASSIGN;
511 else if (toketype == DORDOR)
512 pl_yylval.ival = OP_DORASSIGN;
520 * When Perl expects an operator and finds something else, no_op
521 * prints the warning. It always prints "<something> found where
522 * operator expected. It prints "Missing semicolon on previous line?"
523 * if the surprise occurs at the start of the line. "do you need to
524 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
525 * where the compiler doesn't know if foo is a method call or a function.
526 * It prints "Missing operator before end of line" if there's nothing
527 * after the missing operator, or "... before <...>" if there is something
528 * after the missing operator.
532 S_no_op(pTHX_ const char *const what, char *s)
535 char * const oldbp = PL_bufptr;
536 const bool is_first = (PL_oldbufptr == PL_linestart);
538 PERL_ARGS_ASSERT_NO_OP;
544 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
545 if (ckWARN_d(WARN_SYNTAX)) {
547 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
548 "\t(Missing semicolon on previous line?)\n");
549 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
551 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
552 t += UTF ? UTF8SKIP(t) : 1)
554 if (t < PL_bufptr && isSPACE(*t))
555 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
556 "\t(Do you need to predeclare %"UTF8f"?)\n",
557 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
562 "\t(Missing operator before %"UTF8f"?)\n",
563 UTF8fARG(UTF, s - oldbp, oldbp));
571 * Complain about missing quote/regexp/heredoc terminator.
572 * If it's called with NULL then it cauterizes the line buffer.
573 * If we're in a delimited string and the delimiter is a control
574 * character, it's reformatted into a two-char sequence like ^C.
579 S_missingterm(pTHX_ char *s)
585 char * const nl = strrchr(s,'\n');
589 else if (isCNTRL(PL_multi_close)) {
591 tmpbuf[1] = (char)toCTRL(PL_multi_close);
596 *tmpbuf = (char)PL_multi_close;
600 q = strchr(s,'"') ? '\'' : '"';
601 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
607 * Check whether the named feature is enabled.
610 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
613 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
615 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
617 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
619 if (namelen > MAX_FEATURE_LEN)
621 memcpy(&he_name[8], name, namelen);
623 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
624 REFCOUNTED_HE_EXISTS));
628 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
629 * utf16-to-utf8-reversed.
632 #ifdef PERL_CR_FILTER
636 const char *s = SvPVX_const(sv);
637 const char * const e = s + SvCUR(sv);
639 PERL_ARGS_ASSERT_STRIP_RETURN;
641 /* outer loop optimized to do nothing if there are no CR-LFs */
643 if (*s++ == '\r' && *s == '\n') {
644 /* hit a CR-LF, need to copy the rest */
648 if (*s == '\r' && s[1] == '\n')
659 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
661 const I32 count = FILTER_READ(idx+1, sv, maxlen);
662 if (count > 0 && !maxlen)
669 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
671 Creates and initialises a new lexer/parser state object, supplying
672 a context in which to lex and parse from a new source of Perl code.
673 A pointer to the new state object is placed in L</PL_parser>. An entry
674 is made on the save stack so that upon unwinding the new state object
675 will be destroyed and the former value of L</PL_parser> will be restored.
676 Nothing else need be done to clean up the parsing context.
678 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
679 non-null, provides a string (in SV form) containing code to be parsed.
680 A copy of the string is made, so subsequent modification of I<line>
681 does not affect parsing. I<rsfp>, if non-null, provides an input stream
682 from which code will be read to be parsed. If both are non-null, the
683 code in I<line> comes first and must consist of complete lines of input,
684 and I<rsfp> supplies the remainder of the source.
686 The I<flags> parameter is reserved for future use. Currently it is only
687 used by perl internally, so extensions should always pass zero.
692 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
693 can share filters with the current parser.
694 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
695 caller, hence isn't owned by the parser, so shouldn't be closed on parser
696 destruction. This is used to handle the case of defaulting to reading the
697 script from the standard input because no filename was given on the command
698 line (without getting confused by situation where STDIN has been closed, so
699 the script handle is opened on fd 0) */
702 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
705 const char *s = NULL;
706 yy_parser *parser, *oparser;
707 if (flags && flags & ~LEX_START_FLAGS)
708 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
710 /* create and initialise a parser */
712 Newxz(parser, 1, yy_parser);
713 parser->old_parser = oparser = PL_parser;
716 parser->stack = NULL;
718 parser->stack_size = 0;
720 /* on scope exit, free this parser and restore any outer one */
722 parser->saved_curcop = PL_curcop;
724 /* initialise lexer state */
727 parser->curforce = -1;
729 parser->nexttoke = 0;
731 parser->error_count = oparser ? oparser->error_count : 0;
732 parser->copline = NOLINE;
733 parser->lex_state = LEX_NORMAL;
734 parser->expect = XSTATE;
736 parser->rsfp_filters =
737 !(flags & LEX_START_SAME_FILTER) || !oparser
739 : MUTABLE_AV(SvREFCNT_inc(
740 oparser->rsfp_filters
741 ? oparser->rsfp_filters
742 : (oparser->rsfp_filters = newAV())
745 Newx(parser->lex_brackstack, 120, char);
746 Newx(parser->lex_casestack, 12, char);
747 *parser->lex_casestack = '\0';
748 Newxz(parser->lex_shared, 1, LEXSHARED);
752 s = SvPV_const(line, len);
753 parser->linestr = flags & LEX_START_COPIED
754 ? SvREFCNT_inc_simple_NN(line)
755 : newSVpvn_flags(s, len, SvUTF8(line));
756 sv_catpvs(parser->linestr, "\n;");
758 parser->linestr = newSVpvs("\n;");
760 parser->oldoldbufptr =
763 parser->linestart = SvPVX(parser->linestr);
764 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
765 parser->last_lop = parser->last_uni = NULL;
766 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
767 |LEX_DONT_CLOSE_RSFP);
769 parser->in_pod = parser->filtered = 0;
773 /* delete a parser object */
776 Perl_parser_free(pTHX_ const yy_parser *parser)
778 PERL_ARGS_ASSERT_PARSER_FREE;
780 PL_curcop = parser->saved_curcop;
781 SvREFCNT_dec(parser->linestr);
783 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
784 PerlIO_clearerr(parser->rsfp);
785 else if (parser->rsfp && (!parser->old_parser ||
786 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
787 PerlIO_close(parser->rsfp);
788 SvREFCNT_dec(parser->rsfp_filters);
789 SvREFCNT_dec(parser->lex_stuff);
790 SvREFCNT_dec(parser->sublex_info.repl);
792 Safefree(parser->lex_brackstack);
793 Safefree(parser->lex_casestack);
794 Safefree(parser->lex_shared);
795 PL_parser = parser->old_parser;
800 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
803 I32 nexttoke = parser->lasttoke;
805 I32 nexttoke = parser->nexttoke;
807 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
810 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
812 && parser->nexttoke[nexttoke].next_val.opval
813 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
814 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
815 op_free(parser->nexttoke[nexttoke].next_val.opval);
816 parser->nexttoke[nexttoke].next_val.opval = NULL;
819 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
820 && parser->nextval[nexttoke].opval
821 && parser->nextval[nexttoke].opval->op_slabbed
822 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
823 op_free(parser->nextval[nexttoke].opval);
824 parser->nextval[nexttoke].opval = NULL;
832 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
834 Buffer scalar containing the chunk currently under consideration of the
835 text currently being lexed. This is always a plain string scalar (for
836 which C<SvPOK> is true). It is not intended to be used as a scalar by
837 normal scalar means; instead refer to the buffer directly by the pointer
838 variables described below.
840 The lexer maintains various C<char*> pointers to things in the
841 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
842 reallocated, all of these pointers must be updated. Don't attempt to
843 do this manually, but rather use L</lex_grow_linestr> if you need to
844 reallocate the buffer.
846 The content of the text chunk in the buffer is commonly exactly one
847 complete line of input, up to and including a newline terminator,
848 but there are situations where it is otherwise. The octets of the
849 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
850 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
851 flag on this scalar, which may disagree with it.
853 For direct examination of the buffer, the variable
854 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
855 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
856 of these pointers is usually preferable to examination of the scalar
857 through normal scalar means.
859 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
861 Direct pointer to the end of the chunk of text currently being lexed, the
862 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
863 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
864 always located at the end of the buffer, and does not count as part of
865 the buffer's contents.
867 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
869 Points to the current position of lexing inside the lexer buffer.
870 Characters around this point may be freely examined, within
871 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
872 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
873 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
875 Lexing code (whether in the Perl core or not) moves this pointer past
876 the characters that it consumes. It is also expected to perform some
877 bookkeeping whenever a newline character is consumed. This movement
878 can be more conveniently performed by the function L</lex_read_to>,
879 which handles newlines appropriately.
881 Interpretation of the buffer's octets can be abstracted out by
882 using the slightly higher-level functions L</lex_peek_unichar> and
883 L</lex_read_unichar>.
885 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
887 Points to the start of the current line inside the lexer buffer.
888 This is useful for indicating at which column an error occurred, and
889 not much else. This must be updated by any lexing code that consumes
890 a newline; the function L</lex_read_to> handles this detail.
896 =for apidoc Amx|bool|lex_bufutf8
898 Indicates whether the octets in the lexer buffer
899 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
900 of Unicode characters. If not, they should be interpreted as Latin-1
901 characters. This is analogous to the C<SvUTF8> flag for scalars.
903 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
904 contains valid UTF-8. Lexing code must be robust in the face of invalid
907 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
908 is significant, but not the whole story regarding the input character
909 encoding. Normally, when a file is being read, the scalar contains octets
910 and its C<SvUTF8> flag is off, but the octets should be interpreted as
911 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
912 however, the scalar may have the C<SvUTF8> flag on, and in this case its
913 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
914 is in effect. This logic may change in the future; use this function
915 instead of implementing the logic yourself.
921 Perl_lex_bufutf8(pTHX)
927 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
929 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
930 at least I<len> octets (including terminating NUL). Returns a
931 pointer to the reallocated buffer. This is necessary before making
932 any direct modification of the buffer that would increase its length.
933 L</lex_stuff_pvn> provides a more convenient way to insert text into
936 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
937 this function updates all of the lexer's variables that point directly
944 Perl_lex_grow_linestr(pTHX_ STRLEN len)
948 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
949 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
950 linestr = PL_parser->linestr;
951 buf = SvPVX(linestr);
952 if (len <= SvLEN(linestr))
954 bufend_pos = PL_parser->bufend - buf;
955 bufptr_pos = PL_parser->bufptr - buf;
956 oldbufptr_pos = PL_parser->oldbufptr - buf;
957 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
958 linestart_pos = PL_parser->linestart - buf;
959 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
960 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
961 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
962 PL_parser->lex_shared->re_eval_start - buf : 0;
964 buf = sv_grow(linestr, len);
966 PL_parser->bufend = buf + bufend_pos;
967 PL_parser->bufptr = buf + bufptr_pos;
968 PL_parser->oldbufptr = buf + oldbufptr_pos;
969 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
970 PL_parser->linestart = buf + linestart_pos;
971 if (PL_parser->last_uni)
972 PL_parser->last_uni = buf + last_uni_pos;
973 if (PL_parser->last_lop)
974 PL_parser->last_lop = buf + last_lop_pos;
975 if (PL_parser->lex_shared->re_eval_start)
976 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
981 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
983 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
984 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
985 reallocating the buffer if necessary. This means that lexing code that
986 runs later will see the characters as if they had appeared in the input.
987 It is not recommended to do this as part of normal parsing, and most
988 uses of this facility run the risk of the inserted characters being
989 interpreted in an unintended manner.
991 The string to be inserted is represented by I<len> octets starting
992 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
993 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
994 The characters are recoded for the lexer buffer, according to how the
995 buffer is currently being interpreted (L</lex_bufutf8>). If a string
996 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
997 function is more convenient.
1003 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1007 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1008 if (flags & ~(LEX_STUFF_UTF8))
1009 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1011 if (flags & LEX_STUFF_UTF8) {
1014 STRLEN highhalf = 0; /* Count of variants */
1015 const char *p, *e = pv+len;
1016 for (p = pv; p != e; p++) {
1017 if (! UTF8_IS_INVARIANT(*p)) {
1023 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1024 bufptr = PL_parser->bufptr;
1025 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1026 SvCUR_set(PL_parser->linestr,
1027 SvCUR(PL_parser->linestr) + len+highhalf);
1028 PL_parser->bufend += len+highhalf;
1029 for (p = pv; p != e; p++) {
1031 if (! UTF8_IS_INVARIANT(c)) {
1032 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1033 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1035 *bufptr++ = (char)c;
1040 if (flags & LEX_STUFF_UTF8) {
1041 STRLEN highhalf = 0;
1042 const char *p, *e = pv+len;
1043 for (p = pv; p != e; p++) {
1045 if (UTF8_IS_ABOVE_LATIN1(c)) {
1046 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1047 "non-Latin-1 character into Latin-1 input");
1048 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1051 } else if (! UTF8_IS_INVARIANT(c)) {
1052 /* malformed UTF-8 */
1054 SAVESPTR(PL_warnhook);
1055 PL_warnhook = PERL_WARNHOOK_FATAL;
1056 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1062 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1063 bufptr = PL_parser->bufptr;
1064 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1065 SvCUR_set(PL_parser->linestr,
1066 SvCUR(PL_parser->linestr) + len-highhalf);
1067 PL_parser->bufend += len-highhalf;
1070 if (UTF8_IS_INVARIANT(*p)) {
1076 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1082 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1083 bufptr = PL_parser->bufptr;
1084 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1085 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1086 PL_parser->bufend += len;
1087 Copy(pv, bufptr, len, char);
1093 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1095 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1096 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1097 reallocating the buffer if necessary. This means that lexing code that
1098 runs later will see the characters as if they had appeared in the input.
1099 It is not recommended to do this as part of normal parsing, and most
1100 uses of this facility run the risk of the inserted characters being
1101 interpreted in an unintended manner.
1103 The string to be inserted is represented by octets starting at I<pv>
1104 and continuing to the first nul. These octets are interpreted as either
1105 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1106 in I<flags>. The characters are recoded for the lexer buffer, according
1107 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1108 If it is not convenient to nul-terminate a string to be inserted, the
1109 L</lex_stuff_pvn> function is more appropriate.
1115 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1117 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1118 lex_stuff_pvn(pv, strlen(pv), flags);
1122 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1124 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1125 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1126 reallocating the buffer if necessary. This means that lexing code that
1127 runs later will see the characters as if they had appeared in the input.
1128 It is not recommended to do this as part of normal parsing, and most
1129 uses of this facility run the risk of the inserted characters being
1130 interpreted in an unintended manner.
1132 The string to be inserted is the string value of I<sv>. The characters
1133 are recoded for the lexer buffer, according to how the buffer is currently
1134 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1135 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1136 need to construct a scalar.
1142 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1146 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1148 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1150 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1154 =for apidoc Amx|void|lex_unstuff|char *ptr
1156 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1157 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1158 This hides the discarded text from any lexing code that runs later,
1159 as if the text had never appeared.
1161 This is not the normal way to consume lexed text. For that, use
1168 Perl_lex_unstuff(pTHX_ char *ptr)
1172 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1173 buf = PL_parser->bufptr;
1175 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1178 bufend = PL_parser->bufend;
1180 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1181 unstuff_len = ptr - buf;
1182 Move(ptr, buf, bufend+1-ptr, char);
1183 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1184 PL_parser->bufend = bufend - unstuff_len;
1188 =for apidoc Amx|void|lex_read_to|char *ptr
1190 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1191 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1192 performing the correct bookkeeping whenever a newline character is passed.
1193 This is the normal way to consume lexed text.
1195 Interpretation of the buffer's octets can be abstracted out by
1196 using the slightly higher-level functions L</lex_peek_unichar> and
1197 L</lex_read_unichar>.
1203 Perl_lex_read_to(pTHX_ char *ptr)
1206 PERL_ARGS_ASSERT_LEX_READ_TO;
1207 s = PL_parser->bufptr;
1208 if (ptr < s || ptr > PL_parser->bufend)
1209 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1210 for (; s != ptr; s++)
1212 COPLINE_INC_WITH_HERELINES;
1213 PL_parser->linestart = s+1;
1215 PL_parser->bufptr = ptr;
1219 =for apidoc Amx|void|lex_discard_to|char *ptr
1221 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1222 up to I<ptr>. The remaining content of the buffer will be moved, and
1223 all pointers into the buffer updated appropriately. I<ptr> must not
1224 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1225 it is not permitted to discard text that has yet to be lexed.
1227 Normally it is not necessarily to do this directly, because it suffices to
1228 use the implicit discarding behaviour of L</lex_next_chunk> and things
1229 based on it. However, if a token stretches across multiple lines,
1230 and the lexing code has kept multiple lines of text in the buffer for
1231 that purpose, then after completion of the token it would be wise to
1232 explicitly discard the now-unneeded earlier lines, to avoid future
1233 multi-line tokens growing the buffer without bound.
1239 Perl_lex_discard_to(pTHX_ char *ptr)
1243 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1244 buf = SvPVX(PL_parser->linestr);
1246 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1249 if (ptr > PL_parser->bufptr)
1250 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1251 discard_len = ptr - buf;
1252 if (PL_parser->oldbufptr < ptr)
1253 PL_parser->oldbufptr = ptr;
1254 if (PL_parser->oldoldbufptr < ptr)
1255 PL_parser->oldoldbufptr = ptr;
1256 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1257 PL_parser->last_uni = NULL;
1258 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1259 PL_parser->last_lop = NULL;
1260 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1261 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1262 PL_parser->bufend -= discard_len;
1263 PL_parser->bufptr -= discard_len;
1264 PL_parser->oldbufptr -= discard_len;
1265 PL_parser->oldoldbufptr -= discard_len;
1266 if (PL_parser->last_uni)
1267 PL_parser->last_uni -= discard_len;
1268 if (PL_parser->last_lop)
1269 PL_parser->last_lop -= discard_len;
1273 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1275 Reads in the next chunk of text to be lexed, appending it to
1276 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1277 looked to the end of the current chunk and wants to know more. It is
1278 usual, but not necessary, for lexing to have consumed the entirety of
1279 the current chunk at this time.
1281 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1282 chunk (i.e., the current chunk has been entirely consumed), normally the
1283 current chunk will be discarded at the same time that the new chunk is
1284 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1285 will not be discarded. If the current chunk has not been entirely
1286 consumed, then it will not be discarded regardless of the flag.
1288 Returns true if some new text was added to the buffer, or false if the
1289 buffer has reached the end of the input text.
1294 #define LEX_FAKE_EOF 0x80000000
1295 #define LEX_NO_TERM 0x40000000
1298 Perl_lex_next_chunk(pTHX_ U32 flags)
1302 STRLEN old_bufend_pos, new_bufend_pos;
1303 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1304 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1305 bool got_some_for_debugger = 0;
1307 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1308 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1309 linestr = PL_parser->linestr;
1310 buf = SvPVX(linestr);
1311 if (!(flags & LEX_KEEP_PREVIOUS) &&
1312 PL_parser->bufptr == PL_parser->bufend) {
1313 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1315 if (PL_parser->last_uni != PL_parser->bufend)
1316 PL_parser->last_uni = NULL;
1317 if (PL_parser->last_lop != PL_parser->bufend)
1318 PL_parser->last_lop = NULL;
1319 last_uni_pos = last_lop_pos = 0;
1323 old_bufend_pos = PL_parser->bufend - buf;
1324 bufptr_pos = PL_parser->bufptr - buf;
1325 oldbufptr_pos = PL_parser->oldbufptr - buf;
1326 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1327 linestart_pos = PL_parser->linestart - buf;
1328 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1329 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1331 if (flags & LEX_FAKE_EOF) {
1333 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1335 } else if (filter_gets(linestr, old_bufend_pos)) {
1337 got_some_for_debugger = 1;
1338 } else if (flags & LEX_NO_TERM) {
1341 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1342 sv_setpvs(linestr, "");
1344 /* End of real input. Close filehandle (unless it was STDIN),
1345 * then add implicit termination.
1347 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1348 PerlIO_clearerr(PL_parser->rsfp);
1349 else if (PL_parser->rsfp)
1350 (void)PerlIO_close(PL_parser->rsfp);
1351 PL_parser->rsfp = NULL;
1352 PL_parser->in_pod = PL_parser->filtered = 0;
1354 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1357 if (!PL_in_eval && PL_minus_p) {
1359 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1360 PL_minus_n = PL_minus_p = 0;
1361 } else if (!PL_in_eval && PL_minus_n) {
1362 sv_catpvs(linestr, /*{*/";}");
1365 sv_catpvs(linestr, ";");
1368 buf = SvPVX(linestr);
1369 new_bufend_pos = SvCUR(linestr);
1370 PL_parser->bufend = buf + new_bufend_pos;
1371 PL_parser->bufptr = buf + bufptr_pos;
1372 PL_parser->oldbufptr = buf + oldbufptr_pos;
1373 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1374 PL_parser->linestart = buf + linestart_pos;
1375 if (PL_parser->last_uni)
1376 PL_parser->last_uni = buf + last_uni_pos;
1377 if (PL_parser->last_lop)
1378 PL_parser->last_lop = buf + last_lop_pos;
1379 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1380 PL_curstash != PL_debstash) {
1381 /* debugger active and we're not compiling the debugger code,
1382 * so store the line into the debugger's array of lines
1384 update_debugger_info(NULL, buf+old_bufend_pos,
1385 new_bufend_pos-old_bufend_pos);
1391 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1393 Looks ahead one (Unicode) character in the text currently being lexed.
1394 Returns the codepoint (unsigned integer value) of the next character,
1395 or -1 if lexing has reached the end of the input text. To consume the
1396 peeked character, use L</lex_read_unichar>.
1398 If the next character is in (or extends into) the next chunk of input
1399 text, the next chunk will be read in. Normally the current chunk will be
1400 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1401 then the current chunk will not be discarded.
1403 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1404 is encountered, an exception is generated.
1410 Perl_lex_peek_unichar(pTHX_ U32 flags)
1414 if (flags & ~(LEX_KEEP_PREVIOUS))
1415 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1416 s = PL_parser->bufptr;
1417 bufend = PL_parser->bufend;
1423 if (!lex_next_chunk(flags))
1425 s = PL_parser->bufptr;
1426 bufend = PL_parser->bufend;
1429 if (UTF8_IS_INVARIANT(head))
1431 if (UTF8_IS_START(head)) {
1432 len = UTF8SKIP(&head);
1433 while ((STRLEN)(bufend-s) < len) {
1434 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1436 s = PL_parser->bufptr;
1437 bufend = PL_parser->bufend;
1440 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1441 if (retlen == (STRLEN)-1) {
1442 /* malformed UTF-8 */
1444 SAVESPTR(PL_warnhook);
1445 PL_warnhook = PERL_WARNHOOK_FATAL;
1446 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1452 if (!lex_next_chunk(flags))
1454 s = PL_parser->bufptr;
1461 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1463 Reads the next (Unicode) character in the text currently being lexed.
1464 Returns the codepoint (unsigned integer value) of the character read,
1465 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1466 if lexing has reached the end of the input text. To non-destructively
1467 examine the next character, use L</lex_peek_unichar> instead.
1469 If the next character is in (or extends into) the next chunk of input
1470 text, the next chunk will be read in. Normally the current chunk will be
1471 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1472 then the current chunk will not be discarded.
1474 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1475 is encountered, an exception is generated.
1481 Perl_lex_read_unichar(pTHX_ U32 flags)
1484 if (flags & ~(LEX_KEEP_PREVIOUS))
1485 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1486 c = lex_peek_unichar(flags);
1489 COPLINE_INC_WITH_HERELINES;
1491 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1493 ++(PL_parser->bufptr);
1499 =for apidoc Amx|void|lex_read_space|U32 flags
1501 Reads optional spaces, in Perl style, in the text currently being
1502 lexed. The spaces may include ordinary whitespace characters and
1503 Perl-style comments. C<#line> directives are processed if encountered.
1504 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1505 at a non-space character (or the end of the input text).
1507 If spaces extend into the next chunk of input text, the next chunk will
1508 be read in. Normally the current chunk will be discarded at the same
1509 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1510 chunk will not be discarded.
1515 #define LEX_NO_INCLINE 0x40000000
1516 #define LEX_NO_NEXT_CHUNK 0x80000000
1519 Perl_lex_read_space(pTHX_ U32 flags)
1522 const bool can_incline = !(flags & LEX_NO_INCLINE);
1523 bool need_incline = 0;
1524 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1525 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1528 sv_free(PL_skipwhite);
1529 PL_skipwhite = NULL;
1532 PL_skipwhite = newSVpvs("");
1533 #endif /* PERL_MAD */
1534 s = PL_parser->bufptr;
1535 bufend = PL_parser->bufend;
1541 } while (!(c == '\n' || (c == 0 && s == bufend)));
1542 } else if (c == '\n') {
1545 PL_parser->linestart = s;
1551 } else if (isSPACE(c)) {
1553 } else if (c == 0 && s == bufend) {
1557 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1558 #endif /* PERL_MAD */
1559 if (flags & LEX_NO_NEXT_CHUNK)
1561 PL_parser->bufptr = s;
1562 if (can_incline) COPLINE_INC_WITH_HERELINES;
1563 got_more = lex_next_chunk(flags);
1564 if (can_incline) CopLINE_dec(PL_curcop);
1565 s = PL_parser->bufptr;
1566 bufend = PL_parser->bufend;
1569 if (can_incline && need_incline && PL_parser->rsfp) {
1579 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1580 #endif /* PERL_MAD */
1581 PL_parser->bufptr = s;
1586 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1588 This function performs syntax checking on a prototype, C<proto>.
1589 If C<warn> is true, any illegal characters or mismatched brackets
1590 will trigger illegalproto warnings, declaring that they were
1591 detected in the prototype for C<name>.
1593 The return value is C<true> if this is a valid prototype, and
1594 C<false> if it is not, regardless of whether C<warn> was C<true> or
1597 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1604 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1606 STRLEN len, origlen;
1607 char *p = proto ? SvPV(proto, len) : NULL;
1608 bool bad_proto = FALSE;
1609 bool in_brackets = FALSE;
1610 bool after_slash = FALSE;
1611 char greedy_proto = ' ';
1612 bool proto_after_greedy_proto = FALSE;
1613 bool must_be_last = FALSE;
1614 bool underscore = FALSE;
1615 bool bad_proto_after_underscore = FALSE;
1617 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1623 for (; len--; p++) {
1626 proto_after_greedy_proto = TRUE;
1628 if (!strchr(";@%", *p))
1629 bad_proto_after_underscore = TRUE;
1632 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1639 in_brackets = FALSE;
1640 else if ((*p == '@' || *p == '%') &&
1643 must_be_last = TRUE;
1652 after_slash = FALSE;
1657 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1660 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1661 origlen, UNI_DISPLAY_ISPRINT)
1662 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1664 if (proto_after_greedy_proto)
1665 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1666 "Prototype after '%c' for %"SVf" : %s",
1667 greedy_proto, SVfARG(name), p);
1669 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1670 "Missing ']' in prototype for %"SVf" : %s",
1673 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1674 "Illegal character in prototype for %"SVf" : %s",
1676 if (bad_proto_after_underscore)
1677 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1678 "Illegal character after '_' in prototype for %"SVf" : %s",
1682 return (! (proto_after_greedy_proto || bad_proto) );
1687 * This subroutine has nothing to do with tilting, whether at windmills
1688 * or pinball tables. Its name is short for "increment line". It
1689 * increments the current line number in CopLINE(PL_curcop) and checks
1690 * to see whether the line starts with a comment of the form
1691 * # line 500 "foo.pm"
1692 * If so, it sets the current line number and file to the values in the comment.
1696 S_incline(pTHX_ const char *s)
1704 PERL_ARGS_ASSERT_INCLINE;
1706 COPLINE_INC_WITH_HERELINES;
1707 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1708 && s+1 == PL_bufend && *s == ';') {
1709 /* fake newline in string eval */
1710 CopLINE_dec(PL_curcop);
1715 while (SPACE_OR_TAB(*s))
1717 if (strnEQ(s, "line", 4))
1721 if (SPACE_OR_TAB(*s))
1725 while (SPACE_OR_TAB(*s))
1733 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1735 while (SPACE_OR_TAB(*s))
1737 if (*s == '"' && (t = strchr(s+1, '"'))) {
1743 while (!isSPACE(*t))
1747 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1749 if (*e != '\n' && *e != '\0')
1750 return; /* false alarm */
1752 line_num = atoi(n)-1;
1755 const STRLEN len = t - s;
1756 SV *const temp_sv = CopFILESV(PL_curcop);
1761 cf = SvPVX(temp_sv);
1762 tmplen = SvCUR(temp_sv);
1768 if (!PL_rsfp && !PL_parser->filtered) {
1769 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1770 * to *{"::_<newfilename"} */
1771 /* However, the long form of evals is only turned on by the
1772 debugger - usually they're "(eval %lu)" */
1776 STRLEN tmplen2 = len;
1777 if (tmplen + 2 <= sizeof smallbuf)
1780 Newx(tmpbuf, tmplen + 2, char);
1783 memcpy(tmpbuf + 2, cf, tmplen);
1785 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1790 if (tmplen2 + 2 <= sizeof smallbuf)
1793 Newx(tmpbuf2, tmplen2 + 2, char);
1795 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1796 /* Either they malloc'd it, or we malloc'd it,
1797 so no prefix is present in ours. */
1802 memcpy(tmpbuf2 + 2, s, tmplen2);
1805 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1807 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1808 /* adjust ${"::_<newfilename"} to store the new file name */
1809 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1810 /* The line number may differ. If that is the case,
1811 alias the saved lines that are in the array.
1812 Otherwise alias the whole array. */
1813 if (CopLINE(PL_curcop) == line_num) {
1814 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1815 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1817 else if (GvAV(*gvp)) {
1818 AV * const av = GvAV(*gvp);
1819 const I32 start = CopLINE(PL_curcop)+1;
1820 I32 items = AvFILLp(av) - start;
1822 AV * const av2 = GvAVn(gv2);
1823 SV **svp = AvARRAY(av) + start;
1824 I32 l = (I32)line_num+1;
1826 av_store(av2, l++, SvREFCNT_inc(*svp++));
1831 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1833 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1835 CopFILE_free(PL_curcop);
1836 CopFILE_setn(PL_curcop, s, len);
1838 CopLINE_set(PL_curcop, line_num);
1841 #define skipspace(s) skipspace_flags(s, 0)
1844 /* skip space before PL_thistoken */
1847 S_skipspace0(pTHX_ char *s)
1849 PERL_ARGS_ASSERT_SKIPSPACE0;
1856 PL_thiswhite = newSVpvs("");
1857 sv_catsv(PL_thiswhite, PL_skipwhite);
1858 sv_free(PL_skipwhite);
1861 PL_realtokenstart = s - SvPVX(PL_linestr);
1865 /* skip space after PL_thistoken */
1868 S_skipspace1(pTHX_ char *s)
1870 const char *start = s;
1871 I32 startoff = start - SvPVX(PL_linestr);
1873 PERL_ARGS_ASSERT_SKIPSPACE1;
1878 start = SvPVX(PL_linestr) + startoff;
1879 if (!PL_thistoken && PL_realtokenstart >= 0) {
1880 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1881 PL_thistoken = newSVpvn(tstart, start - tstart);
1883 PL_realtokenstart = -1;
1886 PL_nextwhite = newSVpvs("");
1887 sv_catsv(PL_nextwhite, PL_skipwhite);
1888 sv_free(PL_skipwhite);
1895 S_skipspace2(pTHX_ char *s, SV **svp)
1898 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1899 const I32 startoff = s - SvPVX(PL_linestr);
1901 PERL_ARGS_ASSERT_SKIPSPACE2;
1904 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1905 if (!PL_madskills || !svp)
1907 start = SvPVX(PL_linestr) + startoff;
1908 if (!PL_thistoken && PL_realtokenstart >= 0) {
1909 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1910 PL_thistoken = newSVpvn(tstart, start - tstart);
1911 PL_realtokenstart = -1;
1915 *svp = newSVpvs("");
1916 sv_setsv(*svp, PL_skipwhite);
1917 sv_free(PL_skipwhite);
1926 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1928 AV *av = CopFILEAVx(PL_curcop);
1930 SV * const sv = newSV_type(SVt_PVMG);
1932 sv_setsv_flags(sv, orig_sv, 0); /* no cow */
1934 sv_setpvn(sv, buf, len);
1937 av_store(av, (I32)CopLINE(PL_curcop), sv);
1943 * Called to gobble the appropriate amount and type of whitespace.
1944 * Skips comments as well.
1948 S_skipspace_flags(pTHX_ char *s, U32 flags)
1952 #endif /* PERL_MAD */
1953 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1956 sv_free(PL_skipwhite);
1957 PL_skipwhite = NULL;
1959 #endif /* PERL_MAD */
1960 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1961 while (s < PL_bufend && SPACE_OR_TAB(*s))
1964 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1966 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1967 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1968 LEX_NO_NEXT_CHUNK : 0));
1970 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1971 if (PL_linestart > PL_bufptr)
1972 PL_bufptr = PL_linestart;
1977 PL_skipwhite = newSVpvn(start, s-start);
1978 #endif /* PERL_MAD */
1984 * Check the unary operators to ensure there's no ambiguity in how they're
1985 * used. An ambiguous piece of code would be:
1987 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1988 * the +5 is its argument.
1998 if (PL_oldoldbufptr != PL_last_uni)
2000 while (isSPACE(*PL_last_uni))
2003 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2005 if ((t = strchr(s, '(')) && t < PL_bufptr)
2008 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2009 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2010 (int)(s - PL_last_uni), PL_last_uni);
2014 * LOP : macro to build a list operator. Its behaviour has been replaced
2015 * with a subroutine, S_lop() for which LOP is just another name.
2018 #define LOP(f,x) return lop(f,x,s)
2022 * Build a list operator (or something that might be one). The rules:
2023 * - if we have a next token, then it's a list operator [why?]
2024 * - if the next thing is an opening paren, then it's a function
2025 * - else it's a list operator
2029 S_lop(pTHX_ I32 f, int x, char *s)
2033 PERL_ARGS_ASSERT_LOP;
2039 PL_last_lop = PL_oldbufptr;
2040 PL_last_lop_op = (OPCODE)f;
2049 return REPORT(FUNC);
2052 return REPORT(FUNC);
2055 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2056 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2057 return REPORT(LSTOP);
2064 * Sets up for an eventual force_next(). start_force(0) basically does
2065 * an unshift, while start_force(-1) does a push. yylex removes items
2070 S_start_force(pTHX_ int where)
2074 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2075 where = PL_lasttoke;
2076 assert(PL_curforce < 0 || PL_curforce == where);
2077 if (PL_curforce != where) {
2078 for (i = PL_lasttoke; i > where; --i) {
2079 PL_nexttoke[i] = PL_nexttoke[i-1];
2083 if (PL_curforce < 0) /* in case of duplicate start_force() */
2084 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2085 PL_curforce = where;
2088 curmad('^', newSVpvs(""));
2089 CURMAD('_', PL_nextwhite);
2094 S_curmad(pTHX_ char slot, SV *sv)
2100 if (PL_curforce < 0)
2101 where = &PL_thismad;
2103 where = &PL_nexttoke[PL_curforce].next_mad;
2109 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2111 else if (PL_encoding) {
2112 sv_recode_to_utf8(sv, PL_encoding);
2117 /* keep a slot open for the head of the list? */
2118 if (slot != '_' && *where && (*where)->mad_key == '^') {
2119 (*where)->mad_key = slot;
2120 sv_free(MUTABLE_SV(((*where)->mad_val)));
2121 (*where)->mad_val = (void*)sv;
2124 addmad(newMADsv(slot, sv), where, 0);
2127 # define start_force(where) NOOP
2128 # define curmad(slot, sv) NOOP
2133 * When the lexer realizes it knows the next token (for instance,
2134 * it is reordering tokens for the parser) then it can call S_force_next
2135 * to know what token to return the next time the lexer is called. Caller
2136 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2137 * and possibly PL_expect to ensure the lexer handles the token correctly.
2141 S_force_next(pTHX_ I32 type)
2146 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2147 tokereport(type, &NEXTVAL_NEXTTOKE);
2151 if (PL_curforce < 0)
2152 start_force(PL_lasttoke);
2153 PL_nexttoke[PL_curforce].next_type = type;
2154 if (PL_lex_state != LEX_KNOWNEXT)
2155 PL_lex_defer = PL_lex_state;
2156 PL_lex_state = LEX_KNOWNEXT;
2157 PL_lex_expect = PL_expect;
2160 PL_nexttype[PL_nexttoke] = type;
2162 if (PL_lex_state != LEX_KNOWNEXT) {
2163 PL_lex_defer = PL_lex_state;
2164 PL_lex_expect = PL_expect;
2165 PL_lex_state = LEX_KNOWNEXT;
2173 int yyc = PL_parser->yychar;
2174 if (yyc != YYEMPTY) {
2177 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2178 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2179 PL_lex_allbrackets--;
2181 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2182 } else if (yyc == '('/*)*/) {
2183 PL_lex_allbrackets--;
2188 PL_parser->yychar = YYEMPTY;
2193 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2196 SV * const sv = newSVpvn_utf8(start, len,
2199 && !is_ascii_string((const U8*)start, len)
2200 && is_utf8_string((const U8*)start, len));
2206 * When the lexer knows the next thing is a word (for instance, it has
2207 * just seen -> and it knows that the next char is a word char, then
2208 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2212 * char *start : buffer position (must be within PL_linestr)
2213 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2214 * int check_keyword : if true, Perl checks to make sure the word isn't
2215 * a keyword (do this if the word is a label, e.g. goto FOO)
2216 * int allow_pack : if true, : characters will also be allowed (require,
2217 * use, etc. do this)
2218 * int allow_initial_tick : used by the "sub" lexer only.
2222 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2228 PERL_ARGS_ASSERT_FORCE_WORD;
2230 start = SKIPSPACE1(start);
2232 if (isIDFIRST_lazy_if(s,UTF) ||
2233 (allow_pack && *s == ':') )
2235 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2236 if (check_keyword) {
2237 char *s2 = PL_tokenbuf;
2238 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2240 if (keyword(s2, len, 0))
2243 start_force(PL_curforce);
2245 curmad('X', newSVpvn(start,s-start));
2246 if (token == METHOD) {
2251 PL_expect = XOPERATOR;
2255 curmad('g', newSVpvs( "forced" ));
2256 NEXTVAL_NEXTTOKE.opval
2257 = (OP*)newSVOP(OP_CONST,0,
2258 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2259 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2267 * Called when the lexer wants $foo *foo &foo etc, but the program
2268 * text only contains the "foo" portion. The first argument is a pointer
2269 * to the "foo", and the second argument is the type symbol to prefix.
2270 * Forces the next token to be a "WORD".
2271 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2275 S_force_ident(pTHX_ const char *s, int kind)
2279 PERL_ARGS_ASSERT_FORCE_IDENT;
2282 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2283 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2284 UTF ? SVf_UTF8 : 0));
2285 start_force(PL_curforce);
2286 NEXTVAL_NEXTTOKE.opval = o;
2289 o->op_private = OPpCONST_ENTERED;
2290 /* XXX see note in pp_entereval() for why we forgo typo
2291 warnings if the symbol must be introduced in an eval.
2293 gv_fetchpvn_flags(s, len,
2294 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2295 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2296 kind == '$' ? SVt_PV :
2297 kind == '@' ? SVt_PVAV :
2298 kind == '%' ? SVt_PVHV :
2306 S_force_ident_maybe_lex(pTHX_ char pit)
2308 start_force(PL_curforce);
2309 NEXTVAL_NEXTTOKE.ival = pit;
2314 Perl_str_to_version(pTHX_ SV *sv)
2319 const char *start = SvPV_const(sv,len);
2320 const char * const end = start + len;
2321 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2323 PERL_ARGS_ASSERT_STR_TO_VERSION;
2325 while (start < end) {
2329 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2334 retval += ((NV)n)/nshift;
2343 * Forces the next token to be a version number.
2344 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2345 * and if "guessing" is TRUE, then no new token is created (and the caller
2346 * must use an alternative parsing method).
2350 S_force_version(pTHX_ char *s, int guessing)
2356 I32 startoff = s - SvPVX(PL_linestr);
2359 PERL_ARGS_ASSERT_FORCE_VERSION;
2367 while (isDIGIT(*d) || *d == '_' || *d == '.')
2371 start_force(PL_curforce);
2372 curmad('X', newSVpvn(s,d-s));
2375 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2377 #ifdef USE_LOCALE_NUMERIC
2378 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2379 setlocale(LC_NUMERIC, "C");
2381 s = scan_num(s, &pl_yylval);
2382 #ifdef USE_LOCALE_NUMERIC
2383 setlocale(LC_NUMERIC, loc);
2386 version = pl_yylval.opval;
2387 ver = cSVOPx(version)->op_sv;
2388 if (SvPOK(ver) && !SvNIOK(ver)) {
2389 SvUPGRADE(ver, SVt_PVNV);
2390 SvNV_set(ver, str_to_version(ver));
2391 SvNOK_on(ver); /* hint that it is a version */
2394 else if (guessing) {
2397 sv_free(PL_nextwhite); /* let next token collect whitespace */
2399 s = SvPVX(PL_linestr) + startoff;
2407 if (PL_madskills && !version) {
2408 sv_free(PL_nextwhite); /* let next token collect whitespace */
2410 s = SvPVX(PL_linestr) + startoff;
2413 /* NOTE: The parser sees the package name and the VERSION swapped */
2414 start_force(PL_curforce);
2415 NEXTVAL_NEXTTOKE.opval = version;
2422 * S_force_strict_version
2423 * Forces the next token to be a version number using strict syntax rules.
2427 S_force_strict_version(pTHX_ char *s)
2432 I32 startoff = s - SvPVX(PL_linestr);
2434 const char *errstr = NULL;
2436 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2438 while (isSPACE(*s)) /* leading whitespace */
2441 if (is_STRICT_VERSION(s,&errstr)) {
2443 s = (char *)scan_version(s, ver, 0);
2444 version = newSVOP(OP_CONST, 0, ver);
2446 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2447 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2451 yyerror(errstr); /* version required */
2456 if (PL_madskills && !version) {
2457 sv_free(PL_nextwhite); /* let next token collect whitespace */
2459 s = SvPVX(PL_linestr) + startoff;
2462 /* NOTE: The parser sees the package name and the VERSION swapped */
2463 start_force(PL_curforce);
2464 NEXTVAL_NEXTTOKE.opval = version;
2472 * Tokenize a quoted string passed in as an SV. It finds the next
2473 * chunk, up to end of string or a backslash. It may make a new
2474 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2479 S_tokeq(pTHX_ SV *sv)
2488 PERL_ARGS_ASSERT_TOKEQ;
2493 s = SvPV_force(sv, len);
2494 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2497 /* This is relying on the SV being "well formed" with a trailing '\0' */
2498 while (s < send && !(*s == '\\' && s[1] == '\\'))
2503 if ( PL_hints & HINT_NEW_STRING ) {
2504 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2508 if (s + 1 < send && (s[1] == '\\'))
2509 s++; /* all that, just for this */
2514 SvCUR_set(sv, d - SvPVX_const(sv));
2516 if ( PL_hints & HINT_NEW_STRING )
2517 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2522 * Now come three functions related to double-quote context,
2523 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2524 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2525 * interact with PL_lex_state, and create fake ( ... ) argument lists
2526 * to handle functions and concatenation.
2530 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2535 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2537 * Pattern matching will set PL_lex_op to the pattern-matching op to
2538 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2540 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2542 * Everything else becomes a FUNC.
2544 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2545 * had an OP_CONST or OP_READLINE). This just sets us up for a
2546 * call to S_sublex_push().
2550 S_sublex_start(pTHX)
2553 const I32 op_type = pl_yylval.ival;
2555 if (op_type == OP_NULL) {
2556 pl_yylval.opval = PL_lex_op;
2560 if (op_type == OP_CONST || op_type == OP_READLINE) {
2561 SV *sv = tokeq(PL_lex_stuff);
2563 if (SvTYPE(sv) == SVt_PVIV) {
2564 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2566 const char * const p = SvPV_const(sv, len);
2567 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2571 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2572 PL_lex_stuff = NULL;
2573 /* Allow <FH> // "foo" */
2574 if (op_type == OP_READLINE)
2575 PL_expect = XTERMORDORDOR;
2578 else if (op_type == OP_BACKTICK && PL_lex_op) {
2579 /* readpipe() vas overriden */
2580 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2581 pl_yylval.opval = PL_lex_op;
2583 PL_lex_stuff = NULL;
2587 PL_sublex_info.super_state = PL_lex_state;
2588 PL_sublex_info.sub_inwhat = (U16)op_type;
2589 PL_sublex_info.sub_op = PL_lex_op;
2590 PL_lex_state = LEX_INTERPPUSH;
2594 pl_yylval.opval = PL_lex_op;
2604 * Create a new scope to save the lexing state. The scope will be
2605 * ended in S_sublex_done. Returns a '(', starting the function arguments
2606 * to the uc, lc, etc. found before.
2607 * Sets PL_lex_state to LEX_INTERPCONCAT.
2617 PL_lex_state = PL_sublex_info.super_state;
2618 SAVEBOOL(PL_lex_dojoin);
2619 SAVEI32(PL_lex_brackets);
2620 SAVEI32(PL_lex_allbrackets);
2621 SAVEI32(PL_lex_formbrack);
2622 SAVEI8(PL_lex_fakeeof);
2623 SAVEI32(PL_lex_casemods);
2624 SAVEI32(PL_lex_starts);
2625 SAVEI8(PL_lex_state);
2626 SAVESPTR(PL_lex_repl);
2627 SAVEVPTR(PL_lex_inpat);
2628 SAVEI16(PL_lex_inwhat);
2629 SAVECOPLINE(PL_curcop);
2630 SAVEPPTR(PL_bufptr);
2631 SAVEPPTR(PL_bufend);
2632 SAVEPPTR(PL_oldbufptr);
2633 SAVEPPTR(PL_oldoldbufptr);
2634 SAVEPPTR(PL_last_lop);
2635 SAVEPPTR(PL_last_uni);
2636 SAVEPPTR(PL_linestart);
2637 SAVESPTR(PL_linestr);
2638 SAVEGENERICPV(PL_lex_brackstack);
2639 SAVEGENERICPV(PL_lex_casestack);
2640 SAVEGENERICPV(PL_parser->lex_shared);
2641 SAVEBOOL(PL_parser->lex_re_reparsing);
2643 /* The here-doc parser needs to be able to peek into outer lexing
2644 scopes to find the body of the here-doc. So we put PL_linestr and
2645 PL_bufptr into lex_shared, to ‘share’ those values.
2647 PL_parser->lex_shared->ls_linestr = PL_linestr;
2648 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2650 PL_linestr = PL_lex_stuff;
2651 PL_lex_repl = PL_sublex_info.repl;
2652 PL_lex_stuff = NULL;
2653 PL_sublex_info.repl = NULL;
2655 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2656 = SvPVX(PL_linestr);
2657 PL_bufend += SvCUR(PL_linestr);
2658 PL_last_lop = PL_last_uni = NULL;
2659 SAVEFREESV(PL_linestr);
2660 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2662 PL_lex_dojoin = FALSE;
2663 PL_lex_brackets = PL_lex_formbrack = 0;
2664 PL_lex_allbrackets = 0;
2665 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2666 Newx(PL_lex_brackstack, 120, char);
2667 Newx(PL_lex_casestack, 12, char);
2668 PL_lex_casemods = 0;
2669 *PL_lex_casestack = '\0';
2671 PL_lex_state = LEX_INTERPCONCAT;
2672 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2674 Newxz(shared, 1, LEXSHARED);
2675 shared->ls_prev = PL_parser->lex_shared;
2676 PL_parser->lex_shared = shared;
2678 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2679 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2680 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2681 PL_lex_inpat = PL_sublex_info.sub_op;
2683 PL_lex_inpat = NULL;
2685 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2686 PL_in_eval &= ~EVAL_RE_REPARSING;
2693 * Restores lexer state after a S_sublex_push.
2700 if (!PL_lex_starts++) {
2701 SV * const sv = newSVpvs("");
2702 if (SvUTF8(PL_linestr))
2704 PL_expect = XOPERATOR;
2705 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2709 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2710 PL_lex_state = LEX_INTERPCASEMOD;
2714 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2715 assert(PL_lex_inwhat != OP_TRANSR);
2716 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2717 PL_linestr = PL_lex_repl;
2719 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2720 PL_bufend += SvCUR(PL_linestr);
2721 PL_last_lop = PL_last_uni = NULL;
2722 PL_lex_dojoin = FALSE;
2723 PL_lex_brackets = 0;
2724 PL_lex_allbrackets = 0;
2725 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2726 PL_lex_casemods = 0;
2727 *PL_lex_casestack = '\0';
2729 if (SvEVALED(PL_lex_repl)) {
2730 PL_lex_state = LEX_INTERPNORMAL;
2732 /* we don't clear PL_lex_repl here, so that we can check later
2733 whether this is an evalled subst; that means we rely on the
2734 logic to ensure sublex_done() is called again only via the
2735 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2738 PL_lex_state = LEX_INTERPCONCAT;
2748 PL_endwhite = newSVpvs("");
2749 sv_catsv(PL_endwhite, PL_thiswhite);
2753 sv_setpvs(PL_thistoken,"");
2755 PL_realtokenstart = -1;
2759 PL_bufend = SvPVX(PL_linestr);
2760 PL_bufend += SvCUR(PL_linestr);
2761 PL_expect = XOPERATOR;
2762 PL_sublex_info.sub_inwhat = 0;
2767 PERL_STATIC_INLINE SV*
2768 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2770 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2771 * interior, hence to the "}". Finds what the name resolves to, returning
2772 * an SV* containing it; NULL if no valid one found */
2774 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2781 const U8* first_bad_char_loc;
2782 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2784 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2786 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2788 &first_bad_char_loc))
2790 /* If warnings are on, this will print a more detailed analysis of what
2791 * is wrong than the error message below */
2792 utf8n_to_uvuni(first_bad_char_loc,
2793 e - ((char *) first_bad_char_loc),
2796 /* We deliberately don't try to print the malformed character, which
2797 * might not print very well; it also may be just the first of many
2798 * malformations, so don't print what comes after it */
2799 yyerror(Perl_form(aTHX_
2800 "Malformed UTF-8 character immediately after '%.*s'",
2801 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2805 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2806 /* include the <}> */
2807 e - backslash_ptr + 1);
2809 SvREFCNT_dec_NN(res);
2813 /* See if the charnames handler is the Perl core's, and if so, we can skip
2814 * the validation needed for a user-supplied one, as Perl's does its own
2816 table = GvHV(PL_hintgv); /* ^H */
2817 cvp = hv_fetchs(table, "charnames", FALSE);
2818 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2819 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2821 const char * const name = HvNAME(stash);
2822 if strEQ(name, "_charnames") {
2827 /* Here, it isn't Perl's charname handler. We can't rely on a
2828 * user-supplied handler to validate the input name. For non-ut8 input,
2829 * look to see that the first character is legal. Then loop through the
2830 * rest checking that each is a continuation */
2832 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2836 if (! isALPHAU(*s)) {
2841 if (! isCHARNAME_CONT(*s)) {
2844 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2845 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2846 "A sequence of multiple spaces in a charnames "
2847 "alias definition is deprecated");
2851 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2852 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2853 "Trailing white-space in a charnames alias "
2854 "definition is deprecated");
2858 /* Similarly for utf8. For invariants can check directly; for other
2859 * Latin1, can calculate their code point and check; otherwise use a
2861 if (UTF8_IS_INVARIANT(*s)) {
2862 if (! isALPHAU(*s)) {
2866 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2867 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2873 if (! PL_utf8_charname_begin) {
2874 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2875 PL_utf8_charname_begin = _core_swash_init("utf8",
2876 "_Perl_Charname_Begin",
2878 1, 0, NULL, &flags);
2880 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2887 if (UTF8_IS_INVARIANT(*s)) {
2888 if (! isCHARNAME_CONT(*s)) {
2891 if (*s == ' ' && *(s-1) == ' '
2892 && ckWARN_d(WARN_DEPRECATED)) {
2893 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2894 "A sequence of multiple spaces in a charnam"
2895 "es alias definition is deprecated");
2899 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2900 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2908 if (! PL_utf8_charname_continue) {
2909 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2910 PL_utf8_charname_continue = _core_swash_init("utf8",
2911 "_Perl_Charname_Continue",
2913 1, 0, NULL, &flags);
2915 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2921 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2922 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2923 "Trailing white-space in a charnames alias "
2924 "definition is deprecated");
2928 if (SvUTF8(res)) { /* Don't accept malformed input */
2929 const U8* first_bad_char_loc;
2931 const char* const str = SvPV_const(res, len);
2932 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2933 /* If warnings are on, this will print a more detailed analysis of
2934 * what is wrong than the error message below */
2935 utf8n_to_uvuni(first_bad_char_loc,
2936 (char *) first_bad_char_loc - str,
2939 /* We deliberately don't try to print the malformed character,
2940 * which might not print very well; it also may be just the first
2941 * of many malformations, so don't print what comes after it */
2944 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2945 (int) (e - backslash_ptr + 1), backslash_ptr,
2946 (int) ((char *) first_bad_char_loc - str), str
2956 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2958 /* The final %.*s makes sure that should the trailing NUL be missing
2959 * that this print won't run off the end of the string */
2962 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2963 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2964 (int)(e - s + bad_char_size), s + bad_char_size
2966 UTF ? SVf_UTF8 : 0);
2974 Extracts the next constant part of a pattern, double-quoted string,
2975 or transliteration. This is terrifying code.
2977 For example, in parsing the double-quoted string "ab\x63$d", it would
2978 stop at the '$' and return an OP_CONST containing 'abc'.
2980 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2981 processing a pattern (PL_lex_inpat is true), a transliteration
2982 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2984 Returns a pointer to the character scanned up to. If this is
2985 advanced from the start pointer supplied (i.e. if anything was
2986 successfully parsed), will leave an OP_CONST for the substring scanned
2987 in pl_yylval. Caller must intuit reason for not parsing further
2988 by looking at the next characters herself.
2992 \N{FOO} => \N{U+hex_for_character_FOO}
2993 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2996 all other \-char, including \N and \N{ apart from \N{ABC}
2999 @ and $ where it appears to be a var, but not for $ as tail anchor
3004 In transliterations:
3005 characters are VERY literal, except for - not at the start or end
3006 of the string, which indicates a range. If the range is in bytes,
3007 scan_const expands the range to the full set of intermediate
3008 characters. If the range is in utf8, the hyphen is replaced with
3009 a certain range mark which will be handled by pmtrans() in op.c.
3011 In double-quoted strings:
3013 double-quoted style: \r and \n
3014 constants: \x31, etc.
3015 deprecated backrefs: \1 (in substitution replacements)
3016 case and quoting: \U \Q \E
3019 scan_const does *not* construct ops to handle interpolated strings.
3020 It stops processing as soon as it finds an embedded $ or @ variable
3021 and leaves it to the caller to work out what's going on.
3023 embedded arrays (whether in pattern or not) could be:
3024 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3026 $ in double-quoted strings must be the symbol of an embedded scalar.
3028 $ in pattern could be $foo or could be tail anchor. Assumption:
3029 it's a tail anchor if $ is the last thing in the string, or if it's
3030 followed by one of "()| \r\n\t"
3032 \1 (backreferences) are turned into $1 in substitutions
3034 The structure of the code is
3035 while (there's a character to process) {
3036 handle transliteration ranges
3037 skip regexp comments /(?#comment)/ and codes /(?{code})/
3038 skip #-initiated comments in //x patterns
3039 check for embedded arrays
3040 check for embedded scalars
3042 deprecate \1 in substitution replacements
3043 handle string-changing backslashes \l \U \Q \E, etc.
3044 switch (what was escaped) {
3045 handle \- in a transliteration (becomes a literal -)
3046 if a pattern and not \N{, go treat as regular character
3047 handle \132 (octal characters)
3048 handle \x15 and \x{1234} (hex characters)
3049 handle \N{name} (named characters, also \N{3,5} in a pattern)
3050 handle \cV (control characters)
3051 handle printf-style backslashes (\f, \r, \n, etc)
3054 } (end if backslash)
3055 handle regular character
3056 } (end while character to read)
3061 S_scan_const(pTHX_ char *start)
3064 char *send = PL_bufend; /* end of the constant */
3065 SV *sv = newSV(send - start); /* sv for the constant. See
3066 note below on sizing. */
3067 char *s = start; /* start of the constant */
3068 char *d = SvPVX(sv); /* destination for copies */
3069 bool dorange = FALSE; /* are we in a translit range? */
3070 bool didrange = FALSE; /* did we just finish a range? */
3071 bool in_charclass = FALSE; /* within /[...]/ */
3072 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3073 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3074 to be UTF8? But, this can
3075 show as true when the source
3076 isn't utf8, as for example
3077 when it is entirely composed
3079 SV *res; /* result from charnames */
3081 /* Note on sizing: The scanned constant is placed into sv, which is
3082 * initialized by newSV() assuming one byte of output for every byte of
3083 * input. This routine expects newSV() to allocate an extra byte for a
3084 * trailing NUL, which this routine will append if it gets to the end of
3085 * the input. There may be more bytes of input than output (eg., \N{LATIN
3086 * CAPITAL LETTER A}), or more output than input if the constant ends up
3087 * recoded to utf8, but each time a construct is found that might increase
3088 * the needed size, SvGROW() is called. Its size parameter each time is
3089 * based on the best guess estimate at the time, namely the length used so
3090 * far, plus the length the current construct will occupy, plus room for
3091 * the trailing NUL, plus one byte for every input byte still unscanned */
3093 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3096 UV literal_endpoint = 0;
3097 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3100 PERL_ARGS_ASSERT_SCAN_CONST;
3102 assert(PL_lex_inwhat != OP_TRANSR);
3103 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3104 /* If we are doing a trans and we know we want UTF8 set expectation */
3105 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3106 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3109 /* Protect sv from errors and fatal warnings. */
3110 ENTER_with_name("scan_const");
3113 while (s < send || dorange) {
3115 /* get transliterations out of the way (they're most literal) */
3116 if (PL_lex_inwhat == OP_TRANS) {
3117 /* expand a range A-Z to the full set of characters. AIE! */
3119 I32 i; /* current expanded character */
3120 I32 min; /* first character in range */
3121 I32 max; /* last character in range */
3132 char * const c = (char*)utf8_hop((U8*)d, -1);
3136 *c = (char)UTF_TO_NATIVE(0xff);
3137 /* mark the range as done, and continue */
3143 i = d - SvPVX_const(sv); /* remember current offset */
3146 SvLEN(sv) + (has_utf8 ?
3147 (512 - UTF_CONTINUATION_MARK +
3150 /* How many two-byte within 0..255: 128 in UTF-8,
3151 * 96 in UTF-8-mod. */
3153 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3155 d = SvPVX(sv) + i; /* refresh d after realloc */
3159 for (j = 0; j <= 1; j++) {
3160 char * const c = (char*)utf8_hop((U8*)d, -1);
3161 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3167 max = (U8)0xff; /* only to \xff */
3168 uvmax = uv; /* \x{100} to uvmax */
3170 d = c; /* eat endpoint chars */
3175 d -= 2; /* eat the first char and the - */
3176 min = (U8)*d; /* first char in range */
3177 max = (U8)d[1]; /* last char in range */
3184 "Invalid range \"%c-%c\" in transliteration operator",
3185 (char)min, (char)max);
3189 if (literal_endpoint == 2 &&
3190 ((isLOWER(min) && isLOWER(max)) ||
3191 (isUPPER(min) && isUPPER(max)))) {
3193 for (i = min; i <= max; i++)
3195 *d++ = NATIVE_TO_NEED(has_utf8,i);
3197 for (i = min; i <= max; i++)
3199 *d++ = NATIVE_TO_NEED(has_utf8,i);
3204 for (i = min; i <= max; i++)
3207 const U8 ch = (U8)NATIVE_TO_UTF(i);
3208 if (UNI_IS_INVARIANT(ch))
3211 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3212 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3221 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3223 *d++ = (char)UTF_TO_NATIVE(0xff);
3225 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3229 /* mark the range as done, and continue */
3233 literal_endpoint = 0;
3238 /* range begins (ignore - as first or last char) */
3239 else if (*s == '-' && s+1 < send && s != start) {
3241 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3248 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
3258 literal_endpoint = 0;
3259 native_range = TRUE;
3264 /* if we get here, we're not doing a transliteration */
3266 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3269 while (s1 >= start && *s1-- == '\\')
3272 in_charclass = TRUE;
3275 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3278 while (s1 >= start && *s1-- == '\\')
3281 in_charclass = FALSE;
3284 /* skip for regexp comments /(?#comment)/, except for the last
3285 * char, which will be done separately.
3286 * Stop on (?{..}) and friends */
3288 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
3290 while (s+1 < send && *s != ')')
3291 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3293 else if (!PL_lex_casemods && !in_charclass &&
3294 ( s[2] == '{' /* This should match regcomp.c */
3295 || (s[2] == '?' && s[3] == '{')))
3301 /* likewise skip #-initiated comments in //x patterns */
3302 else if (*s == '#' && PL_lex_inpat &&
3303 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3304 while (s+1 < send && *s != '\n')
3305 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3308 /* no further processing of single-quoted regex */
3309 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3310 goto default_action;
3312 /* check for embedded arrays
3313 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3315 else if (*s == '@' && s[1]) {
3316 if (isWORDCHAR_lazy_if(s+1,UTF))
3318 if (strchr(":'{$", s[1]))
3320 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3321 break; /* in regexp, neither @+ nor @- are interpolated */
3324 /* check for embedded scalars. only stop if we're sure it's a
3327 else if (*s == '$') {
3328 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3330 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3332 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3333 "Possible unintended interpolation of $\\ in regex");
3335 break; /* in regexp, $ might be tail anchor */
3339 /* End of else if chain - OP_TRANS rejoin rest */
3342 if (*s == '\\' && s+1 < send) {
3343 char* e; /* Can be used for ending '}', etc. */
3347 /* warn on \1 - \9 in substitution replacements, but note that \11
3348 * is an octal; and \19 is \1 followed by '9' */
3349 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3350 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3352 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3357 /* string-change backslash escapes */
3358 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3362 /* In a pattern, process \N, but skip any other backslash escapes.
3363 * This is because we don't want to translate an escape sequence
3364 * into a meta symbol and have the regex compiler use the meta
3365 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3366 * in spite of this, we do have to process \N here while the proper
3367 * charnames handler is in scope. See bugs #56444 and #62056.
3368 * There is a complication because \N in a pattern may also stand
3369 * for 'match a non-nl', and not mean a charname, in which case its
3370 * processing should be deferred to the regex compiler. To be a
3371 * charname it must be followed immediately by a '{', and not look
3372 * like \N followed by a curly quantifier, i.e., not something like
3373 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3375 else if (PL_lex_inpat
3378 || regcurly(s + 1, FALSE)))
3380 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3381 goto default_action;
3386 /* quoted - in transliterations */
3388 if (PL_lex_inwhat == OP_TRANS) {
3395 if ((isALPHANUMERIC(*s)))
3396 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3397 "Unrecognized escape \\%c passed through",
3399 /* default action is to copy the quoted character */
3400 goto default_action;
3403 /* eg. \132 indicates the octal constant 0132 */
3404 case '0': case '1': case '2': case '3':
3405 case '4': case '5': case '6': case '7':
3407 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3409 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3411 if (len < 3 && s < send && isDIGIT(*s)
3412 && ckWARN(WARN_MISC))
3414 Perl_warner(aTHX_ packWARN(WARN_MISC),
3415 "%s", form_short_octal_warning(s, len));
3418 goto NUM_ESCAPE_INSERT;
3420 /* eg. \o{24} indicates the octal constant \024 */
3425 bool valid = grok_bslash_o(&s, &uv, &error,
3426 TRUE, /* Output warning */
3427 FALSE, /* Not strict */
3428 TRUE, /* Output warnings for
3435 goto NUM_ESCAPE_INSERT;
3438 /* eg. \x24 indicates the hex constant 0x24 */
3443 bool valid = grok_bslash_x(&s, &uv, &error,
3444 TRUE, /* Output warning */
3445 FALSE, /* Not strict */
3446 TRUE, /* Output warnings for
3456 /* Insert oct or hex escaped character. There will always be
3457 * enough room in sv since such escapes will be longer than any
3458 * UTF-8 sequence they can end up as, except if they force us
3459 * to recode the rest of the string into utf8 */
3461 /* Here uv is the ordinal of the next character being added in
3462 * unicode (converted from native). */
3463 if (!UNI_IS_INVARIANT(uv)) {
3464 if (!has_utf8 && uv > 255) {
3465 /* Might need to recode whatever we have accumulated so
3466 * far if it contains any chars variant in utf8 or
3469 SvCUR_set(sv, d - SvPVX_const(sv));
3472 /* See Note on sizing above. */
3473 sv_utf8_upgrade_flags_grow(sv,
3474 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3475 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3476 d = SvPVX(sv) + SvCUR(sv);
3481 d = (char*)uvuni_to_utf8((U8*)d, uv);
3482 if (PL_lex_inwhat == OP_TRANS &&
3483 PL_sublex_info.sub_op) {
3484 PL_sublex_info.sub_op->op_private |=
3485 (PL_lex_repl ? OPpTRANS_FROM_UTF
3489 if (uv > 255 && !dorange)
3490 native_range = FALSE;
3503 /* In a non-pattern \N must be a named character, like \N{LATIN
3504 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3505 * mean to match a non-newline. For non-patterns, named
3506 * characters are converted to their string equivalents. In
3507 * patterns, named characters are not converted to their
3508 * ultimate forms for the same reasons that other escapes
3509 * aren't. Instead, they are converted to the \N{U+...} form
3510 * to get the value from the charnames that is in effect right
3511 * now, while preserving the fact that it was a named character
3512 * so that the regex compiler knows this */
3514 /* This section of code doesn't generally use the
3515 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3516 * a close examination of this macro and determined it is a
3517 * no-op except on utfebcdic variant characters. Every
3518 * character generated by this that would normally need to be
3519 * enclosed by this macro is invariant, so the macro is not
3520 * needed, and would complicate use of copy(). XXX There are
3521 * other parts of this file where the macro is used
3522 * inconsistently, but are saved by it being a no-op */
3524 /* The structure of this section of code (besides checking for
3525 * errors and upgrading to utf8) is:
3526 * Further disambiguate between the two meanings of \N, and if
3527 * not a charname, go process it elsewhere
3528 * If of form \N{U+...}, pass it through if a pattern;
3529 * otherwise convert to utf8
3530 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3531 * pattern; otherwise convert to utf8 */
3533 /* Here, s points to the 'N'; the test below is guaranteed to
3534 * succeed if we are being called on a pattern as we already
3535 * know from a test above that the next character is a '{'.
3536 * On a non-pattern \N must mean 'named sequence, which
3537 * requires braces */
3540 yyerror("Missing braces on \\N{}");
3545 /* If there is no matching '}', it is an error. */
3546 if (! (e = strchr(s, '}'))) {
3547 if (! PL_lex_inpat) {
3548 yyerror("Missing right brace on \\N{}");
3550 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3555 /* Here it looks like a named character */
3557 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3558 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3559 | PERL_SCAN_DISALLOW_PREFIX;
3562 /* For \N{U+...}, the '...' is a unicode value even on
3563 * EBCDIC machines */
3564 s += 2; /* Skip to next char after the 'U+' */
3566 uv = grok_hex(s, &len, &flags, NULL);
3567 if (len == 0 || len != (STRLEN)(e - s)) {
3568 yyerror("Invalid hexadecimal number in \\N{U+...}");
3575 /* On non-EBCDIC platforms, pass through to the regex
3576 * compiler unchanged. The reason we evaluated the
3577 * number above is to make sure there wasn't a syntax
3578 * error. But on EBCDIC we convert to native so
3579 * downstream code can continue to assume it's native
3581 s -= 5; /* Include the '\N{U+' */
3583 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3586 (unsigned int) UNI_TO_NATIVE(uv));
3588 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3592 else { /* Not a pattern: convert the hex to string */
3594 /* If destination is not in utf8, unconditionally
3595 * recode it to be so. This is because \N{} implies
3596 * Unicode semantics, and scalars have to be in utf8
3597 * to guarantee those semantics */
3599 SvCUR_set(sv, d - SvPVX_const(sv));
3602 /* See Note on sizing above. */
3603 sv_utf8_upgrade_flags_grow(
3605 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3606 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3607 d = SvPVX(sv) + SvCUR(sv);
3611 /* Add the string to the output */
3612 if (UNI_IS_INVARIANT(uv)) {
3615 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3618 else /* Here is \N{NAME} but not \N{U+...}. */
3619 if ((res = get_and_check_backslash_N_name(s, e)))
3622 const char *str = SvPV_const(res, len);
3625 if (! len) { /* The name resolved to an empty string */
3626 Copy("\\N{}", d, 4, char);
3630 /* In order to not lose information for the regex
3631 * compiler, pass the result in the specially made
3632 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3633 * the code points in hex of each character
3634 * returned by charnames */
3636 const char *str_end = str + len;
3637 const STRLEN off = d - SvPVX_const(sv);
3639 if (! SvUTF8(res)) {
3640 /* For the non-UTF-8 case, we can determine the
3641 * exact length needed without having to parse
3642 * through the string. Each character takes up
3643 * 2 hex digits plus either a trailing dot or
3645 d = off + SvGROW(sv, off
3647 + 6 /* For the "\N{U+", and
3649 + (STRLEN)(send - e));
3650 Copy("\\N{U+", d, 5, char);
3652 while (str < str_end) {
3654 my_snprintf(hex_string, sizeof(hex_string),
3655 "%02X.", (U8) *str);
3656 Copy(hex_string, d, 3, char);
3660 d--; /* We will overwrite below the final
3661 dot with a right brace */
3664 STRLEN char_length; /* cur char's byte length */
3666 /* and the number of bytes after this is
3667 * translated into hex digits */
3668 STRLEN output_length;
3670 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3671 * for max('U+', '.'); and 1 for NUL */
3672 char hex_string[2 * UTF8_MAXBYTES + 5];
3674 /* Get the first character of the result. */
3675 U32 uv = utf8n_to_uvuni((U8 *) str,
3679 /* Convert first code point to hex, including
3680 * the boiler plate before it. For all these,
3681 * we convert to native format so that
3682 * downstream code can continue to assume the
3683 * input is native */
3685 my_snprintf(hex_string, sizeof(hex_string),
3687 (unsigned int) UNI_TO_NATIVE(uv));
3689 /* Make sure there is enough space to hold it */
3690 d = off + SvGROW(sv, off
3692 + (STRLEN)(send - e)
3693 + 2); /* '}' + NUL */
3695 Copy(hex_string, d, output_length, char);
3698 /* For each subsequent character, append dot and
3699 * its ordinal in hex */
3700 while ((str += char_length) < str_end) {
3701 const STRLEN off = d - SvPVX_const(sv);
3702 U32 uv = utf8n_to_uvuni((U8 *) str,
3707 my_snprintf(hex_string,
3710 (unsigned int) UNI_TO_NATIVE(uv));
3712 d = off + SvGROW(sv, off
3714 + (STRLEN)(send - e)
3715 + 2); /* '}' + NUL */
3716 Copy(hex_string, d, output_length, char);
3721 *d++ = '}'; /* Done. Add the trailing brace */
3724 else { /* Here, not in a pattern. Convert the name to a
3727 /* If destination is not in utf8, unconditionally
3728 * recode it to be so. This is because \N{} implies
3729 * Unicode semantics, and scalars have to be in utf8
3730 * to guarantee those semantics */
3732 SvCUR_set(sv, d - SvPVX_const(sv));
3735 /* See Note on sizing above. */
3736 sv_utf8_upgrade_flags_grow(sv,
3737 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3738 len + (STRLEN)(send - s) + 1);
3739 d = SvPVX(sv) + SvCUR(sv);
3741 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3743 /* See Note on sizing above. (NOTE: SvCUR() is not
3744 * set correctly here). */
3745 const STRLEN off = d - SvPVX_const(sv);
3746 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3748 Copy(str, d, len, char);
3754 } /* End \N{NAME} */
3757 native_range = FALSE; /* \N{} is defined to be Unicode */
3759 s = e + 1; /* Point to just after the '}' */
3762 /* \c is a control character */
3766 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3769 yyerror("Missing control char name in \\c");
3773 /* printf-style backslashes, formfeeds, newlines, etc */
3775 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3778 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3781 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3784 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3787 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3790 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3793 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3799 } /* end if (backslash) */
3806 /* If we started with encoded form, or already know we want it,
3807 then encode the next character */
3808 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3812 /* One might think that it is wasted effort in the case of the
3813 * source being utf8 (this_utf8 == TRUE) to take the next character
3814 * in the source, convert it to an unsigned value, and then convert
3815 * it back again. But the source has not been validated here. The
3816 * routine that does the conversion checks for errors like
3819 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3820 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3822 SvCUR_set(sv, d - SvPVX_const(sv));
3825 /* See Note on sizing above. */
3826 sv_utf8_upgrade_flags_grow(sv,
3827 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3828 need + (STRLEN)(send - s) + 1);
3829 d = SvPVX(sv) + SvCUR(sv);
3831 } else if (need > len) {
3832 /* encoded value larger than old, may need extra space (NOTE:
3833 * SvCUR() is not set correctly here). See Note on sizing
3835 const STRLEN off = d - SvPVX_const(sv);
3836 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3840 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3842 if (uv > 255 && !dorange)
3843 native_range = FALSE;
3847 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3849 } /* while loop to process each character */
3851 /* terminate the string and set up the sv */
3853 SvCUR_set(sv, d - SvPVX_const(sv));
3854 if (SvCUR(sv) >= SvLEN(sv))
3855 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3856 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3859 if (PL_encoding && !has_utf8) {
3860 sv_recode_to_utf8(sv, PL_encoding);
3866 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3867 PL_sublex_info.sub_op->op_private |=
3868 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3872 /* shrink the sv if we allocated more than we used */
3873 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3874 SvPV_shrink_to_cur(sv);
3877 /* return the substring (via pl_yylval) only if we parsed anything */
3878 if (s > PL_bufptr) {
3879 SvREFCNT_inc_simple_void_NN(sv);
3880 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3881 && ! PL_parser->lex_re_reparsing)
3883 const char *const key = PL_lex_inpat ? "qr" : "q";
3884 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3888 if (PL_lex_inwhat == OP_TRANS) {
3891 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3894 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3902 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3905 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3907 LEAVE_with_name("scan_const");
3912 * Returns TRUE if there's more to the expression (e.g., a subscript),
3915 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3917 * ->[ and ->{ return TRUE
3918 * { and [ outside a pattern are always subscripts, so return TRUE
3919 * if we're outside a pattern and it's not { or [, then return FALSE
3920 * if we're in a pattern and the first char is a {
3921 * {4,5} (any digits around the comma) returns FALSE
3922 * if we're in a pattern and the first char is a [
3924 * [SOMETHING] has a funky algorithm to decide whether it's a
3925 * character class or not. It has to deal with things like
3926 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3927 * anything else returns TRUE
3930 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3933 S_intuit_more(pTHX_ char *s)
3937 PERL_ARGS_ASSERT_INTUIT_MORE;
3939 if (PL_lex_brackets)
3941 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3943 if (*s != '{' && *s != '[')
3948 /* In a pattern, so maybe we have {n,m}. */
3950 if (regcurly(s, FALSE)) {
3956 /* On the other hand, maybe we have a character class */
3959 if (*s == ']' || *s == '^')
3962 /* this is terrifying, and it works */
3965 const char * const send = strchr(s,']');
3966 unsigned char un_char, last_un_char;
3967 char tmpbuf[sizeof PL_tokenbuf * 4];
3969 if (!send) /* has to be an expression */
3971 weight = 2; /* let's weigh the evidence */
3975 else if (isDIGIT(*s)) {
3977 if (isDIGIT(s[1]) && s[2] == ']')
3983 Zero(seen,256,char);
3985 for (; s < send; s++) {
3986 last_un_char = un_char;
3987 un_char = (unsigned char)*s;
3992 weight -= seen[un_char] * 10;
3993 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3995 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3996 len = (int)strlen(tmpbuf);
3997 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3998 UTF ? SVf_UTF8 : 0, SVt_PV))
4003 else if (*s == '$' && s[1] &&
4004 strchr("[#!%*<>()-=",s[1])) {
4005 if (/*{*/ strchr("])} =",s[2]))
4014 if (strchr("wds]",s[1]))
4016 else if (seen[(U8)'\''] || seen[(U8)'"'])
4018 else if (strchr("rnftbxcav",s[1]))
4020 else if (isDIGIT(s[1])) {
4022 while (s[1] && isDIGIT(s[1]))
4032 if (strchr("aA01! ",last_un_char))
4034 if (strchr("zZ79~",s[1]))
4036 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4037 weight -= 5; /* cope with negative subscript */
4040 if (!isWORDCHAR(last_un_char)
4041 && !(last_un_char == '$' || last_un_char == '@'
4042 || last_un_char == '&')
4043 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4048 if (keyword(tmpbuf, d - tmpbuf, 0))
4051 if (un_char == last_un_char + 1)
4053 weight -= seen[un_char];
4058 if (weight >= 0) /* probably a character class */
4068 * Does all the checking to disambiguate
4070 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4071 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4073 * First argument is the stuff after the first token, e.g. "bar".
4075 * Not a method if foo is a filehandle.
4076 * Not a method if foo is a subroutine prototyped to take a filehandle.
4077 * Not a method if it's really "Foo $bar"
4078 * Method if it's "foo $bar"
4079 * Not a method if it's really "print foo $bar"
4080 * Method if it's really "foo package::" (interpreted as package->foo)
4081 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4082 * Not a method if bar is a filehandle or package, but is quoted with
4087 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4090 char *s = start + (*start == '$');
4091 char tmpbuf[sizeof PL_tokenbuf];
4098 PERL_ARGS_ASSERT_INTUIT_METHOD;
4100 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4102 if (cv && SvPOK(cv)) {
4103 const char *proto = CvPROTO(cv);
4105 while (*proto && (isSPACE(*proto) || *proto == ';'))
4112 if (*start == '$') {
4113 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4114 isUPPER(*PL_tokenbuf))
4117 len = start - SvPVX(PL_linestr);
4121 start = SvPVX(PL_linestr) + len;
4125 return *s == '(' ? FUNCMETH : METHOD;
4128 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4129 /* start is the beginning of the possible filehandle/object,
4130 * and s is the end of it
4131 * tmpbuf is a copy of it (but with single quotes as double colons)
4134 if (!keyword(tmpbuf, len, 0)) {
4135 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4139 soff = s - SvPVX(PL_linestr);
4143 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4144 if (indirgv && GvCVu(indirgv))
4146 /* filehandle or package name makes it a method */
4147 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4149 soff = s - SvPVX(PL_linestr);
4152 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4153 return 0; /* no assumptions -- "=>" quotes bareword */
4155 start_force(PL_curforce);
4156 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4157 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4158 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4160 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4161 ( UTF ? SVf_UTF8 : 0 )));
4166 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4168 return *s == '(' ? FUNCMETH : METHOD;
4174 /* Encoded script support. filter_add() effectively inserts a
4175 * 'pre-processing' function into the current source input stream.
4176 * Note that the filter function only applies to the current source file
4177 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4179 * The datasv parameter (which may be NULL) can be used to pass
4180 * private data to this instance of the filter. The filter function
4181 * can recover the SV using the FILTER_DATA macro and use it to
4182 * store private buffers and state information.
4184 * The supplied datasv parameter is upgraded to a PVIO type
4185 * and the IoDIRP/IoANY field is used to store the function pointer,
4186 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4187 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4188 * private use must be set using malloc'd pointers.
4192 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4201 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4202 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4204 if (!PL_rsfp_filters)
4205 PL_rsfp_filters = newAV();
4208 SvUPGRADE(datasv, SVt_PVIO);
4209 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4210 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4211 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4212 FPTR2DPTR(void *, IoANY(datasv)),
4213 SvPV_nolen(datasv)));
4214 av_unshift(PL_rsfp_filters, 1);
4215 av_store(PL_rsfp_filters, 0, datasv) ;
4217 !PL_parser->filtered
4218 && PL_parser->lex_flags & LEX_EVALBYTES
4219 && PL_bufptr < PL_bufend
4221 const char *s = PL_bufptr;
4222 while (s < PL_bufend) {
4224 SV *linestr = PL_parser->linestr;
4225 char *buf = SvPVX(linestr);
4226 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4227 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4228 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4229 STRLEN const linestart_pos = PL_parser->linestart - buf;
4230 STRLEN const last_uni_pos =
4231 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4232 STRLEN const last_lop_pos =
4233 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4234 av_push(PL_rsfp_filters, linestr);
4235 PL_parser->linestr =
4236 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4237 buf = SvPVX(PL_parser->linestr);
4238 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4239 PL_parser->bufptr = buf + bufptr_pos;
4240 PL_parser->oldbufptr = buf + oldbufptr_pos;
4241 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4242 PL_parser->linestart = buf + linestart_pos;
4243 if (PL_parser->last_uni)
4244 PL_parser->last_uni = buf + last_uni_pos;
4245 if (PL_parser->last_lop)
4246 PL_parser->last_lop = buf + last_lop_pos;
4247 SvLEN(linestr) = SvCUR(linestr);
4248 SvCUR(linestr) = s-SvPVX(linestr);
4249 PL_parser->filtered = 1;
4259 /* Delete most recently added instance of this filter function. */
4261 Perl_filter_del(pTHX_ filter_t funcp)
4266 PERL_ARGS_ASSERT_FILTER_DEL;
4269 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4270 FPTR2DPTR(void*, funcp)));
4272 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4274 /* if filter is on top of stack (usual case) just pop it off */
4275 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4276 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4277 sv_free(av_pop(PL_rsfp_filters));
4281 /* we need to search for the correct entry and clear it */
4282 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4286 /* Invoke the idxth filter function for the current rsfp. */
4287 /* maxlen 0 = read one text line */
4289 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4294 /* This API is bad. It should have been using unsigned int for maxlen.
4295 Not sure if we want to change the API, but if not we should sanity
4296 check the value here. */
4297 unsigned int correct_length
4306 PERL_ARGS_ASSERT_FILTER_READ;
4308 if (!PL_parser || !PL_rsfp_filters)
4310 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4311 /* Provide a default input filter to make life easy. */
4312 /* Note that we append to the line. This is handy. */
4313 DEBUG_P(PerlIO_printf(Perl_debug_log,
4314 "filter_read %d: from rsfp\n", idx));
4315 if (correct_length) {
4318 const int old_len = SvCUR(buf_sv);
4320 /* ensure buf_sv is large enough */
4321 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4322 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4323 correct_length)) <= 0) {
4324 if (PerlIO_error(PL_rsfp))
4325 return -1; /* error */
4327 return 0 ; /* end of file */
4329 SvCUR_set(buf_sv, old_len + len) ;
4330 SvPVX(buf_sv)[old_len + len] = '\0';
4333 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4334 if (PerlIO_error(PL_rsfp))
4335 return -1; /* error */
4337 return 0 ; /* end of file */
4340 return SvCUR(buf_sv);
4342 /* Skip this filter slot if filter has been deleted */
4343 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4344 DEBUG_P(PerlIO_printf(Perl_debug_log,
4345 "filter_read %d: skipped (filter deleted)\n",
4347 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4349 if (SvTYPE(datasv) != SVt_PVIO) {
4350 if (correct_length) {
4352 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4353 if (!remainder) return 0; /* eof */
4354 if (correct_length > remainder) correct_length = remainder;
4355 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4356 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4359 const char *s = SvEND(datasv);
4360 const char *send = SvPVX(datasv) + SvLEN(datasv);
4368 if (s == send) return 0; /* eof */
4369 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4370 SvCUR_set(datasv, s-SvPVX(datasv));
4372 return SvCUR(buf_sv);
4374 /* Get function pointer hidden within datasv */
4375 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4376 DEBUG_P(PerlIO_printf(Perl_debug_log,
4377 "filter_read %d: via function %p (%s)\n",
4378 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4379 /* Call function. The function is expected to */
4380 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4381 /* Return: <0:error, =0:eof, >0:not eof */
4382 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4386 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4390 PERL_ARGS_ASSERT_FILTER_GETS;
4392 #ifdef PERL_CR_FILTER
4393 if (!PL_rsfp_filters) {
4394 filter_add(S_cr_textfilter,NULL);
4397 if (PL_rsfp_filters) {
4399 SvCUR_set(sv, 0); /* start with empty line */
4400 if (FILTER_READ(0, sv, 0) > 0)
4401 return ( SvPVX(sv) ) ;
4406 return (sv_gets(sv, PL_rsfp, append));
4410 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4415 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4417 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4421 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4422 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4424 return GvHV(gv); /* Foo:: */
4427 /* use constant CLASS => 'MyClass' */
4428 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4429 if (gv && GvCV(gv)) {
4430 SV * const sv = cv_const_sv(GvCV(gv));
4432 pkgname = SvPV_const(sv, len);
4435 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4439 * S_readpipe_override
4440 * Check whether readpipe() is overridden, and generates the appropriate
4441 * optree, provided sublex_start() is called afterwards.
4444 S_readpipe_override(pTHX)
4447 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4448 pl_yylval.ival = OP_BACKTICK;
4450 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4452 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4453 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4454 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4456 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4457 op_append_elem(OP_LIST,
4458 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4459 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4466 * The intent of this yylex wrapper is to minimize the changes to the
4467 * tokener when we aren't interested in collecting madprops. It remains
4468 * to be seen how successful this strategy will be...
4475 char *s = PL_bufptr;
4477 /* make sure PL_thiswhite is initialized */
4481 /* previous token ate up our whitespace? */
4482 if (!PL_lasttoke && PL_nextwhite) {
4483 PL_thiswhite = PL_nextwhite;
4487 /* isolate the token, and figure out where it is without whitespace */
4488 PL_realtokenstart = -1;
4492 assert(PL_curforce < 0);
4494 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4495 if (!PL_thistoken) {
4496 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4497 PL_thistoken = newSVpvs("");
4499 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4500 PL_thistoken = newSVpvn(tstart, s - tstart);
4503 if (PL_thismad) /* install head */
4504 CURMAD('X', PL_thistoken);
4507 /* last whitespace of a sublex? */
4508 if (optype == ')' && PL_endwhite) {
4509 CURMAD('X', PL_endwhite);
4514 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4515 if (!PL_thiswhite && !PL_endwhite && !optype) {
4516 sv_free(PL_thistoken);
4521 /* put off final whitespace till peg */
4522 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4523 PL_nextwhite = PL_thiswhite;
4526 else if (PL_thisopen) {
4527 CURMAD('q', PL_thisopen);
4529 sv_free(PL_thistoken);
4533 /* Store actual token text as madprop X */
4534 CURMAD('X', PL_thistoken);
4538 /* add preceding whitespace as madprop _ */
4539 CURMAD('_', PL_thiswhite);
4543 /* add quoted material as madprop = */
4544 CURMAD('=', PL_thisstuff);
4548 /* add terminating quote as madprop Q */
4549 CURMAD('Q', PL_thisclose);
4553 /* special processing based on optype */
4557 /* opval doesn't need a TOKEN since it can already store mp */
4567 if (pl_yylval.opval)
4568 append_madprops(PL_thismad, pl_yylval.opval, 0);
4576 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4589 /* remember any fake bracket that lexer is about to discard */
4590 if (PL_lex_brackets == 1 &&
4591 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4594 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4597 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4598 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4601 break; /* don't bother looking for trailing comment */
4610 /* attach a trailing comment to its statement instead of next token */
4614 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4616 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4618 if (*s == '\n' || *s == '#') {
4619 while (s < PL_bufend && *s != '\n')
4623 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4624 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4637 /* Create new token struct. Note: opvals return early above. */
4638 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4645 S_tokenize_use(pTHX_ int is_use, char *s) {
4648 PERL_ARGS_ASSERT_TOKENIZE_USE;
4650 if (PL_expect != XSTATE)
4651 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4652 is_use ? "use" : "no"));
4655 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4656 s = force_version(s, TRUE);
4657 if (*s == ';' || *s == '}'
4658 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4659 start_force(PL_curforce);
4660 NEXTVAL_NEXTTOKE.opval = NULL;
4663 else if (*s == 'v') {
4664 s = force_word(s,WORD,FALSE,TRUE);
4665 s = force_version(s, FALSE);
4669 s = force_word(s,WORD,FALSE,TRUE);
4670 s = force_version(s, FALSE);
4672 pl_yylval.ival = is_use;
4676 static const char* const exp_name[] =
4677 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4678 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4682 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4684 S_word_takes_any_delimeter(char *p, STRLEN len)
4686 return (len == 1 && strchr("msyq", p[0])) ||
4688 (p[0] == 't' && p[1] == 'r') ||
4689 (p[0] == 'q' && strchr("qwxr", p[1]))));
4695 Works out what to call the token just pulled out of the input
4696 stream. The yacc parser takes care of taking the ops we return and
4697 stitching them into a tree.
4700 The type of the next token
4703 Switch based on the current state:
4704 - if we already built the token before, use it
4705 - if we have a case modifier in a string, deal with that
4706 - handle other cases of interpolation inside a string
4707 - scan the next line if we are inside a format
4708 In the normal state switch on the next character:
4710 if alphabetic, go to key lookup
4711 unrecoginized character - croak
4712 - 0/4/26: handle end-of-line or EOF
4713 - cases for whitespace
4714 - \n and #: handle comments and line numbers
4715 - various operators, brackets and sigils
4718 - 'v': vstrings (or go to key lookup)
4719 - 'x' repetition operator (or go to key lookup)
4720 - other ASCII alphanumerics (key lookup begins here):
4723 scan built-in keyword (but do nothing with it yet)
4724 check for statement label
4725 check for lexical subs
4726 goto just_a_word if there is one
4727 see whether built-in keyword is overridden
4728 switch on keyword number:
4729 - default: just_a_word:
4730 not a built-in keyword; handle bareword lookup
4731 disambiguate between method and sub call
4732 fall back to bareword
4733 - cases for built-in keywords
4738 #pragma segment Perl_yylex
4744 char *s = PL_bufptr;
4748 const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
4752 /* orig_keyword, gvp, and gv are initialized here because
4753 * jump to the label just_a_word_zero can bypass their
4754 * initialization later. */
4755 I32 orig_keyword = 0;
4760 SV* tmp = newSVpvs("");
4761 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4762 (IV)CopLINE(PL_curcop),
4763 lex_state_names[PL_lex_state],
4764 exp_name[PL_expect],
4765 pv_display(tmp, s, strlen(s), 0, 60));
4769 switch (PL_lex_state) {
4771 case LEX_NORMAL: /* Some compilers will produce faster */
4772 case LEX_INTERPNORMAL: /* code if we comment these out. */
4776 /* when we've already built the next token, just pull it out of the queue */
4780 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4782 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4783 PL_nexttoke[PL_lasttoke].next_mad = 0;
4784 if (PL_thismad && PL_thismad->mad_key == '_') {
4785 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4786 PL_thismad->mad_val = 0;
4787 mad_free(PL_thismad);
4792 PL_lex_state = PL_lex_defer;
4793 PL_expect = PL_lex_expect;
4794 PL_lex_defer = LEX_NORMAL;
4795 if (!PL_nexttoke[PL_lasttoke].next_type)
4800 pl_yylval = PL_nextval[PL_nexttoke];
4802 PL_lex_state = PL_lex_defer;
4803 PL_expect = PL_lex_expect;
4804 PL_lex_defer = LEX_NORMAL;
4810 next_type = PL_nexttoke[PL_lasttoke].next_type;
4812 next_type = PL_nexttype[PL_nexttoke];
4814 if (next_type & (7<<24)) {
4815 if (next_type & (1<<24)) {
4816 if (PL_lex_brackets > 100)
4817 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4818 PL_lex_brackstack[PL_lex_brackets++] =
4819 (char) ((next_type >> 16) & 0xff);
4821 if (next_type & (2<<24))
4822 PL_lex_allbrackets++;
4823 if (next_type & (4<<24))
4824 PL_lex_allbrackets--;
4825 next_type &= 0xffff;
4827 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4830 /* interpolated case modifiers like \L \U, including \Q and \E.
4831 when we get here, PL_bufptr is at the \
4833 case LEX_INTERPCASEMOD:
4835 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4837 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4838 PL_bufptr, PL_bufend, *PL_bufptr);
4840 /* handle \E or end of string */
4841 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4843 if (PL_lex_casemods) {
4844 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4845 PL_lex_casestack[PL_lex_casemods] = '\0';
4847 if (PL_bufptr != PL_bufend
4848 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4849 || oldmod == 'F')) {
4851 PL_lex_state = LEX_INTERPCONCAT;
4854 PL_thistoken = newSVpvs("\\E");
4857 PL_lex_allbrackets--;
4860 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4861 /* Got an unpaired \E */
4862 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4863 "Useless use of \\E");
4866 while (PL_bufptr != PL_bufend &&
4867 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4870 PL_thiswhite = newSVpvs("");
4871 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4876 if (PL_bufptr != PL_bufend)
4879 PL_lex_state = LEX_INTERPCONCAT;
4883 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4884 "### Saw case modifier\n"); });
4886 if (s[1] == '\\' && s[2] == 'E') {
4890 PL_thiswhite = newSVpvs("");
4891 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4895 PL_lex_state = LEX_INTERPCONCAT;
4900 if (!PL_madskills) /* when just compiling don't need correct */
4901 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4902 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4903 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4904 (strchr(PL_lex_casestack, 'L')
4905 || strchr(PL_lex_casestack, 'U')
4906 || strchr(PL_lex_casestack, 'F'))) {
4907 PL_lex_casestack[--PL_lex_casemods] = '\0';
4908 PL_lex_allbrackets--;
4911 if (PL_lex_casemods > 10)
4912 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4913 PL_lex_casestack[PL_lex_casemods++] = *s;
4914 PL_lex_casestack[PL_lex_casemods] = '\0';
4915 PL_lex_state = LEX_INTERPCONCAT;
4916 start_force(PL_curforce);
4917 NEXTVAL_NEXTTOKE.ival = 0;
4918 force_next((2<<24)|'(');
4919 start_force(PL_curforce);
4921 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4923 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4925 NEXTVAL_NEXTTOKE.ival = OP_LC;
4927 NEXTVAL_NEXTTOKE.ival = OP_UC;
4929 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4931 NEXTVAL_NEXTTOKE.ival = OP_FC;
4933 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4935 SV* const tmpsv = newSVpvs("\\ ");
4936 /* replace the space with the character we want to escape
4938 SvPVX(tmpsv)[1] = *s;
4944 if (PL_lex_starts) {
4950 sv_free(PL_thistoken);
4951 PL_thistoken = newSVpvs("");
4954 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4955 if (PL_lex_casemods == 1 && PL_lex_inpat)
4964 case LEX_INTERPPUSH:
4965 return REPORT(sublex_push());
4967 case LEX_INTERPSTART:
4968 if (PL_bufptr == PL_bufend)
4969 return REPORT(sublex_done());
4970 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4971 "### Interpolated variable\n"); });
4973 /* for /@a/, we leave the joining for the regex engine to do
4974 * (unless we're within \Q etc) */
4975 PL_lex_dojoin = (*PL_bufptr == '@'
4976 && (!PL_lex_inpat || PL_lex_casemods));
4977 PL_lex_state = LEX_INTERPNORMAL;
4978 if (PL_lex_dojoin) {
4979 start_force(PL_curforce);
4980 NEXTVAL_NEXTTOKE.ival = 0;
4982 start_force(PL_curforce);
4983 force_ident("\"", '$');
4984 start_force(PL_curforce);
4985 NEXTVAL_NEXTTOKE.ival = 0;
4987 start_force(PL_curforce);
4988 NEXTVAL_NEXTTOKE.ival = 0;
4989 force_next((2<<24)|'(');
4990 start_force(PL_curforce);
4991 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4994 /* Convert (?{...}) and friends to 'do {...}' */
4995 if (PL_lex_inpat && *PL_bufptr == '(') {
4996 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4998 if (*PL_bufptr != '{')
5000 start_force(PL_curforce);
5001 /* XXX probably need a CURMAD(something) here */
5002 PL_expect = XTERMBLOCK;
5006 if (PL_lex_starts++) {
5011 sv_free(PL_thistoken);
5012 PL_thistoken = newSVpvs("");
5015 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5016 if (!PL_lex_casemods && PL_lex_inpat)
5023 case LEX_INTERPENDMAYBE:
5024 if (intuit_more(PL_bufptr)) {
5025 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5031 if (PL_lex_dojoin) {
5032 PL_lex_dojoin = FALSE;
5033 PL_lex_state = LEX_INTERPCONCAT;
5037 sv_free(PL_thistoken);
5038 PL_thistoken = newSVpvs("");
5041 PL_lex_allbrackets--;
5044 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5045 && SvEVALED(PL_lex_repl))
5047 if (PL_bufptr != PL_bufend)
5048 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5051 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5052 re_eval_str. If the here-doc body’s length equals the previous
5053 value of re_eval_start, re_eval_start will now be null. So
5054 check re_eval_str as well. */
5055 if (PL_parser->lex_shared->re_eval_start
5056 || PL_parser->lex_shared->re_eval_str) {
5058 if (*PL_bufptr != ')')
5059 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5061 /* having compiled a (?{..}) expression, return the original
5062 * text too, as a const */
5063 if (PL_parser->lex_shared->re_eval_str) {
5064 sv = PL_parser->lex_shared->re_eval_str;
5065 PL_parser->lex_shared->re_eval_str = NULL;
5067 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5068 SvPV_shrink_to_cur(sv);
5070 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5071 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5072 start_force(PL_curforce);
5073 /* XXX probably need a CURMAD(something) here */
5074 NEXTVAL_NEXTTOKE.opval =
5075 (OP*)newSVOP(OP_CONST, 0,
5078 PL_parser->lex_shared->re_eval_start = NULL;
5084 case LEX_INTERPCONCAT:
5086 if (PL_lex_brackets)
5087 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5088 (long) PL_lex_brackets);
5090 if (PL_bufptr == PL_bufend)
5091 return REPORT(sublex_done());
5093 /* m'foo' still needs to be parsed for possible (?{...}) */
5094 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5095 SV *sv = newSVsv(PL_linestr);
5097 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5101 s = scan_const(PL_bufptr);
5103 PL_lex_state = LEX_INTERPCASEMOD;
5105 PL_lex_state = LEX_INTERPSTART;
5108 if (s != PL_bufptr) {
5109 start_force(PL_curforce);
5111 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5113 NEXTVAL_NEXTTOKE = pl_yylval;
5116 if (PL_lex_starts++) {
5120 sv_free(PL_thistoken);
5121 PL_thistoken = newSVpvs("");
5124 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5125 if (!PL_lex_casemods && PL_lex_inpat)
5138 s = scan_formline(PL_bufptr);
5139 if (!PL_lex_formbrack)
5148 /* We really do *not* want PL_linestr ever becoming a COW. */
5149 assert (!SvIsCOW(PL_linestr));
5151 PL_oldoldbufptr = PL_oldbufptr;
5153 PL_parser->saw_infix_sigil = 0;
5158 sv_free(PL_thistoken);
5161 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5165 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5168 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5169 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5171 SVs_TEMP | SVf_UTF8),
5172 10, UNI_DISPLAY_ISPRINT))
5173 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5174 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5175 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5176 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5184 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5188 goto fake_eof; /* emulate EOF on ^D or ^Z */
5194 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5197 if (PL_lex_brackets &&
5198 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5199 yyerror((const char *)
5201 ? "Format not terminated"
5202 : "Missing right curly or square bracket"));
5204 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5205 "### Tokener got EOF\n");
5209 if (s++ < PL_bufend)
5210 goto retry; /* ignore stray nulls */
5213 if (!PL_in_eval && !PL_preambled) {
5214 PL_preambled = TRUE;
5220 /* Generate a string of Perl code to load the debugger.
5221 * If PERL5DB is set, it will return the contents of that,
5222 * otherwise a compile-time require of perl5db.pl. */
5224 const char * const pdb = PerlEnv_getenv("PERL5DB");
5227 sv_setpv(PL_linestr, pdb);
5228 sv_catpvs(PL_linestr,";");
5230 SETERRNO(0,SS_NORMAL);
5231 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5234 sv_setpvs(PL_linestr,"");
5235 if (PL_preambleav) {
5236 SV **svp = AvARRAY(PL_preambleav);
5237 SV **const end = svp + AvFILLp(PL_preambleav);
5239 sv_catsv(PL_linestr, *svp);
5241 sv_catpvs(PL_linestr, ";");
5243 sv_free(MUTABLE_SV(PL_preambleav));
5244 PL_preambleav = NULL;
5247 sv_catpvs(PL_linestr,
5248 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5249 if (PL_minus_n || PL_minus_p) {
5250 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5252 sv_catpvs(PL_linestr,"chomp;");
5255 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5256 || *PL_splitstr == '"')
5257 && strchr(PL_splitstr + 1, *PL_splitstr))
5258 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5260 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5261 bytes can be used as quoting characters. :-) */
5262 const char *splits = PL_splitstr;
5263 sv_catpvs(PL_linestr, "our @F=split(q\0");
5266 if (*splits == '\\')
5267 sv_catpvn(PL_linestr, splits, 1);
5268 sv_catpvn(PL_linestr, splits, 1);
5269 } while (*splits++);
5270 /* This loop will embed the trailing NUL of
5271 PL_linestr as the last thing it does before
5273 sv_catpvs(PL_linestr, ");");
5277 sv_catpvs(PL_linestr,"our @F=split(' ');");
5280 sv_catpvs(PL_linestr, "\n");
5281 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5282 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5283 PL_last_lop = PL_last_uni = NULL;
5284 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5285 update_debugger_info(PL_linestr, NULL, 0);
5290 bof = PL_rsfp ? TRUE : FALSE;
5293 fake_eof = LEX_FAKE_EOF;
5295 PL_bufptr = PL_bufend;
5296 COPLINE_INC_WITH_HERELINES;
5297 if (!lex_next_chunk(fake_eof)) {
5298 CopLINE_dec(PL_curcop);
5300 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5302 CopLINE_dec(PL_curcop);
5305 PL_realtokenstart = -1;
5308 /* If it looks like the start of a BOM or raw UTF-16,
5309 * check if it in fact is. */
5310 if (bof && PL_rsfp &&
5315 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5316 bof = (offset == (Off_t)SvCUR(PL_linestr));
5317 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5318 /* offset may include swallowed CR */
5320 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5323 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5324 s = swallow_bom((U8*)s);
5327 if (PL_parser->in_pod) {
5328 /* Incest with pod. */
5331 sv_catsv(PL_thiswhite, PL_linestr);
5333 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5334 sv_setpvs(PL_linestr, "");
5335 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5336 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5337 PL_last_lop = PL_last_uni = NULL;
5338 PL_parser->in_pod = 0;
5341 if (PL_rsfp || PL_parser->filtered)
5343 } while (PL_parser->in_pod);
5344 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5345 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5346 PL_last_lop = PL_last_uni = NULL;
5347 if (CopLINE(PL_curcop) == 1) {
5348 while (s < PL_bufend && isSPACE(*s))
5350 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5354 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5358 if (*s == '#' && *(s+1) == '!')
5360 #ifdef ALTERNATE_SHEBANG
5362 static char const as[] = ALTERNATE_SHEBANG;
5363 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5364 d = s + (sizeof(as) - 1);
5366 #endif /* ALTERNATE_SHEBANG */
5375 while (*d && !isSPACE(*d))
5379 #ifdef ARG_ZERO_IS_SCRIPT
5380 if (ipathend > ipath) {
5382 * HP-UX (at least) sets argv[0] to the script name,
5383 * which makes $^X incorrect. And Digital UNIX and Linux,
5384 * at least, set argv[0] to the basename of the Perl
5385 * interpreter. So, having found "#!", we'll set it right.
5387 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5389 assert(SvPOK(x) || SvGMAGICAL(x));
5390 if (sv_eq(x, CopFILESV(PL_curcop))) {
5391 sv_setpvn(x, ipath, ipathend - ipath);
5397 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5398 const char * const lstart = SvPV_const(x,llen);
5400 bstart += blen - llen;
5401 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5402 sv_setpvn(x, ipath, ipathend - ipath);
5407 TAINT_NOT; /* $^X is always tainted, but that's OK */
5409 #endif /* ARG_ZERO_IS_SCRIPT */
5414 d = instr(s,"perl -");
5416 d = instr(s,"perl");
5418 /* avoid getting into infinite loops when shebang
5419 * line contains "Perl" rather than "perl" */
5421 for (d = ipathend-4; d >= ipath; --d) {
5422 if ((*d == 'p' || *d == 'P')
5423 && !ibcmp(d, "perl", 4))
5433 #ifdef ALTERNATE_SHEBANG
5435 * If the ALTERNATE_SHEBANG on this system starts with a
5436 * character that can be part of a Perl expression, then if
5437 * we see it but not "perl", we're probably looking at the
5438 * start of Perl code, not a request to hand off to some
5439 * other interpreter. Similarly, if "perl" is there, but
5440 * not in the first 'word' of the line, we assume the line
5441 * contains the start of the Perl program.
5443 if (d && *s != '#') {
5444 const char *c = ipath;
5445 while (*c && !strchr("; \t\r\n\f\v#", *c))
5448 d = NULL; /* "perl" not in first word; ignore */
5450 *s = '#'; /* Don't try to parse shebang line */
5452 #endif /* ALTERNATE_SHEBANG */
5457 !instr(s,"indir") &&
5458 instr(PL_origargv[0],"perl"))
5465 while (s < PL_bufend && isSPACE(*s))
5467 if (s < PL_bufend) {
5468 Newx(newargv,PL_origargc+3,char*);
5470 while (s < PL_bufend && !isSPACE(*s))
5473 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5476 newargv = PL_origargv;
5479 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5481 Perl_croak(aTHX_ "Can't exec %s", ipath);
5484 while (*d && !isSPACE(*d))
5486 while (SPACE_OR_TAB(*d))
5490 const bool switches_done = PL_doswitches;
5491 const U32 oldpdb = PL_perldb;
5492 const bool oldn = PL_minus_n;
5493 const bool oldp = PL_minus_p;
5497 bool baduni = FALSE;
5499 const char *d2 = d1 + 1;
5500 if (parse_unicode_opts((const char **)&d2)
5504 if (baduni || *d1 == 'M' || *d1 == 'm') {
5505 const char * const m = d1;
5506 while (*d1 && !isSPACE(*d1))
5508 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5511 d1 = moreswitches(d1);
5513 if (PL_doswitches && !switches_done) {
5514 int argc = PL_origargc;
5515 char **argv = PL_origargv;
5518 } while (argc && argv[0][0] == '-' && argv[0][1]);
5519 init_argv_symbols(argc,argv);
5521 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5522 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5523 /* if we have already added "LINE: while (<>) {",
5524 we must not do it again */
5526 sv_setpvs(PL_linestr, "");
5527 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5528 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5529 PL_last_lop = PL_last_uni = NULL;
5530 PL_preambled = FALSE;
5531 if (PERLDB_LINE || PERLDB_SAVESRC)
5532 (void)gv_fetchfile(PL_origfilename);
5539 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5540 PL_lex_state = LEX_FORMLINE;
5541 start_force(PL_curforce);
5542 NEXTVAL_NEXTTOKE.ival = 0;
5543 force_next(FORMRBRACK);
5548 #ifdef PERL_STRICT_CR
5549 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5551 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5553 case ' ': case '\t': case '\f': case 013:
5555 PL_realtokenstart = -1;
5558 PL_thiswhite = newSVpvs("");
5559 sv_catpvn(PL_thiswhite, s, 1);
5567 PL_realtokenstart = -1;
5571 if (PL_lex_state != LEX_NORMAL ||
5572 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5573 if (*s == '#' && s == PL_linestart && PL_in_eval
5574 && !PL_rsfp && !PL_parser->filtered) {
5575 /* handle eval qq[#line 1 "foo"\n ...] */
5576 CopLINE_dec(PL_curcop);
5579 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5581 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5585 const bool in_comment = *s == '#';
5587 while (d < PL_bufend && *d != '\n')
5591 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5592 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5596 PL_thiswhite = newSVpvn(s, d - s);
5599 if (in_comment && d == PL_bufend
5600 && PL_lex_state == LEX_INTERPNORMAL
5601 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5602 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5605 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5606 PL_lex_state = LEX_FORMLINE;
5607 start_force(PL_curforce);
5608 NEXTVAL_NEXTTOKE.ival = 0;
5609 force_next(FORMRBRACK);
5615 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5616 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5619 TOKEN(PEG); /* make sure any #! line is accessible */
5624 /* if (PL_madskills && PL_lex_formbrack) { */
5626 while (d < PL_bufend && *d != '\n')
5630 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5631 Perl_croak(aTHX_ "panic: input overflow");
5632 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5634 PL_thiswhite = newSVpvs("");
5635 if (CopLINE(PL_curcop) == 1) {
5636 sv_setpvs(PL_thiswhite, "");
5639 sv_catpvn(PL_thiswhite, s, d - s);
5647 while (s < PL_bufend && *s != '\n')
5651 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5652 Perl_croak(aTHX_ "panic: input overflow");
5657 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5665 while (s < PL_bufend && SPACE_OR_TAB(*s))
5668 if (strnEQ(s,"=>",2)) {
5669 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5670 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5671 OPERATOR('-'); /* unary minus */
5673 PL_last_uni = PL_oldbufptr;
5675 case 'r': ftst = OP_FTEREAD; break;
5676 case 'w': ftst = OP_FTEWRITE; break;
5677 case 'x': ftst = OP_FTEEXEC; break;
5678 case 'o': ftst = OP_FTEOWNED; break;
5679 case 'R': ftst = OP_FTRREAD; break;
5680 case 'W': ftst = OP_FTRWRITE; break;
5681 case 'X': ftst = OP_FTREXEC; break;
5682 case 'O': ftst = OP_FTROWNED; break;
5683 case 'e': ftst = OP_FTIS; break;
5684 case 'z': ftst = OP_FTZERO; break;
5685 case 's': ftst = OP_FTSIZE; break;
5686 case 'f': ftst = OP_FTFILE; break;
5687 case 'd': ftst = OP_FTDIR; break;
5688 case 'l': ftst = OP_FTLINK; break;
5689 case 'p': ftst = OP_FTPIPE; break;
5690 case 'S': ftst = OP_FTSOCK; break;
5691 case 'u': ftst = OP_FTSUID; break;
5692 case 'g': ftst = OP_FTSGID; break;
5693 case 'k': ftst = OP_FTSVTX; break;
5694 case 'b': ftst = OP_FTBLK; break;
5695 case 'c': ftst = OP_FTCHR; break;
5696 case 't': ftst = OP_FTTTY; break;
5697 case 'T': ftst = OP_FTTEXT; break;
5698 case 'B': ftst = OP_FTBINARY; break;
5699 case 'M': case 'A': case 'C':
5700 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5702 case 'M': ftst = OP_FTMTIME; break;
5703 case 'A': ftst = OP_FTATIME; break;
5704 case 'C': ftst = OP_FTCTIME; break;
5712 PL_last_lop_op = (OPCODE)ftst;
5713 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5714 "### Saw file test %c\n", (int)tmp);
5719 /* Assume it was a minus followed by a one-letter named
5720 * subroutine call (or a -bareword), then. */
5721 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5722 "### '-%c' looked like a file test but was not\n",
5729 const char tmp = *s++;
5732 if (PL_expect == XOPERATOR)
5737 else if (*s == '>') {
5740 if (isIDFIRST_lazy_if(s,UTF)) {
5741 s = force_word(s,METHOD,FALSE,TRUE);
5749 if (PL_expect == XOPERATOR) {
5750 if (*s == '=' && !PL_lex_allbrackets &&
5751 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5758 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5760 OPERATOR('-'); /* unary minus */
5766 const char tmp = *s++;
5769 if (PL_expect == XOPERATOR)
5774 if (PL_expect == XOPERATOR) {
5775 if (*s == '=' && !PL_lex_allbrackets &&
5776 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5783 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5790 if (PL_expect != XOPERATOR) {
5791 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5792 PL_expect = XOPERATOR;
5793 force_ident(PL_tokenbuf, '*');
5801 if (*s == '=' && !PL_lex_allbrackets &&
5802 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5808 if (*s == '=' && !PL_lex_allbrackets &&
5809 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5813 PL_parser->saw_infix_sigil = 1;
5817 if (PL_expect == XOPERATOR) {
5818 if (s[1] == '=' && !PL_lex_allbrackets &&
5819 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5822 PL_parser->saw_infix_sigil = 1;
5825 PL_tokenbuf[0] = '%';
5826 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5827 sizeof PL_tokenbuf - 1, FALSE);
5828 if (!PL_tokenbuf[1]) {
5831 PL_expect = XOPERATOR;
5832 force_ident_maybe_lex('%');
5836 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5837 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5842 if (PL_lex_brackets > 100)
5843 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5844 PL_lex_brackstack[PL_lex_brackets++] = 0;
5845 PL_lex_allbrackets++;
5847 const char tmp = *s++;
5852 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5854 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5857 Perl_ck_warner_d(aTHX_
5858 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5859 "Smartmatch is experimental");
5865 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5872 goto just_a_word_zero_gv;
5875 switch (PL_expect) {
5881 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5883 PL_bufptr = s; /* update in case we back off */
5886 "Use of := for an empty attribute list is not allowed");
5893 PL_expect = XTERMBLOCK;
5896 stuffstart = s - SvPVX(PL_linestr) - 1;
5900 while (isIDFIRST_lazy_if(s,UTF)) {
5903 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5904 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5905 if (tmp < 0) tmp = -tmp;
5920 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5922 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5924 /* MUST advance bufptr here to avoid bogus
5925 "at end of line" context messages from yyerror().
5927 PL_bufptr = s + len;
5928 yyerror("Unterminated attribute parameter in attribute list");
5932 return REPORT(0); /* EOF indicator */
5936 sv_catsv(sv, PL_lex_stuff);
5937 attrs = op_append_elem(OP_LIST, attrs,
5938 newSVOP(OP_CONST, 0, sv));
5939 SvREFCNT_dec(PL_lex_stuff);
5940 PL_lex_stuff = NULL;
5943 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5945 if (PL_in_my == KEY_our) {
5946 deprecate(":unique");
5949 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5952 /* NOTE: any CV attrs applied here need to be part of
5953 the CVf_BUILTIN_ATTRS define in cv.h! */
5954 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5956 CvLVALUE_on(PL_compcv);
5958 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5960 deprecate(":locked");
5962 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5964 CvMETHOD_on(PL_compcv);
5966 /* After we've set the flags, it could be argued that
5967 we don't need to do the attributes.pm-based setting
5968 process, and shouldn't bother appending recognized
5969 flags. To experiment with that, uncomment the
5970 following "else". (Note that's already been
5971 uncommented. That keeps the above-applied built-in
5972 attributes from being intercepted (and possibly
5973 rejected) by a package's attribute routines, but is
5974 justified by the performance win for the common case
5975 of applying only built-in attributes.) */
5977 attrs = op_append_elem(OP_LIST, attrs,
5978 newSVOP(OP_CONST, 0,
5982 if (*s == ':' && s[1] != ':')
5985 break; /* require real whitespace or :'s */
5986 /* XXX losing whitespace on sequential attributes here */
5990 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5991 if (*s != ';' && *s != '}' && *s != tmp
5992 && (tmp != '=' || *s != ')')) {
5993 const char q = ((*s == '\'') ? '"' : '\'');
5994 /* If here for an expression, and parsed no attrs, back
5996 if (tmp == '=' && !attrs) {
6000 /* MUST advance bufptr here to avoid bogus "at end of line"
6001 context messages from yyerror().
6004 yyerror( (const char *)
6006 ? Perl_form(aTHX_ "Invalid separator character "
6007 "%c%c%c in attribute list", q, *s, q)
6008 : "Unterminated attribute list" ) );
6016 start_force(PL_curforce);
6017 NEXTVAL_NEXTTOKE.opval = attrs;
6018 CURMAD('_', PL_nextwhite);
6023 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6024 (s - SvPVX(PL_linestr)) - stuffstart);
6029 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6033 PL_lex_allbrackets--;
6037 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6038 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6042 PL_lex_allbrackets++;
6045 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6051 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6054 PL_lex_allbrackets--;
6060 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6063 if (PL_lex_brackets <= 0)
6064 yyerror("Unmatched right square bracket");
6067 PL_lex_allbrackets--;
6068 if (PL_lex_state == LEX_INTERPNORMAL) {
6069 if (PL_lex_brackets == 0) {
6070 if (*s == '-' && s[1] == '>')
6071 PL_lex_state = LEX_INTERPENDMAYBE;
6072 else if (*s != '[' && *s != '{')
6073 PL_lex_state = LEX_INTERPEND;
6080 if (PL_lex_brackets > 100) {
6081 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6083 switch (PL_expect) {
6085 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6086 PL_lex_allbrackets++;
6087 OPERATOR(HASHBRACK);
6089 while (s < PL_bufend && SPACE_OR_TAB(*s))
6092 PL_tokenbuf[0] = '\0';
6093 if (d < PL_bufend && *d == '-') {
6094 PL_tokenbuf[0] = '-';
6096 while (d < PL_bufend && SPACE_OR_TAB(*d))
6099 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6100 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6102 while (d < PL_bufend && SPACE_OR_TAB(*d))
6105 const char minus = (PL_tokenbuf[0] == '-');
6106 s = force_word(s + minus, WORD, FALSE, TRUE);
6114 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6115 PL_lex_allbrackets++;
6120 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6121 PL_lex_allbrackets++;
6126 if (PL_oldoldbufptr == PL_last_lop)
6127 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6129 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6130 PL_lex_allbrackets++;
6133 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6135 /* This hack is to get the ${} in the message. */
6137 yyerror("syntax error");
6140 OPERATOR(HASHBRACK);
6142 /* This hack serves to disambiguate a pair of curlies
6143 * as being a block or an anon hash. Normally, expectation
6144 * determines that, but in cases where we're not in a
6145 * position to expect anything in particular (like inside
6146 * eval"") we have to resolve the ambiguity. This code
6147 * covers the case where the first term in the curlies is a
6148 * quoted string. Most other cases need to be explicitly
6149 * disambiguated by prepending a "+" before the opening
6150 * curly in order to force resolution as an anon hash.
6152 * XXX should probably propagate the outer expectation
6153 * into eval"" to rely less on this hack, but that could
6154 * potentially break current behavior of eval"".
6158 if (*s == '\'' || *s == '"' || *s == '`') {
6159 /* common case: get past first string, handling escapes */
6160 for (t++; t < PL_bufend && *t != *s;)
6161 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6165 else if (*s == 'q') {
6168 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6169 && !isWORDCHAR(*t))))
6171 /* skip q//-like construct */
6173 char open, close, term;
6176 while (t < PL_bufend && isSPACE(*t))
6178 /* check for q => */
6179 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6180 OPERATOR(HASHBRACK);
6184 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6188 for (t++; t < PL_bufend; t++) {
6189 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6191 else if (*t == open)
6195 for (t++; t < PL_bufend; t++) {
6196 if (*t == '\\' && t+1 < PL_bufend)
6198 else if (*t == close && --brackets <= 0)
6200 else if (*t == open)
6207 /* skip plain q word */
6208 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6211 else if (isWORDCHAR_lazy_if(t,UTF)) {
6213 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6216 while (t < PL_bufend && isSPACE(*t))
6218 /* if comma follows first term, call it an anon hash */
6219 /* XXX it could be a comma expression with loop modifiers */
6220 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6221 || (*t == '=' && t[1] == '>')))
6222 OPERATOR(HASHBRACK);
6223 if (PL_expect == XREF)
6226 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6232 pl_yylval.ival = CopLINE(PL_curcop);
6233 if (isSPACE(*s) || *s == '#')
6234 PL_copline = NOLINE; /* invalidate current command line number */
6235 TOKEN(formbrack ? '=' : '{');
6237 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6241 if (PL_lex_brackets <= 0)
6242 yyerror("Unmatched right curly bracket");
6244 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6245 PL_lex_allbrackets--;
6246 if (PL_lex_state == LEX_INTERPNORMAL) {
6247 if (PL_lex_brackets == 0) {
6248 if (PL_expect & XFAKEBRACK) {
6249 PL_expect &= XENUMMASK;
6250 PL_lex_state = LEX_INTERPEND;
6255 PL_thiswhite = newSVpvs("");
6256 sv_catpvs(PL_thiswhite,"}");
6259 return yylex(); /* ignore fake brackets */
6261 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6262 && SvEVALED(PL_lex_repl))
6263 PL_lex_state = LEX_INTERPEND;
6264 else if (*s == '-' && s[1] == '>')
6265 PL_lex_state = LEX_INTERPENDMAYBE;
6266 else if (*s != '[' && *s != '{')
6267 PL_lex_state = LEX_INTERPEND;
6270 if (PL_expect & XFAKEBRACK) {
6271 PL_expect &= XENUMMASK;
6273 return yylex(); /* ignore fake brackets */
6275 start_force(PL_curforce);
6277 curmad('X', newSVpvn(s-1,1));
6278 CURMAD('_', PL_thiswhite);
6280 force_next(formbrack ? '.' : '}');
6281 if (formbrack) LEAVE;
6283 if (PL_madskills && !PL_thistoken)
6284 PL_thistoken = newSVpvs("");
6286 if (formbrack == 2) { /* means . where arguments were expected */
6287 start_force(PL_curforce);
6295 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6296 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6303 if (PL_expect == XOPERATOR) {
6304 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6305 && isIDFIRST_lazy_if(s,UTF))
6307 CopLINE_dec(PL_curcop);
6308 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6309 CopLINE_inc(PL_curcop);
6311 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6312 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6316 PL_parser->saw_infix_sigil = 1;
6320 PL_tokenbuf[0] = '&';
6321 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6322 sizeof PL_tokenbuf - 1, TRUE);
6323 if (PL_tokenbuf[1]) {
6324 PL_expect = XOPERATOR;
6325 force_ident_maybe_lex('&');
6329 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6335 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6336 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6343 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6344 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6352 const char tmp = *s++;
6354 if (!PL_lex_allbrackets &&
6355 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6362 if (!PL_lex_allbrackets &&
6363 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6371 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6372 && strchr("+-*/%.^&|<",tmp))
6373 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6374 "Reversed %c= operator",(int)tmp);
6376 if (PL_expect == XSTATE && isALPHA(tmp) &&
6377 (s == PL_linestart+1 || s[-2] == '\n') )
6379 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6380 || PL_lex_state != LEX_NORMAL) {
6385 if (strnEQ(s,"=cut",4)) {
6401 PL_thiswhite = newSVpvs("");
6402 sv_catpvn(PL_thiswhite, PL_linestart,
6403 PL_bufend - PL_linestart);
6407 PL_parser->in_pod = 1;
6411 if (PL_expect == XBLOCK) {
6413 #ifdef PERL_STRICT_CR
6414 while (SPACE_OR_TAB(*t))
6416 while (SPACE_OR_TAB(*t) || *t == '\r')
6419 if (*t == '\n' || *t == '#') {
6422 SAVEI8(PL_parser->form_lex_state);
6423 SAVEI32(PL_lex_formbrack);
6424 PL_parser->form_lex_state = PL_lex_state;
6425 PL_lex_formbrack = PL_lex_brackets + 1;
6429 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6438 const char tmp = *s++;
6440 /* was this !=~ where !~ was meant?
6441 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6443 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6444 const char *t = s+1;
6446 while (t < PL_bufend && isSPACE(*t))
6449 if (*t == '/' || *t == '?' ||
6450 ((*t == 'm' || *t == 's' || *t == 'y')
6451 && !isWORDCHAR(t[1])) ||
6452 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6453 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6454 "!=~ should be !~");
6456 if (!PL_lex_allbrackets &&
6457 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6469 if (PL_expect != XOPERATOR) {
6470 if (s[1] != '<' && !strchr(s,'>'))
6473 s = scan_heredoc(s);
6475 s = scan_inputsymbol(s);
6476 PL_expect = XOPERATOR;
6477 TOKEN(sublex_start());
6483 if (*s == '=' && !PL_lex_allbrackets &&
6484 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6488 SHop(OP_LEFT_SHIFT);
6493 if (!PL_lex_allbrackets &&
6494 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6501 if (!PL_lex_allbrackets &&
6502 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6510 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6518 const char tmp = *s++;
6520 if (*s == '=' && !PL_lex_allbrackets &&
6521 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6525 SHop(OP_RIGHT_SHIFT);
6527 else if (tmp == '=') {
6528 if (!PL_lex_allbrackets &&
6529 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6537 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6546 if (PL_expect == XOPERATOR) {
6547 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6548 return deprecate_commaless_var_list();
6552 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6553 PL_tokenbuf[0] = '@';
6554 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6555 sizeof PL_tokenbuf - 1, FALSE);
6556 if (PL_expect == XOPERATOR)
6557 no_op("Array length", s);
6558 if (!PL_tokenbuf[1])
6560 PL_expect = XOPERATOR;
6561 force_ident_maybe_lex('#');
6565 PL_tokenbuf[0] = '$';
6566 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6567 sizeof PL_tokenbuf - 1, FALSE);
6568 if (PL_expect == XOPERATOR)
6570 if (!PL_tokenbuf[1]) {
6572 yyerror("Final $ should be \\$ or $name");
6578 const char tmp = *s;
6579 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6582 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6583 && intuit_more(s)) {
6585 PL_tokenbuf[0] = '@';
6586 if (ckWARN(WARN_SYNTAX)) {
6589 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6592 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6593 while (t < PL_bufend && *t != ']')
6595 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6596 "Multidimensional syntax %.*s not supported",
6597 (int)((t - PL_bufptr) + 1), PL_bufptr);
6601 else if (*s == '{') {
6603 PL_tokenbuf[0] = '%';
6604 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6605 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6607 char tmpbuf[sizeof PL_tokenbuf];
6610 } while (isSPACE(*t));
6611 if (isIDFIRST_lazy_if(t,UTF)) {
6613 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6618 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6619 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6620 "You need to quote \"%"UTF8f"\"",
6621 UTF8fARG(UTF, len, tmpbuf));
6627 PL_expect = XOPERATOR;
6628 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6629 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6630 if (!islop || PL_last_lop_op == OP_GREPSTART)
6631 PL_expect = XOPERATOR;
6632 else if (strchr("$@\"'`q", *s))
6633 PL_expect = XTERM; /* e.g. print $fh "foo" */
6634 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6635 PL_expect = XTERM; /* e.g. print $fh &sub */
6636 else if (isIDFIRST_lazy_if(s,UTF)) {
6637 char tmpbuf[sizeof PL_tokenbuf];
6639 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6640 if ((t2 = keyword(tmpbuf, len, 0))) {
6641 /* binary operators exclude handle interpretations */
6653 PL_expect = XTERM; /* e.g. print $fh length() */
6658 PL_expect = XTERM; /* e.g. print $fh subr() */
6661 else if (isDIGIT(*s))
6662 PL_expect = XTERM; /* e.g. print $fh 3 */
6663 else if (*s == '.' && isDIGIT(s[1]))
6664 PL_expect = XTERM; /* e.g. print $fh .3 */
6665 else if ((*s == '?' || *s == '-' || *s == '+')
6666 && !isSPACE(s[1]) && s[1] != '=')
6667 PL_expect = XTERM; /* e.g. print $fh -1 */
6668 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6670 PL_expect = XTERM; /* e.g. print $fh /.../
6671 XXX except DORDOR operator
6673 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6675 PL_expect = XTERM; /* print $fh <<"EOF" */
6678 force_ident_maybe_lex('$');
6682 if (PL_expect == XOPERATOR)
6684 PL_tokenbuf[0] = '@';
6685 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6686 if (!PL_tokenbuf[1]) {
6689 if (PL_lex_state == LEX_NORMAL)
6691 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6693 PL_tokenbuf[0] = '%';
6695 /* Warn about @ where they meant $. */
6696 if (*s == '[' || *s == '{') {
6697 if (ckWARN(WARN_SYNTAX)) {
6698 const char *t = s + 1;
6699 while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6700 t += UTF ? UTF8SKIP(t) : 1;
6701 if (*t == '}' || *t == ']') {
6703 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6704 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6705 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6706 "Scalar value %"UTF8f" better written as $%"UTF8f,
6707 UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
6708 UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
6713 PL_expect = XOPERATOR;
6714 force_ident_maybe_lex('@');
6717 case '/': /* may be division, defined-or, or pattern */
6718 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6719 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6720 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6725 case '?': /* may either be conditional or pattern */
6726 if (PL_expect == XOPERATOR) {
6729 if (!PL_lex_allbrackets &&
6730 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6734 PL_lex_allbrackets++;
6740 /* A // operator. */
6741 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6742 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6743 LEX_FAKEEOF_LOGIC)) {
6751 if (*s == '=' && !PL_lex_allbrackets &&
6752 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6761 /* Disable warning on "study /blah/" */
6762 if (PL_oldoldbufptr == PL_last_uni
6763 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6764 || memNE(PL_last_uni, "study", 5)
6765 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6769 deprecate("?PATTERN? without explicit operator");
6770 s = scan_pat(s,OP_MATCH);
6771 TERM(sublex_start());
6775 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6776 #ifdef PERL_STRICT_CR
6779 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6781 && (s == PL_linestart || s[-1] == '\n') )
6784 formbrack = 2; /* dot seen where arguments expected */
6787 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6791 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6794 if (!PL_lex_allbrackets &&
6795 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6802 pl_yylval.ival = OPf_SPECIAL;
6808 if (*s == '=' && !PL_lex_allbrackets &&
6809 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6816 case '0': case '1': case '2': case '3': case '4':
6817 case '5': case '6': case '7': case '8': case '9':
6818 s = scan_num(s, &pl_yylval);
6819 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6820 if (PL_expect == XOPERATOR)
6825 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6826 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6827 if (PL_expect == XOPERATOR) {
6828 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6829 return deprecate_commaless_var_list();
6836 pl_yylval.ival = OP_CONST;
6837 TERM(sublex_start());
6840 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6841 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6842 if (PL_expect == XOPERATOR) {
6843 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6844 return deprecate_commaless_var_list();
6851 pl_yylval.ival = OP_CONST;
6852 /* FIXME. I think that this can be const if char *d is replaced by
6853 more localised variables. */
6854 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6855 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6856 pl_yylval.ival = OP_STRINGIFY;
6860 TERM(sublex_start());
6863 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6864 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6865 if (PL_expect == XOPERATOR)
6866 no_op("Backticks",s);
6869 readpipe_override();
6870 TERM(sublex_start());
6874 if (PL_lex_inwhat && isDIGIT(*s))
6875 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6877 if (PL_expect == XOPERATOR)
6878 no_op("Backslash",s);
6882 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6883 char *start = s + 2;
6884 while (isDIGIT(*start) || *start == '_')
6886 if (*start == '.' && isDIGIT(start[1])) {
6887 s = scan_num(s, &pl_yylval);
6890 else if ((*start == ':' && start[1] == ':')
6891 || (PL_expect == XSTATE && *start == ':'))
6893 else if (PL_expect == XSTATE) {
6895 while (d < PL_bufend && isSPACE(*d)) d++;
6896 if (*d == ':') goto keylookup;
6898 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6899 if (!isALPHA(*start) && (PL_expect == XTERM
6900 || PL_expect == XREF || PL_expect == XSTATE
6901 || PL_expect == XTERMORDORDOR)) {
6902 GV *const gv = gv_fetchpvn_flags(s, start - s,
6903 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6905 s = scan_num(s, &pl_yylval);
6912 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6965 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6967 /* Some keywords can be followed by any delimiter, including ':' */
6968 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6970 /* x::* is just a word, unless x is "CORE" */
6971 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6975 while (d < PL_bufend && isSPACE(*d))
6976 d++; /* no comments skipped here, or s### is misparsed */
6978 /* Is this a word before a => operator? */
6979 if (*d == '=' && d[1] == '>') {
6983 = (OP*)newSVOP(OP_CONST, 0,
6984 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6985 pl_yylval.opval->op_private = OPpCONST_BARE;
6989 /* Check for plugged-in keyword */
6993 char *saved_bufptr = PL_bufptr;
6995 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6997 if (result == KEYWORD_PLUGIN_DECLINE) {
6998 /* not a plugged-in keyword */
6999 PL_bufptr = saved_bufptr;
7000 } else if (result == KEYWORD_PLUGIN_STMT) {
7001 pl_yylval.opval = o;
7004 return REPORT(PLUGSTMT);
7005 } else if (result == KEYWORD_PLUGIN_EXPR) {
7006 pl_yylval.opval = o;
7008 PL_expect = XOPERATOR;
7009 return REPORT(PLUGEXPR);
7011 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7016 /* Check for built-in keyword */
7017 tmp = keyword(PL_tokenbuf, len, 0);
7019 /* Is this a label? */
7020 if (!anydelim && PL_expect == XSTATE
7021 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7023 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7024 pl_yylval.pval[len] = '\0';
7025 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7030 /* Check for lexical sub */
7031 if (PL_expect != XOPERATOR) {
7032 char tmpbuf[sizeof PL_tokenbuf + 1];
7034 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7035 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7036 if (off != NOT_IN_PAD) {
7037 assert(off); /* we assume this is boolean-true below */
7038 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7039 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7040 HEK * const stashname = HvNAME_HEK(stash);
7041 sv = newSVhek(stashname);
7042 sv_catpvs(sv, "::");
7043 sv_catpvn_flags(sv, PL_tokenbuf, len,
7044 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7045 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7055 rv2cv_op = newOP(OP_PADANY, 0);
7056 rv2cv_op->op_targ = off;
7057 cv = find_lexical_cv(off);
7065 if (tmp < 0) { /* second-class keyword? */
7066 GV *ogv = NULL; /* override (winner) */
7067 GV *hgv = NULL; /* hidden (loser) */
7068 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7070 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7071 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7074 if (GvIMPORTED_CV(gv))
7076 else if (! CvMETHOD(cv))
7080 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7081 UTF ? -(I32)len : (I32)len, FALSE)) &&
7082 (gv = *gvp) && isGV_with_GP(gv) &&
7083 GvCVu(gv) && GvIMPORTED_CV(gv))
7090 tmp = 0; /* overridden by import or by GLOBAL */
7093 && -tmp==KEY_lock /* XXX generalizable kludge */
7096 tmp = 0; /* any sub overrides "weak" keyword */
7098 else { /* no override */
7100 if (tmp == KEY_dump) {
7101 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7102 "dump() better written as CORE::dump()");
7106 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
7107 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7108 "Ambiguous call resolved as CORE::%s(), "
7109 "qualify as such or use &",
7114 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7115 && (!anydelim || *s != '#')) {
7116 /* no override, and not s### either; skipspace is safe here
7117 * check for => on following line */
7118 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7119 STRLEN soff = s - SvPVX(PL_linestr);
7120 s = skipspace_flags(s, LEX_NO_INCLINE);
7121 if (*s == '=' && s[1] == '>') goto fat_arrow;
7122 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7123 s = SvPVX(PL_linestr) + soff;
7129 default: /* not a keyword */
7130 /* Trade off - by using this evil construction we can pull the
7131 variable gv into the block labelled keylookup. If not, then
7132 we have to give it function scope so that the goto from the
7133 earlier ':' case doesn't bypass the initialisation. */
7135 just_a_word_zero_gv:
7147 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7148 const char penultchar =
7149 lastchar && PL_bufptr - 2 >= PL_linestart
7153 SV *nextPL_nextwhite = 0;
7157 /* Get the rest if it looks like a package qualifier */
7159 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7161 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7164 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7165 UTF8fARG(UTF, len, PL_tokenbuf),
7166 *s == '\'' ? "'" : "::");
7171 if (PL_expect == XOPERATOR) {
7172 if (PL_bufptr == PL_linestart) {
7173 CopLINE_dec(PL_curcop);
7174 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7175 CopLINE_inc(PL_curcop);
7178 no_op("Bareword",s);
7181 /* Look for a subroutine with this name in current package,
7182 unless this is a lexical sub, or name is "Foo::",
7183 in which case Foo is a bareword
7184 (and a package name). */
7186 if (len > 2 && !PL_madskills &&
7187 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7189 if (ckWARN(WARN_BAREWORD)
7190 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7191 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7192 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7193 UTF8fARG(UTF, len, PL_tokenbuf));
7195 PL_tokenbuf[len] = '\0';
7201 /* Mustn't actually add anything to a symbol table.
7202 But also don't want to "initialise" any placeholder
7203 constants that might already be there into full
7204 blown PVGVs with attached PVCV. */
7205 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7206 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7212 /* if we saw a global override before, get the right name */
7215 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7216 len ? len : strlen(PL_tokenbuf));
7218 SV * const tmp_sv = sv;
7219 sv = newSVpvs("CORE::GLOBAL::");
7220 sv_catsv(sv, tmp_sv);
7221 SvREFCNT_dec(tmp_sv);
7225 if (PL_madskills && !PL_thistoken) {
7226 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7227 PL_thistoken = newSVpvn(start,s - start);
7228 PL_realtokenstart = s - SvPVX(PL_linestr);
7232 /* Presume this is going to be a bareword of some sort. */
7234 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7235 pl_yylval.opval->op_private = OPpCONST_BARE;
7237 /* And if "Foo::", then that's what it certainly is. */
7243 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7244 const_op->op_private = OPpCONST_BARE;
7245 rv2cv_op = newCVREF(0, const_op);
7246 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7249 /* See if it's the indirect object for a list operator. */
7251 if (PL_oldoldbufptr &&
7252 PL_oldoldbufptr < PL_bufptr &&
7253 (PL_oldoldbufptr == PL_last_lop
7254 || PL_oldoldbufptr == PL_last_uni) &&
7255 /* NO SKIPSPACE BEFORE HERE! */
7256 (PL_expect == XREF ||
7257 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7259 bool immediate_paren = *s == '(';
7261 /* (Now we can afford to cross potential line boundary.) */
7262 s = SKIPSPACE2(s,nextPL_nextwhite);
7264 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7267 /* Two barewords in a row may indicate method call. */
7269 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7270 (tmp = intuit_method(s, gv, cv))) {
7272 if (tmp == METHOD && !PL_lex_allbrackets &&
7273 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7274 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7278 /* If not a declared subroutine, it's an indirect object. */
7279 /* (But it's an indir obj regardless for sort.) */
7280 /* Also, if "_" follows a filetest operator, it's a bareword */
7283 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7285 (PL_last_lop_op != OP_MAPSTART &&
7286 PL_last_lop_op != OP_GREPSTART))))
7287 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7288 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7291 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7296 PL_expect = XOPERATOR;
7299 s = SKIPSPACE2(s,nextPL_nextwhite);
7300 PL_nextwhite = nextPL_nextwhite;
7305 /* Is this a word before a => operator? */
7306 if (*s == '=' && s[1] == '>' && !pkgname) {
7309 /* This is our own scalar, created a few lines above,
7311 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7312 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7313 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7314 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7315 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7319 /* If followed by a paren, it's certainly a subroutine. */
7324 while (SPACE_OR_TAB(*d))
7326 if (*d == ')' && (sv = cv_const_sv(cv))) {
7333 PL_nextwhite = PL_thiswhite;
7336 start_force(PL_curforce);
7338 NEXTVAL_NEXTTOKE.opval =
7339 off ? rv2cv_op : pl_yylval.opval;
7340 PL_expect = XOPERATOR;
7343 PL_nextwhite = nextPL_nextwhite;
7344 curmad('X', PL_thistoken);
7345 PL_thistoken = newSVpvs("");
7349 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7350 else op_free(rv2cv_op), force_next(WORD);
7355 /* If followed by var or block, call it a method (unless sub) */
7357 if ((*s == '$' || *s == '{') && !cv) {
7359 PL_last_lop = PL_oldbufptr;
7360 PL_last_lop_op = OP_METHOD;
7361 if (!PL_lex_allbrackets &&
7362 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7363 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7367 /* If followed by a bareword, see if it looks like indir obj. */
7370 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7371 && (tmp = intuit_method(s, gv, cv))) {
7373 if (tmp == METHOD && !PL_lex_allbrackets &&
7374 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7375 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7379 /* Not a method, so call it a subroutine (if defined) */
7382 if (lastchar == '-' && penultchar != '-') {
7383 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7384 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7385 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7386 UTF8fARG(UTF, l, PL_tokenbuf),
7387 UTF8fARG(UTF, l, PL_tokenbuf));
7389 /* Check for a constant sub */
7390 if ((sv = cv_const_sv(cv))) {
7393 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7394 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7395 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7396 pl_yylval.opval->op_folded = 1;
7397 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7401 op_free(pl_yylval.opval);
7403 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7404 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7405 PL_last_lop = PL_oldbufptr;
7406 PL_last_lop_op = OP_ENTERSUB;
7407 /* Is there a prototype? */
7414 STRLEN protolen = CvPROTOLEN(cv);
7415 const char *proto = CvPROTO(cv);
7417 proto = S_strip_spaces(aTHX_ proto, &protolen);
7420 if ((optional = *proto == ';'))
7423 while (*proto == ';');
7427 *proto == '$' || *proto == '_'
7428 || *proto == '*' || *proto == '+'
7433 *proto == '\\' && proto[1] && proto[2] == '\0'
7436 UNIPROTO(UNIOPSUB,optional);
7437 if (*proto == '\\' && proto[1] == '[') {
7438 const char *p = proto + 2;
7439 while(*p && *p != ']')
7441 if(*p == ']' && !p[1])
7442 UNIPROTO(UNIOPSUB,optional);
7444 if (*proto == '&' && *s == '{') {
7446 sv_setpvs(PL_subname, "__ANON__");
7448 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7449 if (!PL_lex_allbrackets &&
7450 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7451 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7458 PL_nextwhite = PL_thiswhite;
7461 start_force(PL_curforce);
7462 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7465 PL_nextwhite = nextPL_nextwhite;
7466 curmad('X', PL_thistoken);
7467 PL_thistoken = newSVpvs("");
7469 force_next(off ? PRIVATEREF : WORD);
7470 if (!PL_lex_allbrackets &&
7471 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7472 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7477 /* Guess harder when madskills require "best effort". */
7478 if (PL_madskills && (!gv || !GvCVu(gv))) {
7479 int probable_sub = 0;
7480 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7482 else if (isALPHA(*s)) {
7486 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7487 if (!keyword(tmpbuf, tmplen, 0))
7490 while (d < PL_bufend && isSPACE(*d))
7492 if (*d == '=' && d[1] == '>')
7497 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7499 op_free(pl_yylval.opval);
7501 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7502 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7503 PL_last_lop = PL_oldbufptr;
7504 PL_last_lop_op = OP_ENTERSUB;
7505 PL_nextwhite = PL_thiswhite;
7507 start_force(PL_curforce);
7508 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7510 PL_nextwhite = nextPL_nextwhite;
7511 curmad('X', PL_thistoken);
7512 PL_thistoken = newSVpvs("");
7513 force_next(off ? PRIVATEREF : WORD);
7514 if (!PL_lex_allbrackets &&
7515 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7516 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7520 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7522 force_next(off ? PRIVATEREF : WORD);
7523 if (!PL_lex_allbrackets &&
7524 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7525 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7530 /* Call it a bare word */
7532 if (PL_hints & HINT_STRICT_SUBS)
7533 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7536 /* after "print" and similar functions (corresponding to
7537 * "F? L" in opcode.pl), whatever wasn't already parsed as
7538 * a filehandle should be subject to "strict subs".
7539 * Likewise for the optional indirect-object argument to system
7540 * or exec, which can't be a bareword */
7541 if ((PL_last_lop_op == OP_PRINT
7542 || PL_last_lop_op == OP_PRTF
7543 || PL_last_lop_op == OP_SAY
7544 || PL_last_lop_op == OP_SYSTEM
7545 || PL_last_lop_op == OP_EXEC)
7546 && (PL_hints & HINT_STRICT_SUBS))
7547 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7548 if (lastchar != '-') {
7549 if (ckWARN(WARN_RESERVED)) {
7553 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7554 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7562 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7563 && saw_infix_sigil) {
7564 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7565 "Operator or semicolon missing before %c%"UTF8f,
7567 UTF8fARG(UTF, strlen(PL_tokenbuf),
7569 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7570 "Ambiguous use of %c resolved as operator %c",
7571 lastchar, lastchar);
7578 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7583 (OP*)newSVOP(OP_CONST, 0,
7584 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7587 case KEY___PACKAGE__:
7589 (OP*)newSVOP(OP_CONST, 0,
7591 ? newSVhek(HvNAME_HEK(PL_curstash))
7598 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7599 const char *pname = "main";
7602 if (PL_tokenbuf[2] == 'D')
7605 PL_curstash ? PL_curstash : PL_defstash;
7606 pname = HvNAME_get(stash);
7607 plen = HvNAMELEN (stash);
7608 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7610 gv = gv_fetchpvn_flags(
7611 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7612 plen+6, GV_ADD|putf8, SVt_PVIO
7616 GvIOp(gv) = newIO();
7617 IoIFP(GvIOp(gv)) = PL_rsfp;
7618 #if defined(HAS_FCNTL) && defined(F_SETFD)
7620 const int fd = PerlIO_fileno(PL_rsfp);
7621 fcntl(fd,F_SETFD,fd >= 3);
7624 /* Mark this internal pseudo-handle as clean */
7625 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7626 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7627 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7629 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7630 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7631 /* if the script was opened in binmode, we need to revert
7632 * it to text mode for compatibility; but only iff it has CRs
7633 * XXX this is a questionable hack at best. */
7634 if (PL_bufend-PL_bufptr > 2
7635 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7638 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7639 loc = PerlIO_tell(PL_rsfp);
7640 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7643 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7645 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7646 #endif /* NETWARE */
7648 PerlIO_seek(PL_rsfp, loc, 0);
7652 #ifdef PERLIO_LAYERS
7655 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7656 else if (PL_encoding) {
7663 XPUSHs(PL_encoding);
7665 call_method("name", G_SCALAR);
7669 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7670 Perl_form(aTHX_ ":encoding(%"SVf")",
7679 if (PL_realtokenstart >= 0) {
7680 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7682 PL_endwhite = newSVpvs("");
7683 sv_catsv(PL_endwhite, PL_thiswhite);
7685 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7686 PL_realtokenstart = -1;
7688 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7698 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7707 if (PL_expect == XSTATE) {
7714 if (*s == ':' && s[1] == ':') {
7718 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7719 if ((*s == ':' && s[1] == ':')
7720 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7724 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7728 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7729 UTF8fARG(UTF, len, PL_tokenbuf));
7732 else if (tmp == KEY_require || tmp == KEY_do
7734 /* that's a way to remember we saw "CORE::" */
7747 LOP(OP_ACCEPT,XTERM);
7750 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7755 LOP(OP_ATAN2,XTERM);
7761 LOP(OP_BINMODE,XTERM);
7764 LOP(OP_BLESS,XTERM);
7773 /* We have to disambiguate the two senses of
7774 "continue". If the next token is a '{' then
7775 treat it as the start of a continue block;
7776 otherwise treat it as a control operator.
7786 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7796 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7805 if (!PL_cryptseen) {
7806 PL_cryptseen = TRUE;
7810 LOP(OP_CRYPT,XTERM);
7813 LOP(OP_CHMOD,XTERM);
7816 LOP(OP_CHOWN,XTERM);
7819 LOP(OP_CONNECT,XTERM);
7839 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7841 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7844 force_ident_maybe_lex('&');
7849 if (orig_keyword == KEY_do) {
7858 PL_hints |= HINT_BLOCK_SCOPE;
7868 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7869 STR_WITH_LEN("NDBM_File::"),
7870 STR_WITH_LEN("DB_File::"),
7871 STR_WITH_LEN("GDBM_File::"),
7872 STR_WITH_LEN("SDBM_File::"),
7873 STR_WITH_LEN("ODBM_File::"),
7875 LOP(OP_DBMOPEN,XTERM);
7881 PL_expect = XOPERATOR;
7882 s = force_word(s,WORD,TRUE,FALSE);
7889 pl_yylval.ival = CopLINE(PL_curcop);
7893 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7907 if (*s == '{') { /* block eval */
7908 PL_expect = XTERMBLOCK;
7909 UNIBRACK(OP_ENTERTRY);
7911 else { /* string eval */
7913 UNIBRACK(OP_ENTEREVAL);
7918 UNIBRACK(-OP_ENTEREVAL);
7932 case KEY_endhostent:
7938 case KEY_endservent:
7941 case KEY_endprotoent:
7952 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7954 pl_yylval.ival = CopLINE(PL_curcop);
7956 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7959 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7962 if ((PL_bufend - p) >= 3 &&
7963 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7965 else if ((PL_bufend - p) >= 4 &&
7966 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7969 if (isIDFIRST_lazy_if(p,UTF)) {
7970 p = scan_ident(p, PL_bufend,
7971 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7975 Perl_croak(aTHX_ "Missing $ on loop variable");
7977 s = SvPVX(PL_linestr) + soff;
7983 LOP(OP_FORMLINE,XTERM);
7992 LOP(OP_FCNTL,XTERM);
7998 LOP(OP_FLOCK,XTERM);
8001 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8006 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8011 LOP(OP_GREPSTART, XREF);
8014 PL_expect = XOPERATOR;
8015 s = force_word(s,WORD,TRUE,FALSE);
8030 case KEY_getpriority:
8031 LOP(OP_GETPRIORITY,XTERM);
8033 case KEY_getprotobyname:
8036 case KEY_getprotobynumber:
8037 LOP(OP_GPBYNUMBER,XTERM);
8039 case KEY_getprotoent:
8051 case KEY_getpeername:
8052 UNI(OP_GETPEERNAME);
8054 case KEY_gethostbyname:
8057 case KEY_gethostbyaddr:
8058 LOP(OP_GHBYADDR,XTERM);
8060 case KEY_gethostent:
8063 case KEY_getnetbyname:
8066 case KEY_getnetbyaddr:
8067 LOP(OP_GNBYADDR,XTERM);
8072 case KEY_getservbyname:
8073 LOP(OP_GSBYNAME,XTERM);
8075 case KEY_getservbyport:
8076 LOP(OP_GSBYPORT,XTERM);
8078 case KEY_getservent:
8081 case KEY_getsockname:
8082 UNI(OP_GETSOCKNAME);
8084 case KEY_getsockopt:
8085 LOP(OP_GSOCKOPT,XTERM);
8100 pl_yylval.ival = CopLINE(PL_curcop);
8101 Perl_ck_warner_d(aTHX_
8102 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8103 "given is experimental");
8108 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8116 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8118 pl_yylval.ival = CopLINE(PL_curcop);
8122 LOP(OP_INDEX,XTERM);
8128 LOP(OP_IOCTL,XTERM);
8140 PL_expect = XOPERATOR;
8141 s = force_word(s,WORD,TRUE,FALSE);
8158 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8163 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8177 LOP(OP_LISTEN,XTERM);
8186 s = scan_pat(s,OP_MATCH);
8187 TERM(sublex_start());
8190 LOP(OP_MAPSTART, XREF);
8193 LOP(OP_MKDIR,XTERM);
8196 LOP(OP_MSGCTL,XTERM);
8199 LOP(OP_MSGGET,XTERM);
8202 LOP(OP_MSGRCV,XTERM);
8205 LOP(OP_MSGSND,XTERM);
8210 PL_in_my = (U16)tmp;
8212 if (isIDFIRST_lazy_if(s,UTF)) {
8216 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8217 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8219 if (!FEATURE_LEXSUBS_IS_ENABLED)
8221 "Experimental \"%s\" subs not enabled",
8222 tmp == KEY_my ? "my" :
8223 tmp == KEY_state ? "state" : "our");
8224 Perl_ck_warner_d(aTHX_
8225 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8226 "The lexical_subs feature is experimental");
8229 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8230 if (!PL_in_my_stash) {
8233 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8234 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8237 if (PL_madskills) { /* just add type to declarator token */
8238 sv_catsv(PL_thistoken, PL_nextwhite);
8240 sv_catpvn(PL_thistoken, start, s - start);
8248 PL_expect = XOPERATOR;
8249 s = force_word(s,WORD,TRUE,FALSE);
8253 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8258 s = tokenize_use(0, s);
8262 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8265 if (!PL_lex_allbrackets &&
8266 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8267 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8273 if (isIDFIRST_lazy_if(s,UTF)) {
8275 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8277 for (t=d; isSPACE(*t);)
8279 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8281 && !(t[0] == '=' && t[1] == '>')
8282 && !(t[0] == ':' && t[1] == ':')
8283 && !keyword(s, d-s, 0)
8285 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8286 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8287 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8293 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8295 pl_yylval.ival = OP_OR;
8305 LOP(OP_OPEN_DIR,XTERM);
8308 checkcomma(s,PL_tokenbuf,"filehandle");
8312 checkcomma(s,PL_tokenbuf,"filehandle");
8331 s = force_word(s,WORD,FALSE,TRUE);
8333 s = force_strict_version(s);
8334 PL_lex_expect = XBLOCK;
8338 LOP(OP_PIPE_OP,XTERM);
8341 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8344 pl_yylval.ival = OP_CONST;
8345 TERM(sublex_start());
8352 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8355 PL_expect = XOPERATOR;
8356 if (SvCUR(PL_lex_stuff)) {
8357 int warned_comma = !ckWARN(WARN_QW);
8358 int warned_comment = warned_comma;
8359 d = SvPV_force(PL_lex_stuff, len);
8361 for (; isSPACE(*d) && len; --len, ++d)
8366 if (!warned_comma || !warned_comment) {
8367 for (; !isSPACE(*d) && len; --len, ++d) {
8368 if (!warned_comma && *d == ',') {
8369 Perl_warner(aTHX_ packWARN(WARN_QW),
8370 "Possible attempt to separate words with commas");
8373 else if (!warned_comment && *d == '#') {
8374 Perl_warner(aTHX_ packWARN(WARN_QW),
8375 "Possible attempt to put comments in qw() list");
8381 for (; !isSPACE(*d) && len; --len, ++d)
8384 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8385 words = op_append_elem(OP_LIST, words,
8386 newSVOP(OP_CONST, 0, tokeq(sv)));
8391 words = newNULLLIST();
8393 SvREFCNT_dec(PL_lex_stuff);
8394 PL_lex_stuff = NULL;
8396 PL_expect = XOPERATOR;
8397 pl_yylval.opval = sawparens(words);
8402 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8405 pl_yylval.ival = OP_STRINGIFY;
8406 if (SvIVX(PL_lex_stuff) == '\'')
8407 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8408 TERM(sublex_start());
8411 s = scan_pat(s,OP_QR);
8412 TERM(sublex_start());
8415 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8418 readpipe_override();
8419 TERM(sublex_start());
8426 PL_expect = XOPERATOR;
8428 s = force_version(s, FALSE);
8430 else if (*s != 'v' || !isDIGIT(s[1])
8431 || (s = force_version(s, TRUE), *s == 'v'))
8433 *PL_tokenbuf = '\0';
8434 s = force_word(s,WORD,TRUE,TRUE);
8435 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8436 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8437 GV_ADD | (UTF ? SVf_UTF8 : 0));
8439 yyerror("<> should be quotes");
8441 if (orig_keyword == KEY_require) {
8449 PL_last_uni = PL_oldbufptr;
8450 PL_last_lop_op = OP_REQUIRE;
8452 return REPORT( (int)REQUIRE );
8458 PL_expect = XOPERATOR;
8459 s = force_word(s,WORD,TRUE,FALSE);
8463 LOP(OP_RENAME,XTERM);
8472 LOP(OP_RINDEX,XTERM);
8481 UNIDOR(OP_READLINE);
8484 UNIDOR(OP_BACKTICK);
8493 LOP(OP_REVERSE,XTERM);
8496 UNIDOR(OP_READLINK);
8503 if (pl_yylval.opval)
8504 TERM(sublex_start());
8506 TOKEN(1); /* force error */
8509 checkcomma(s,PL_tokenbuf,"filehandle");
8519 LOP(OP_SELECT,XTERM);
8525 LOP(OP_SEMCTL,XTERM);
8528 LOP(OP_SEMGET,XTERM);
8531 LOP(OP_SEMOP,XTERM);
8537 LOP(OP_SETPGRP,XTERM);
8539 case KEY_setpriority:
8540 LOP(OP_SETPRIORITY,XTERM);
8542 case KEY_sethostent:
8548 case KEY_setservent:
8551 case KEY_setprotoent:
8561 LOP(OP_SEEKDIR,XTERM);
8563 case KEY_setsockopt:
8564 LOP(OP_SSOCKOPT,XTERM);
8570 LOP(OP_SHMCTL,XTERM);
8573 LOP(OP_SHMGET,XTERM);
8576 LOP(OP_SHMREAD,XTERM);
8579 LOP(OP_SHMWRITE,XTERM);
8582 LOP(OP_SHUTDOWN,XTERM);
8591 LOP(OP_SOCKET,XTERM);
8593 case KEY_socketpair:
8594 LOP(OP_SOCKPAIR,XTERM);
8597 checkcomma(s,PL_tokenbuf,"subroutine name");
8600 s = force_word(s,WORD,TRUE,TRUE);
8604 LOP(OP_SPLIT,XTERM);
8607 LOP(OP_SPRINTF,XTERM);
8610 LOP(OP_SPLICE,XTERM);
8625 LOP(OP_SUBSTR,XTERM);
8631 char * const tmpbuf = PL_tokenbuf + 1;
8632 expectation attrful;
8633 bool have_name, have_proto;
8634 const int key = tmp;
8636 SV *format_name = NULL;
8642 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8643 SV *subtoken = PL_madskills
8644 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8649 s = SKIPSPACE2(s,tmpwhite);
8655 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8656 (*s == ':' && s[1] == ':'))
8659 SV *nametoke = NULL;
8663 attrful = XATTRBLOCK;
8664 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8668 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8670 if (key == KEY_format)
8671 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8674 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8676 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8678 sv_setpvn(PL_subname, tmpbuf, len);
8680 sv_setsv(PL_subname,PL_curstname);
8681 sv_catpvs(PL_subname,"::");
8682 sv_catpvn(PL_subname,tmpbuf,len);
8684 if (SvUTF8(PL_linestr))
8685 SvUTF8_on(PL_subname);
8691 CURMAD('X', nametoke);
8692 CURMAD('_', tmpwhite);
8693 force_ident_maybe_lex('&');
8695 s = SKIPSPACE2(d,tmpwhite);
8701 if (key == KEY_my || key == KEY_our || key==KEY_state)
8704 /* diag_listed_as: Missing name in "%s sub" */
8706 "Missing name in \"%s\"", PL_bufptr);
8708 PL_expect = XTERMBLOCK;
8709 attrful = XATTRTERM;
8710 sv_setpvs(PL_subname,"?");
8714 if (key == KEY_format) {
8716 PL_thistoken = subtoken;
8720 start_force(PL_curforce);
8721 NEXTVAL_NEXTTOKE.opval
8722 = (OP*)newSVOP(OP_CONST,0, format_name);
8723 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8730 /* Look for a prototype */
8732 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8734 Perl_croak(aTHX_ "Prototype not terminated");
8735 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8740 CURMAD('q', PL_thisopen);
8741 CURMAD('_', tmpwhite);
8742 CURMAD('=', PL_thisstuff);
8743 CURMAD('Q', PL_thisclose);
8744 NEXTVAL_NEXTTOKE.opval =
8745 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8746 PL_lex_stuff = NULL;
8749 s = SKIPSPACE2(s,tmpwhite);
8757 if (*s == ':' && s[1] != ':')
8758 PL_expect = attrful;
8759 else if (*s != '{' && key == KEY_sub) {
8761 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8762 else if (*s != ';' && *s != '}')
8763 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8770 curmad('^', newSVpvs(""));
8771 CURMAD('_', tmpwhite);
8775 PL_thistoken = subtoken;
8776 PERL_UNUSED_VAR(have_proto);
8779 NEXTVAL_NEXTTOKE.opval =
8780 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8781 PL_lex_stuff = NULL;
8787 sv_setpvs(PL_subname, "__ANON__");
8789 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8793 force_ident_maybe_lex('&');
8799 LOP(OP_SYSTEM,XREF);
8802 LOP(OP_SYMLINK,XTERM);
8805 LOP(OP_SYSCALL,XTERM);
8808 LOP(OP_SYSOPEN,XTERM);
8811 LOP(OP_SYSSEEK,XTERM);
8814 LOP(OP_SYSREAD,XTERM);
8817 LOP(OP_SYSWRITE,XTERM);
8822 TERM(sublex_start());
8843 LOP(OP_TRUNCATE,XTERM);
8855 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8857 pl_yylval.ival = CopLINE(PL_curcop);
8861 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8863 pl_yylval.ival = CopLINE(PL_curcop);
8867 LOP(OP_UNLINK,XTERM);
8873 LOP(OP_UNPACK,XTERM);
8876 LOP(OP_UTIME,XTERM);
8882 LOP(OP_UNSHIFT,XTERM);
8885 s = tokenize_use(1, s);
8895 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8897 pl_yylval.ival = CopLINE(PL_curcop);
8898 Perl_ck_warner_d(aTHX_
8899 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8900 "when is experimental");
8904 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8906 pl_yylval.ival = CopLINE(PL_curcop);
8910 PL_hints |= HINT_BLOCK_SCOPE;
8917 LOP(OP_WAITPID,XTERM);
8926 ctl_l[0] = toCTRL('L');
8928 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8931 /* Make sure $^L is defined */
8932 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8937 if (PL_expect == XOPERATOR) {
8938 if (*s == '=' && !PL_lex_allbrackets &&
8939 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8947 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8949 pl_yylval.ival = OP_XOR;
8955 #pragma segment Main
8961 Looks up an identifier in the pad or in a package
8964 PRIVATEREF if this is a lexical name.
8965 WORD if this belongs to a package.
8968 if we're in a my declaration
8969 croak if they tried to say my($foo::bar)
8970 build the ops for a my() declaration
8971 if it's an access to a my() variable
8972 build ops for access to a my() variable
8973 if in a dq string, and they've said @foo and we can't find @foo
8975 build ops for a bareword
8979 S_pending_ident(pTHX)
8983 const char pit = (char)pl_yylval.ival;
8984 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8985 /* All routes through this function want to know if there is a colon. */
8986 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8988 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8989 "### Pending identifier '%s'\n", PL_tokenbuf); });
8991 /* if we're in a my(), we can't allow dynamics here.
8992 $foo'bar has already been turned into $foo::bar, so
8993 just check for colons.
8995 if it's a legal name, the OP is a PADANY.
8998 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9000 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9001 "variable %s in \"our\"",
9002 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9003 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9007 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9008 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9009 UTF ? SVf_UTF8 : 0);
9011 pl_yylval.opval = newOP(OP_PADANY, 0);
9012 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9013 UTF ? SVf_UTF8 : 0);
9019 build the ops for accesses to a my() variable.
9024 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9025 UTF ? SVf_UTF8 : 0);
9026 if (tmp != NOT_IN_PAD) {
9027 /* might be an "our" variable" */
9028 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9029 /* build ops for a bareword */
9030 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9031 HEK * const stashname = HvNAME_HEK(stash);
9032 SV * const sym = newSVhek(stashname);
9033 sv_catpvs(sym, "::");
9034 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9035 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9036 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9040 ? (GV_ADDMULTI | GV_ADDINEVAL)
9043 ((PL_tokenbuf[0] == '$') ? SVt_PV
9044 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9049 pl_yylval.opval = newOP(OP_PADANY, 0);
9050 pl_yylval.opval->op_targ = tmp;
9056 Whine if they've said @foo in a doublequoted string,
9057 and @foo isn't a variable we can find in the symbol
9060 if (ckWARN(WARN_AMBIGUOUS) &&
9061 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9062 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9063 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9064 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9065 /* DO NOT warn for @- and @+ */
9066 && !( PL_tokenbuf[2] == '\0' &&
9067 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9070 /* Downgraded from fatal to warning 20000522 mjd */
9071 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9072 "Possible unintended interpolation of %"UTF8f
9074 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9078 /* build ops for a bareword */
9079 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9080 newSVpvn_flags(PL_tokenbuf + 1,
9082 UTF ? SVf_UTF8 : 0 ));
9083 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9085 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9086 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9087 | ( UTF ? SVf_UTF8 : 0 ),
9088 ((PL_tokenbuf[0] == '$') ? SVt_PV
9089 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9095 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9099 PERL_ARGS_ASSERT_CHECKCOMMA;
9101 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9102 if (ckWARN(WARN_SYNTAX)) {
9105 for (w = s+2; *w && level; w++) {
9113 /* the list of chars below is for end of statements or
9114 * block / parens, boolean operators (&&, ||, //) and branch
9115 * constructs (or, and, if, until, unless, while, err, for).
9116 * Not a very solid hack... */
9117 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9118 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9119 "%s (...) interpreted as function",name);
9122 while (s < PL_bufend && isSPACE(*s))
9126 while (s < PL_bufend && isSPACE(*s))
9128 if (isIDFIRST_lazy_if(s,UTF)) {
9129 const char * const w = s;
9130 s += UTF ? UTF8SKIP(s) : 1;
9131 while (isWORDCHAR_lazy_if(s,UTF))
9132 s += UTF ? UTF8SKIP(s) : 1;
9133 while (s < PL_bufend && isSPACE(*s))
9137 if (keyword(w, s - w, 0))
9140 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9141 if (gv && GvCVu(gv))
9143 Perl_croak(aTHX_ "No comma allowed after %s", what);
9148 /* S_new_constant(): do any overload::constant lookup.
9150 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9151 Best used as sv=new_constant(..., sv, ...).
9152 If s, pv are NULL, calls subroutine with one argument,
9153 and <type> is used with error messages only.
9154 <type> is assumed to be well formed UTF-8 */
9157 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9158 SV *sv, SV *pv, const char *type, STRLEN typelen)
9161 HV * table = GvHV(PL_hintgv); /* ^H */
9166 const char *why1 = "", *why2 = "", *why3 = "";
9168 PERL_ARGS_ASSERT_NEW_CONSTANT;
9169 /* We assume that this is true: */
9170 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9173 /* charnames doesn't work well if there have been errors found */
9174 if (PL_error_count > 0 && *key == 'c')
9176 SvREFCNT_dec_NN(sv);
9177 return &PL_sv_undef;
9180 sv_2mortal(sv); /* Parent created it permanently */
9182 || ! (PL_hints & HINT_LOCALIZE_HH)
9183 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9188 /* Here haven't found what we're looking for. If it is charnames,
9189 * perhaps it needs to be loaded. Try doing that before giving up */
9191 Perl_load_module(aTHX_
9193 newSVpvs("_charnames"),
9194 /* version parameter; no need to specify it, as if
9195 * we get too early a version, will fail anyway,
9196 * not being able to find '_charnames' */
9202 table = GvHV(PL_hintgv);
9204 && (PL_hints & HINT_LOCALIZE_HH)
9205 && (cvp = hv_fetch(table, key, keylen, FALSE))
9211 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9212 msg = Perl_form(aTHX_
9213 "Constant(%.*s) unknown",
9214 (int)(type ? typelen : len),
9220 why3 = "} is not defined";
9223 msg = Perl_form(aTHX_
9224 /* The +3 is for '\N{'; -4 for that, plus '}' */
9225 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9229 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9230 (int)(type ? typelen : len),
9231 (type ? type: s), why1, why2, why3);
9234 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9235 return SvREFCNT_inc_simple_NN(sv);
9240 pv = newSVpvn_flags(s, len, SVs_TEMP);
9242 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9244 typesv = &PL_sv_undef;
9246 PUSHSTACKi(PERLSI_OVERLOAD);
9258 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9262 /* Check the eval first */
9263 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9265 const char * errstr;
9266 sv_catpvs(errsv, "Propagated");
9267 errstr = SvPV_const(errsv, errlen);
9268 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9270 res = SvREFCNT_inc_simple_NN(sv);
9274 SvREFCNT_inc_simple_void_NN(res);
9283 why1 = "Call to &{$^H{";
9285 why3 = "}} did not return a defined value";
9287 (void)sv_2mortal(sv);
9294 PERL_STATIC_INLINE void
9295 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9297 PERL_ARGS_ASSERT_PARSE_IDENT;
9301 Perl_croak(aTHX_ "%s", ident_too_long);
9302 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9303 /* The UTF-8 case must come first, otherwise things
9304 * like c\N{COMBINING TILDE} would start failing, as the
9305 * isWORDCHAR_A case below would gobble the 'c' up.
9308 char *t = *s + UTF8SKIP(*s);
9309 while (isIDCONT_utf8((U8*)t))
9311 if (*d + (t - *s) > e)
9312 Perl_croak(aTHX_ "%s", ident_too_long);
9313 Copy(*s, *d, t - *s, char);
9317 else if ( isWORDCHAR_A(**s) ) {
9320 } while isWORDCHAR_A(**s);
9322 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9327 else if (allow_package && **s == ':' && (*s)[1] == ':'
9328 /* Disallow things like Foo::$bar. For the curious, this is
9329 * the code path that triggers the "Bad name after" warning
9330 * when looking for barewords.
9332 && (*s)[2] != '$') {
9342 /* Returns a NUL terminated string, with the length of the string written to
9346 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9350 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9351 bool is_utf8 = cBOOL(UTF);
9353 PERL_ARGS_ASSERT_SCAN_WORD;
9355 parse_ident(&s, &d, e, allow_package, is_utf8);
9362 S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9365 char *bracket = NULL;
9368 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9369 bool is_utf8 = cBOOL(UTF);
9371 PERL_ARGS_ASSERT_SCAN_IDENT;
9376 while (isDIGIT(*s)) {
9378 Perl_croak(aTHX_ "%s", ident_too_long);
9383 parse_ident(&s, &d, e, 1, is_utf8);
9388 /* Either a digit variable, or parse_ident() found an identifier
9389 (anything valid as a bareword), so job done and return. */
9390 if (PL_lex_state != LEX_NORMAL)
9391 PL_lex_state = LEX_INTERPENDMAYBE;
9394 if (*s == '$' && s[1] &&
9395 (isIDFIRST_lazy_if(s+1,is_utf8)
9396 || isDIGIT_A((U8)s[1])
9399 || strnEQ(s+1,"::",2)) )
9401 /* Dereferencing a value in a scalar variable.
9402 The alternatives are different syntaxes for a scalar variable.
9403 Using ' as a leading package separator isn't allowed. :: is. */
9406 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9410 while (s < send && SPACE_OR_TAB(*s))
9414 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9415 || isCNTRL_A((U8)(d)) \
9416 || isDIGIT_A((U8)(d)) \
9417 || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
9419 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9422 const STRLEN skip = UTF8SKIP(s);
9425 for ( i = 0; i < skip; i++ )
9433 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9434 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9438 /* Warn about ambiguous code after unary operators if {...} notation isn't
9439 used. There's no difference in ambiguity; it's merely a heuristic
9440 about when not to warn. */
9441 else if (ck_uni && !bracket)
9444 /* If we were processing {...} notation then... */
9445 if (isIDFIRST_lazy_if(d,is_utf8)) {
9446 /* if it starts as a valid identifier, assume that it is one.
9447 (the later check for } being at the expected point will trap
9448 cases where this doesn't pan out.) */
9449 d += is_utf8 ? UTF8SKIP(d) : 1;
9450 parse_ident(&s, &d, e, 1, is_utf8);
9452 while (s < send && SPACE_OR_TAB(*s))
9454 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9455 /* ${foo[0]} and ${foo{bar}} notation. */
9456 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9457 const char * const brack =
9459 ((*s == '[') ? "[...]" : "{...}");
9460 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9461 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9462 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9463 funny, dest, brack, funny, dest, brack);
9466 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9467 PL_lex_allbrackets++;
9471 /* Handle extended ${^Foo} variables
9472 * 1999-02-27 mjd-perl-patch@plover.com */
9473 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9477 while (isWORDCHAR(*s) && d < e) {
9481 Perl_croak(aTHX_ "%s", ident_too_long);
9485 while (s < send && SPACE_OR_TAB(*s))
9488 /* Expect to find a closing } after consuming any trailing whitespace.
9492 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9493 PL_lex_state = LEX_INTERPEND;
9496 if (PL_lex_state == LEX_NORMAL) {
9497 if (ckWARN(WARN_AMBIGUOUS) &&
9498 (keyword(dest, d - dest, 0)
9499 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9501 SV *tmp = newSVpvn_flags( dest, d - dest,
9502 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9505 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9506 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9507 funny, tmp, funny, tmp);
9512 /* Didn't find the closing } at the point we expected, so restore
9513 state such that the next thing to process is the opening { and */
9514 s = bracket; /* let the parser handle it */
9518 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9519 PL_lex_state = LEX_INTERPEND;
9524 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9526 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9527 * the parse starting at 's', based on the subset that are valid in this
9528 * context input to this routine in 'valid_flags'. Advances s. Returns
9529 * TRUE if the input should be treated as a valid flag, so the next char
9530 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9531 * first call on the current regex. This routine will set it to any
9532 * charset modifier found. The caller shouldn't change it. This way,
9533 * another charset modifier encountered in the parse can be detected as an
9534 * error, as we have decided to allow only one */
9537 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9539 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9540 if (isWORDCHAR_lazy_if(*s, UTF)) {
9541 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9542 UTF ? SVf_UTF8 : 0);
9544 /* Pretend that it worked, so will continue processing before
9553 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9554 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9555 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9556 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9557 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9558 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9559 case LOCALE_PAT_MOD:
9561 goto multiple_charsets;
9563 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9566 case UNICODE_PAT_MOD:
9568 goto multiple_charsets;
9570 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9573 case ASCII_RESTRICT_PAT_MOD:
9575 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9579 /* Error if previous modifier wasn't an 'a', but if it was, see
9580 * if, and accept, a second occurrence (only) */
9582 || get_regex_charset(*pmfl)
9583 != REGEX_ASCII_RESTRICTED_CHARSET)
9585 goto multiple_charsets;
9587 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9591 case DEPENDS_PAT_MOD:
9593 goto multiple_charsets;
9595 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9604 if (*charset != c) {
9605 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9607 else if (c == 'a') {
9608 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9611 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9614 /* Pretend that it worked, so will continue processing before dieing */
9620 S_scan_pat(pTHX_ char *start, I32 type)
9625 const char * const valid_flags =
9626 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9627 char charset = '\0'; /* character set modifier */
9632 PERL_ARGS_ASSERT_SCAN_PAT;
9634 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9635 TRUE /* look for escaped bracketed metas */ );
9638 const char * const delimiter = skipspace(start);
9642 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9643 : "Search pattern not terminated" ));
9646 pm = (PMOP*)newPMOP(type, 0);
9647 if (PL_multi_open == '?') {
9648 /* This is the only point in the code that sets PMf_ONCE: */
9649 pm->op_pmflags |= PMf_ONCE;
9651 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9652 allows us to restrict the list needed by reset to just the ??
9654 assert(type != OP_TRANS);
9656 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9659 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9662 elements = mg->mg_len / sizeof(PMOP**);
9663 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9664 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9665 mg->mg_len = elements * sizeof(PMOP**);
9666 PmopSTASH_set(pm,PL_curstash);
9673 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9674 * anon CV. False positives like qr/[(?{]/ are harmless */
9676 if (type == OP_QR) {
9678 char *e, *p = SvPV(PL_lex_stuff, len);
9680 for (; p < e; p++) {
9681 if (p[0] == '(' && p[1] == '?'
9682 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9684 pm->op_pmflags |= PMf_HAS_CV;
9688 pm->op_pmflags |= PMf_IS_QR;
9691 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9693 if (PL_madskills && modstart != s) {
9694 SV* tmptoken = newSVpvn(modstart, s - modstart);
9695 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9698 /* issue a warning if /c is specified,but /g is not */
9699 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9701 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9702 "Use of /c modifier is meaningless without /g" );
9705 PL_lex_op = (OP*)pm;
9706 pl_yylval.ival = OP_MATCH;
9711 S_scan_subst(pTHX_ char *start)
9718 char charset = '\0'; /* character set modifier */
9723 PERL_ARGS_ASSERT_SCAN_SUBST;
9725 pl_yylval.ival = OP_NULL;
9727 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9728 TRUE /* look for escaped bracketed metas */ );
9731 Perl_croak(aTHX_ "Substitution pattern not terminated");
9733 if (s[-1] == PL_multi_open)
9737 CURMAD('q', PL_thisopen);
9738 CURMAD('_', PL_thiswhite);
9739 CURMAD('E', PL_thisstuff);
9740 CURMAD('Q', PL_thisclose);
9741 PL_realtokenstart = s - SvPVX(PL_linestr);
9745 first_start = PL_multi_start;
9746 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9749 SvREFCNT_dec(PL_lex_stuff);
9750 PL_lex_stuff = NULL;
9752 Perl_croak(aTHX_ "Substitution replacement not terminated");
9754 PL_multi_start = first_start; /* so whole substitution is taken together */
9756 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9760 CURMAD('z', PL_thisopen);
9761 CURMAD('R', PL_thisstuff);
9762 CURMAD('Z', PL_thisclose);
9768 if (*s == EXEC_PAT_MOD) {
9772 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9781 curmad('m', newSVpvn(modstart, s - modstart));
9782 append_madprops(PL_thismad, (OP*)pm, 0);
9786 if ((pm->op_pmflags & PMf_CONTINUE)) {
9787 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9791 SV * const repl = newSVpvs("");
9794 pm->op_pmflags |= PMf_EVAL;
9797 sv_catpvs(repl, "eval ");
9799 sv_catpvs(repl, "do ");
9801 sv_catpvs(repl, "{");
9802 sv_catsv(repl, PL_sublex_info.repl);
9803 sv_catpvs(repl, "}");
9805 SvREFCNT_dec(PL_sublex_info.repl);
9806 PL_sublex_info.repl = repl;
9809 PL_lex_op = (OP*)pm;
9810 pl_yylval.ival = OP_SUBST;
9815 S_scan_trans(pTHX_ char *start)
9823 bool nondestruct = 0;
9828 PERL_ARGS_ASSERT_SCAN_TRANS;
9830 pl_yylval.ival = OP_NULL;
9832 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9834 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9836 if (s[-1] == PL_multi_open)
9840 CURMAD('q', PL_thisopen);
9841 CURMAD('_', PL_thiswhite);
9842 CURMAD('E', PL_thisstuff);
9843 CURMAD('Q', PL_thisclose);
9844 PL_realtokenstart = s - SvPVX(PL_linestr);
9848 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9851 SvREFCNT_dec(PL_lex_stuff);
9852 PL_lex_stuff = NULL;
9854 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9857 CURMAD('z', PL_thisopen);
9858 CURMAD('R', PL_thisstuff);
9859 CURMAD('Z', PL_thisclose);
9862 complement = del = squash = 0;
9869 complement = OPpTRANS_COMPLEMENT;
9872 del = OPpTRANS_DELETE;
9875 squash = OPpTRANS_SQUASH;
9887 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9888 o->op_private &= ~OPpTRANS_ALL;
9889 o->op_private |= del|squash|complement|
9890 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9891 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9894 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9899 curmad('m', newSVpvn(modstart, s - modstart));
9900 append_madprops(PL_thismad, o, 0);
9909 Takes a pointer to the first < in <<FOO.
9910 Returns a pointer to the byte following <<FOO.
9912 This function scans a heredoc, which involves different methods
9913 depending on whether we are in a string eval, quoted construct, etc.
9914 This is because PL_linestr could containing a single line of input, or
9915 a whole string being evalled, or the contents of the current quote-
9918 The two basic methods are:
9919 - Steal lines from the input stream
9920 - Scan the heredoc in PL_linestr and remove it therefrom
9922 In a file scope or filtered eval, the first method is used; in a
9923 string eval, the second.
9925 In a quote-like operator, we have to choose between the two,
9926 depending on where we can find a newline. We peek into outer lex-
9927 ing scopes until we find one with a newline in it. If we reach the
9928 outermost lexing scope and it is a file, we use the stream method.
9929 Otherwise it is treated as an eval.
9933 S_scan_heredoc(pTHX_ char *s)
9936 I32 op_type = OP_SCALAR;
9943 const bool infile = PL_rsfp || PL_parser->filtered;
9944 LEXSHARED *shared = PL_parser->lex_shared;
9946 I32 stuffstart = s - SvPVX(PL_linestr);
9949 PL_realtokenstart = -1;
9952 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9955 d = PL_tokenbuf + 1;
9956 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9957 *PL_tokenbuf = '\n';
9959 while (SPACE_OR_TAB(*peek))
9961 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9964 s = delimcpy(d, e, s, PL_bufend, term, &len);
9966 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9972 /* <<\FOO is equivalent to <<'FOO' */
9976 if (!isWORDCHAR_lazy_if(s,UTF))
9977 deprecate("bare << to mean <<\"\"");
9978 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9983 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9984 Perl_croak(aTHX_ "Delimiter for here document is too long");
9987 len = d - PL_tokenbuf;
9991 tstart = PL_tokenbuf + 1;
9992 PL_thisclose = newSVpvn(tstart, len - 1);
9993 tstart = SvPVX(PL_linestr) + stuffstart;
9994 PL_thisopen = newSVpvn(tstart, s - tstart);
9995 stuffstart = s - SvPVX(PL_linestr);
9998 #ifndef PERL_STRICT_CR
9999 d = strchr(s, '\r');
10001 char * const olds = s;
10003 while (s < PL_bufend) {
10009 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10018 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10023 if (PL_madskills) {
10024 tstart = SvPVX(PL_linestr) + stuffstart;
10026 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10028 PL_thisstuff = newSVpvn(tstart, s - tstart);
10031 stuffstart = s - SvPVX(PL_linestr);
10034 tmpstr = newSV_type(SVt_PVIV);
10035 SvGROW(tmpstr, 80);
10036 if (term == '\'') {
10037 op_type = OP_CONST;
10038 SvIV_set(tmpstr, -1);
10040 else if (term == '`') {
10041 op_type = OP_BACKTICK;
10042 SvIV_set(tmpstr, '\\');
10045 PL_multi_start = CopLINE(PL_curcop) + 1;
10046 PL_multi_open = PL_multi_close = '<';
10047 /* inside a string eval or quote-like operator */
10048 if (!infile || PL_lex_inwhat) {
10051 char * const olds = s;
10052 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10053 /* These two fields are not set until an inner lexing scope is
10054 entered. But we need them set here. */
10055 shared->ls_bufptr = s;
10056 shared->ls_linestr = PL_linestr;
10058 /* Look for a newline. If the current buffer does not have one,
10059 peek into the line buffer of the parent lexing scope, going
10060 up as many levels as necessary to find one with a newline
10063 while (!(s = (char *)memchr(
10064 (void *)shared->ls_bufptr, '\n',
10065 SvEND(shared->ls_linestr)-shared->ls_bufptr
10067 shared = shared->ls_prev;
10068 /* shared is only null if we have gone beyond the outermost
10069 lexing scope. In a file, we will have broken out of the
10070 loop in the previous iteration. In an eval, the string buf-
10071 fer ends with "\n;", so the while condition below will have
10072 evaluated to false. So shared can never be null. */
10074 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10075 most lexing scope. In a file, shared->ls_linestr at that
10076 level is just one line, so there is no body to steal. */
10077 if (infile && !shared->ls_prev) {
10083 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10086 linestr = shared->ls_linestr;
10087 bufend = SvEND(linestr);
10089 while (s < bufend - len + 1 &&
10090 memNE(s,PL_tokenbuf,len) ) {
10092 ++shared->herelines;
10094 if (s >= bufend - len + 1) {
10097 sv_setpvn(tmpstr,d+1,s-d);
10099 if (PL_madskills) {
10101 sv_catpvn(PL_thisstuff, d + 1, s - d);
10103 PL_thisstuff = newSVpvn(d + 1, s - d);
10104 stuffstart = s - SvPVX(PL_linestr);
10108 /* the preceding stmt passes a newline */
10109 shared->herelines++;
10111 /* s now points to the newline after the heredoc terminator.
10112 d points to the newline before the body of the heredoc.
10115 /* We are going to modify linestr in place here, so set
10116 aside copies of the string if necessary for re-evals or
10118 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10119 check shared->re_eval_str. */
10120 if (shared->re_eval_start || shared->re_eval_str) {
10121 /* Set aside the rest of the regexp */
10122 if (!shared->re_eval_str)
10123 shared->re_eval_str =
10124 newSVpvn(shared->re_eval_start,
10125 bufend - shared->re_eval_start);
10126 shared->re_eval_start -= s-d;
10128 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10129 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10130 cx->blk_eval.cur_text == linestr)
10132 cx->blk_eval.cur_text = newSVsv(linestr);
10133 SvSCREAM_on(cx->blk_eval.cur_text);
10135 /* Copy everything from s onwards back to d. */
10136 Move(s,d,bufend-s + 1,char);
10137 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10138 /* Setting PL_bufend only applies when we have not dug deeper
10139 into other scopes, because sublex_done sets PL_bufend to
10140 SvEND(PL_linestr). */
10141 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10148 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10149 term = PL_tokenbuf[1];
10151 linestr_save = PL_linestr; /* must restore this afterwards */
10152 d = s; /* and this */
10153 PL_linestr = newSVpvs("");
10154 PL_bufend = SvPVX(PL_linestr);
10157 if (PL_madskills) {
10158 tstart = SvPVX(PL_linestr) + stuffstart;
10160 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10162 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10165 PL_bufptr = PL_bufend;
10166 CopLINE_set(PL_curcop,
10167 PL_multi_start + shared->herelines);
10168 if (!lex_next_chunk(LEX_NO_TERM)
10169 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10170 SvREFCNT_dec(linestr_save);
10173 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10174 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10175 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10176 /* ^That should be enough to avoid this needing to grow: */
10177 sv_catpvs(PL_linestr, "\n\0");
10178 assert(s == SvPVX(PL_linestr));
10179 PL_bufend = SvEND(PL_linestr);
10183 stuffstart = s - SvPVX(PL_linestr);
10185 shared->herelines++;
10186 PL_last_lop = PL_last_uni = NULL;
10187 #ifndef PERL_STRICT_CR
10188 if (PL_bufend - PL_linestart >= 2) {
10189 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10190 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10192 PL_bufend[-2] = '\n';
10194 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10196 else if (PL_bufend[-1] == '\r')
10197 PL_bufend[-1] = '\n';
10199 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10200 PL_bufend[-1] = '\n';
10202 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10203 SvREFCNT_dec(PL_linestr);
10204 PL_linestr = linestr_save;
10205 PL_linestart = SvPVX(linestr_save);
10206 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10211 sv_catsv(tmpstr,PL_linestr);
10215 PL_multi_end = CopLINE(PL_curcop);
10216 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10217 SvPV_shrink_to_cur(tmpstr);
10220 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10222 else if (PL_encoding)
10223 sv_recode_to_utf8(tmpstr, PL_encoding);
10225 PL_lex_stuff = tmpstr;
10226 pl_yylval.ival = op_type;
10230 SvREFCNT_dec(tmpstr);
10231 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10232 missingterm(PL_tokenbuf + 1);
10235 /* scan_inputsymbol
10236 takes: current position in input buffer
10237 returns: new position in input buffer
10238 side-effects: pl_yylval and lex_op are set.
10243 <FH> read from filehandle
10244 <pkg::FH> read from package qualified filehandle
10245 <pkg'FH> read from package qualified filehandle
10246 <$fh> read from filehandle in $fh
10247 <*.h> filename glob
10252 S_scan_inputsymbol(pTHX_ char *start)
10255 char *s = start; /* current position in buffer */
10258 char *d = PL_tokenbuf; /* start of temp holding space */
10259 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10261 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10263 end = strchr(s, '\n');
10266 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10268 /* die if we didn't have space for the contents of the <>,
10269 or if it didn't end, or if we see a newline
10272 if (len >= (I32)sizeof PL_tokenbuf)
10273 Perl_croak(aTHX_ "Excessively long <> operator");
10275 Perl_croak(aTHX_ "Unterminated <> operator");
10280 Remember, only scalar variables are interpreted as filehandles by
10281 this code. Anything more complex (e.g., <$fh{$num}>) will be
10282 treated as a glob() call.
10283 This code makes use of the fact that except for the $ at the front,
10284 a scalar variable and a filehandle look the same.
10286 if (*d == '$' && d[1]) d++;
10288 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10289 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10290 d += UTF ? UTF8SKIP(d) : 1;
10292 /* If we've tried to read what we allow filehandles to look like, and
10293 there's still text left, then it must be a glob() and not a getline.
10294 Use scan_str to pull out the stuff between the <> and treat it
10295 as nothing more than a string.
10298 if (d - PL_tokenbuf != len) {
10299 pl_yylval.ival = OP_GLOB;
10300 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10302 Perl_croak(aTHX_ "Glob not terminated");
10306 bool readline_overriden = FALSE;
10309 /* we're in a filehandle read situation */
10312 /* turn <> into <ARGV> */
10314 Copy("ARGV",d,5,char);
10316 /* Check whether readline() is overriden */
10317 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10319 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10321 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10322 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10323 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10324 readline_overriden = TRUE;
10326 /* if <$fh>, create the ops to turn the variable into a
10330 /* try to find it in the pad for this block, otherwise find
10331 add symbol table ops
10333 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10334 if (tmp != NOT_IN_PAD) {
10335 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10336 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10337 HEK * const stashname = HvNAME_HEK(stash);
10338 SV * const sym = sv_2mortal(newSVhek(stashname));
10339 sv_catpvs(sym, "::");
10340 sv_catpv(sym, d+1);
10345 OP * const o = newOP(OP_PADSV, 0);
10347 PL_lex_op = readline_overriden
10348 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10349 op_append_elem(OP_LIST, o,
10350 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10351 : (OP*)newUNOP(OP_READLINE, 0, o);
10360 ? (GV_ADDMULTI | GV_ADDINEVAL)
10361 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10363 PL_lex_op = readline_overriden
10364 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10365 op_append_elem(OP_LIST,
10366 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10367 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10368 : (OP*)newUNOP(OP_READLINE, 0,
10369 newUNOP(OP_RV2SV, 0,
10370 newGVOP(OP_GV, 0, gv)));
10372 if (!readline_overriden)
10373 PL_lex_op->op_flags |= OPf_SPECIAL;
10374 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10375 pl_yylval.ival = OP_NULL;
10378 /* If it's none of the above, it must be a literal filehandle
10379 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10381 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10382 PL_lex_op = readline_overriden
10383 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10384 op_append_elem(OP_LIST,
10385 newGVOP(OP_GV, 0, gv),
10386 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10387 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10388 pl_yylval.ival = OP_NULL;
10397 takes: start position in buffer
10398 keep_quoted preserve \ on the embedded delimiter(s)
10399 keep_delims preserve the delimiters around the string
10400 re_reparse compiling a run-time /(?{})/:
10401 collapse // to /, and skip encoding src
10402 returns: position to continue reading from buffer
10403 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10404 updates the read buffer.
10406 This subroutine pulls a string out of the input. It is called for:
10407 q single quotes q(literal text)
10408 ' single quotes 'literal text'
10409 qq double quotes qq(interpolate $here please)
10410 " double quotes "interpolate $here please"
10411 qx backticks qx(/bin/ls -l)
10412 ` backticks `/bin/ls -l`
10413 qw quote words @EXPORT_OK = qw( func() $spam )
10414 m// regexp match m/this/
10415 s/// regexp substitute s/this/that/
10416 tr/// string transliterate tr/this/that/
10417 y/// string transliterate y/this/that/
10418 ($*@) sub prototypes sub foo ($)
10419 (stuff) sub attr parameters sub foo : attr(stuff)
10420 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10422 In most of these cases (all but <>, patterns and transliterate)
10423 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10424 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10425 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10428 It skips whitespace before the string starts, and treats the first
10429 character as the delimiter. If the delimiter is one of ([{< then
10430 the corresponding "close" character )]}> is used as the closing
10431 delimiter. It allows quoting of delimiters, and if the string has
10432 balanced delimiters ([{<>}]) it allows nesting.
10434 On success, the SV with the resulting string is put into lex_stuff or,
10435 if that is already non-NULL, into lex_repl. The second case occurs only
10436 when parsing the RHS of the special constructs s/// and tr/// (y///).
10437 For convenience, the terminating delimiter character is stuffed into
10442 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10443 bool deprecate_escaped_meta /* Should we issue a deprecation warning
10444 for certain paired metacharacters that
10445 appear escaped within it */
10449 SV *sv; /* scalar value: string */
10450 const char *tmps; /* temp string, used for delimiter matching */
10451 char *s = start; /* current position in the buffer */
10452 char term; /* terminating character */
10453 char *to; /* current position in the sv's data */
10454 I32 brackets = 1; /* bracket nesting level */
10455 bool has_utf8 = FALSE; /* is there any utf8 content? */
10456 I32 termcode; /* terminating char. code */
10457 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10458 STRLEN termlen; /* length of terminating string */
10459 int last_off = 0; /* last position for nesting bracket */
10460 char *escaped_open = NULL;
10466 PERL_ARGS_ASSERT_SCAN_STR;
10468 /* skip space before the delimiter */
10474 if (PL_realtokenstart >= 0) {
10475 stuffstart = PL_realtokenstart;
10476 PL_realtokenstart = -1;
10479 stuffstart = start - SvPVX(PL_linestr);
10481 /* mark where we are, in case we need to report errors */
10484 /* after skipping whitespace, the next character is the terminator */
10487 termcode = termstr[0] = term;
10491 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10492 Copy(s, termstr, termlen, U8);
10493 if (!UTF8_IS_INVARIANT(term))
10497 /* mark where we are */
10498 PL_multi_start = CopLINE(PL_curcop);
10499 PL_multi_open = term;
10501 /* find corresponding closing delimiter */
10502 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10503 termcode = termstr[0] = term = tmps[5];
10505 PL_multi_close = term;
10507 /* A warning is raised if the input parameter requires it for escaped (by a
10508 * backslash) paired metacharacters {} [] and () when the delimiters are
10509 * those same characters, and the backslash is ineffective. This doesn't
10510 * happen for <>, as they aren't metas. */
10511 if (deprecate_escaped_meta
10512 && (PL_multi_open == PL_multi_close
10513 || ! ckWARN_d(WARN_DEPRECATED)
10514 || PL_multi_open == '<'))
10516 deprecate_escaped_meta = FALSE;
10519 /* create a new SV to hold the contents. 79 is the SV's initial length.
10520 What a random number. */
10521 sv = newSV_type(SVt_PVIV);
10523 SvIV_set(sv, termcode);
10524 (void)SvPOK_only(sv); /* validate pointer */
10526 /* move past delimiter and try to read a complete string */
10528 sv_catpvn(sv, s, termlen);
10531 tstart = SvPVX(PL_linestr) + stuffstart;
10532 if (PL_madskills && !PL_thisopen && !keep_delims) {
10533 PL_thisopen = newSVpvn(tstart, s - tstart);
10534 stuffstart = s - SvPVX(PL_linestr);
10538 if (PL_encoding && !UTF && !re_reparse) {
10542 int offset = s - SvPVX_const(PL_linestr);
10543 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10544 &offset, (char*)termstr, termlen);
10548 if (SvIsCOW(PL_linestr)) {
10549 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10550 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10551 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10552 char *buf = SvPVX(PL_linestr);
10553 bufend_pos = PL_parser->bufend - buf;
10554 bufptr_pos = PL_parser->bufptr - buf;
10555 oldbufptr_pos = PL_parser->oldbufptr - buf;
10556 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10557 linestart_pos = PL_parser->linestart - buf;
10558 last_uni_pos = PL_parser->last_uni
10559 ? PL_parser->last_uni - buf
10561 last_lop_pos = PL_parser->last_lop
10562 ? PL_parser->last_lop - buf
10564 re_eval_start_pos =
10565 PL_parser->lex_shared->re_eval_start ?
10566 PL_parser->lex_shared->re_eval_start - buf : 0;
10569 sv_force_normal(PL_linestr);
10571 buf = SvPVX(PL_linestr);
10572 PL_parser->bufend = buf + bufend_pos;
10573 PL_parser->bufptr = buf + bufptr_pos;
10574 PL_parser->oldbufptr = buf + oldbufptr_pos;
10575 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10576 PL_parser->linestart = buf + linestart_pos;
10577 if (PL_parser->last_uni)
10578 PL_parser->last_uni = buf + last_uni_pos;
10579 if (PL_parser->last_lop)
10580 PL_parser->last_lop = buf + last_lop_pos;
10581 if (PL_parser->lex_shared->re_eval_start)
10582 PL_parser->lex_shared->re_eval_start =
10583 buf + re_eval_start_pos;
10586 ns = SvPVX_const(PL_linestr) + offset;
10587 svlast = SvEND(sv) - 1;
10589 for (; s < ns; s++) {
10590 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10591 COPLINE_INC_WITH_HERELINES;
10594 goto read_more_line;
10596 /* handle quoted delimiters */
10597 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10599 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10601 if ((svlast-1 - t) % 2) {
10602 if (!keep_quoted) {
10603 *(svlast-1) = term;
10605 SvCUR_set(sv, SvCUR(sv) - 1);
10610 if (PL_multi_open == PL_multi_close) {
10616 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10617 /* At here, all closes are "was quoted" one,
10618 so we don't check PL_multi_close. */
10620 if (!keep_quoted && *(t+1) == PL_multi_open)
10625 else if (*t == PL_multi_open)
10633 SvCUR_set(sv, w - SvPVX_const(sv));
10635 last_off = w - SvPVX(sv);
10636 if (--brackets <= 0)
10641 if (!keep_delims) {
10642 SvCUR_set(sv, SvCUR(sv) - 1);
10648 /* extend sv if need be */
10649 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10650 /* set 'to' to the next character in the sv's string */
10651 to = SvPVX(sv)+SvCUR(sv);
10653 /* if open delimiter is the close delimiter read unbridle */
10654 if (PL_multi_open == PL_multi_close) {
10655 for (; s < PL_bufend; s++,to++) {
10656 /* embedded newlines increment the current line number */
10657 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10658 COPLINE_INC_WITH_HERELINES;
10659 /* handle quoted delimiters */
10660 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10663 || (re_reparse && s[1] == '\\'))
10666 /* any other quotes are simply copied straight through */
10670 /* terminate when run out of buffer (the for() condition), or
10671 have found the terminator */
10672 else if (*s == term) {
10675 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10678 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10684 /* if the terminator isn't the same as the start character (e.g.,
10685 matched brackets), we have to allow more in the quoting, and
10686 be prepared for nested brackets.
10689 /* read until we run out of string, or we find the terminator */
10690 for (; s < PL_bufend; s++,to++) {
10691 /* embedded newlines increment the line count */
10692 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10693 COPLINE_INC_WITH_HERELINES;
10694 /* backslashes can escape the open or closing characters */
10695 if (*s == '\\' && s+1 < PL_bufend) {
10696 if (!keep_quoted &&
10697 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10701 /* Here, 'deprecate_escaped_meta' is true iff the
10702 * delimiters are paired metacharacters, and 's' points
10703 * to an occurrence of one of them within the string,
10704 * which was preceded by a backslash. If this is a
10705 * context where the delimiter is also a metacharacter,
10706 * the backslash is useless, and deprecated. () and []
10707 * are meta in any context. {} are meta only when
10708 * appearing in a quantifier or in things like '\p{'.
10709 * They also aren't meta unless there is a matching
10710 * closed, escaped char later on within the string.
10711 * If 's' points to an open, set a flag; if to a close,
10712 * test that flag, and raise a warning if it was set */
10714 if (deprecate_escaped_meta) {
10715 if (*s == PL_multi_open) {
10719 else if (regcurly(s,
10720 TRUE /* Look for a closing
10722 || (s - start > 2 /* Look for e.g.
10724 && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
10729 else if (escaped_open) {
10730 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10731 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10732 escaped_open = NULL;
10739 /* allow nested opens and closes */
10740 else if (*s == PL_multi_close && --brackets <= 0)
10742 else if (*s == PL_multi_open)
10744 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10749 /* terminate the copied string and update the sv's end-of-string */
10751 SvCUR_set(sv, to - SvPVX_const(sv));
10754 * this next chunk reads more into the buffer if we're not done yet
10758 break; /* handle case where we are done yet :-) */
10760 #ifndef PERL_STRICT_CR
10761 if (to - SvPVX_const(sv) >= 2) {
10762 if ((to[-2] == '\r' && to[-1] == '\n') ||
10763 (to[-2] == '\n' && to[-1] == '\r'))
10767 SvCUR_set(sv, to - SvPVX_const(sv));
10769 else if (to[-1] == '\r')
10772 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10777 /* if we're out of file, or a read fails, bail and reset the current
10778 line marker so we can report where the unterminated string began
10781 if (PL_madskills) {
10782 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10784 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10786 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10789 COPLINE_INC_WITH_HERELINES;
10790 PL_bufptr = PL_bufend;
10791 if (!lex_next_chunk(0)) {
10793 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10802 /* at this point, we have successfully read the delimited string */
10804 if (!PL_encoding || UTF || re_reparse) {
10806 if (PL_madskills) {
10807 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10808 const int len = s - tstart;
10810 sv_catpvn(PL_thisstuff, tstart, len);
10812 PL_thisstuff = newSVpvn(tstart, len);
10813 if (!PL_thisclose && !keep_delims)
10814 PL_thisclose = newSVpvn(s,termlen);
10819 sv_catpvn(sv, s, termlen);
10824 if (PL_madskills) {
10825 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10826 const int len = s - tstart - termlen;
10828 sv_catpvn(PL_thisstuff, tstart, len);
10830 PL_thisstuff = newSVpvn(tstart, len);
10831 if (!PL_thisclose && !keep_delims)
10832 PL_thisclose = newSVpvn(s - termlen,termlen);
10836 if (has_utf8 || (PL_encoding && !re_reparse))
10839 PL_multi_end = CopLINE(PL_curcop);
10841 /* if we allocated too much space, give some back */
10842 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10843 SvLEN_set(sv, SvCUR(sv) + 1);
10844 SvPV_renew(sv, SvLEN(sv));
10847 /* decide whether this is the first or second quoted string we've read
10852 PL_sublex_info.repl = sv;
10860 takes: pointer to position in buffer
10861 returns: pointer to new position in buffer
10862 side-effects: builds ops for the constant in pl_yylval.op
10864 Read a number in any of the formats that Perl accepts:
10866 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10867 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10870 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10872 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10875 If it reads a number without a decimal point or an exponent, it will
10876 try converting the number to an integer and see if it can do so
10877 without loss of precision.
10881 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10884 const char *s = start; /* current position in buffer */
10885 char *d; /* destination in temp buffer */
10886 char *e; /* end of temp buffer */
10887 NV nv; /* number read, as a double */
10888 SV *sv = NULL; /* place to put the converted number */
10889 bool floatit; /* boolean: int or float? */
10890 const char *lastub = NULL; /* position of last underbar */
10891 static const char* const number_too_long = "Number too long";
10893 PERL_ARGS_ASSERT_SCAN_NUM;
10895 /* We use the first character to decide what type of number this is */
10899 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10901 /* if it starts with a 0, it could be an octal number, a decimal in
10902 0.13 disguise, or a hexadecimal number, or a binary number. */
10906 u holds the "number so far"
10907 shift the power of 2 of the base
10908 (hex == 4, octal == 3, binary == 1)
10909 overflowed was the number more than we can hold?
10911 Shift is used when we add a digit. It also serves as an "are
10912 we in octal/hex/binary?" indicator to disallow hex characters
10913 when in octal mode.
10918 bool overflowed = FALSE;
10919 bool just_zero = TRUE; /* just plain 0 or binary number? */
10920 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10921 static const char* const bases[5] =
10922 { "", "binary", "", "octal", "hexadecimal" };
10923 static const char* const Bases[5] =
10924 { "", "Binary", "", "Octal", "Hexadecimal" };
10925 static const char* const maxima[5] =
10927 "0b11111111111111111111111111111111",
10931 const char *base, *Base, *max;
10933 /* check for hex */
10934 if (s[1] == 'x' || s[1] == 'X') {
10938 } else if (s[1] == 'b' || s[1] == 'B') {
10943 /* check for a decimal in disguise */
10944 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10946 /* so it must be octal */
10953 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10954 "Misplaced _ in number");
10958 base = bases[shift];
10959 Base = Bases[shift];
10960 max = maxima[shift];
10962 /* read the rest of the number */
10964 /* x is used in the overflow test,
10965 b is the digit we're adding on. */
10970 /* if we don't mention it, we're done */
10974 /* _ are ignored -- but warned about if consecutive */
10976 if (lastub && s == lastub + 1)
10977 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10978 "Misplaced _ in number");
10982 /* 8 and 9 are not octal */
10983 case '8': case '9':
10985 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10989 case '2': case '3': case '4':
10990 case '5': case '6': case '7':
10992 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10995 case '0': case '1':
10996 b = *s++ & 15; /* ASCII digit -> value of digit */
11000 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11001 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11002 /* make sure they said 0x */
11005 b = (*s++ & 7) + 9;
11007 /* Prepare to put the digit we have onto the end
11008 of the number so far. We check for overflows.
11014 x = u << shift; /* make room for the digit */
11016 if ((x >> shift) != u
11017 && !(PL_hints & HINT_NEW_BINARY)) {
11020 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11021 "Integer overflow in %s number",
11024 u = x | b; /* add the digit to the end */
11027 n *= nvshift[shift];
11028 /* If an NV has not enough bits in its
11029 * mantissa to represent an UV this summing of
11030 * small low-order numbers is a waste of time
11031 * (because the NV cannot preserve the
11032 * low-order bits anyway): we could just
11033 * remember when did we overflow and in the
11034 * end just multiply n by the right
11042 /* if we get here, we had success: make a scalar value from
11047 /* final misplaced underbar check */
11048 if (s[-1] == '_') {
11049 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11053 if (n > 4294967295.0)
11054 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11055 "%s number > %s non-portable",
11061 if (u > 0xffffffff)
11062 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11063 "%s number > %s non-portable",
11068 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11069 sv = new_constant(start, s - start, "integer",
11070 sv, NULL, NULL, 0);
11071 else if (PL_hints & HINT_NEW_BINARY)
11072 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11077 handle decimal numbers.
11078 we're also sent here when we read a 0 as the first digit
11080 case '1': case '2': case '3': case '4': case '5':
11081 case '6': case '7': case '8': case '9': case '.':
11084 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11087 /* read next group of digits and _ and copy into d */
11088 while (isDIGIT(*s) || *s == '_') {
11089 /* skip underscores, checking for misplaced ones
11093 if (lastub && s == lastub + 1)
11094 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11095 "Misplaced _ in number");
11099 /* check for end of fixed-length buffer */
11101 Perl_croak(aTHX_ "%s", number_too_long);
11102 /* if we're ok, copy the character */
11107 /* final misplaced underbar check */
11108 if (lastub && s == lastub + 1) {
11109 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11112 /* read a decimal portion if there is one. avoid
11113 3..5 being interpreted as the number 3. followed
11116 if (*s == '.' && s[1] != '.') {
11121 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11122 "Misplaced _ in number");
11126 /* copy, ignoring underbars, until we run out of digits.
11128 for (; isDIGIT(*s) || *s == '_'; s++) {
11129 /* fixed length buffer check */
11131 Perl_croak(aTHX_ "%s", number_too_long);
11133 if (lastub && s == lastub + 1)
11134 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11135 "Misplaced _ in number");
11141 /* fractional part ending in underbar? */
11142 if (s[-1] == '_') {
11143 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11144 "Misplaced _ in number");
11146 if (*s == '.' && isDIGIT(s[1])) {
11147 /* oops, it's really a v-string, but without the "v" */
11153 /* read exponent part, if present */
11154 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11158 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11159 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11161 /* stray preinitial _ */
11163 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11164 "Misplaced _ in number");
11168 /* allow positive or negative exponent */
11169 if (*s == '+' || *s == '-')
11172 /* stray initial _ */
11174 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11175 "Misplaced _ in number");
11179 /* read digits of exponent */
11180 while (isDIGIT(*s) || *s == '_') {
11183 Perl_croak(aTHX_ "%s", number_too_long);
11187 if (((lastub && s == lastub + 1) ||
11188 (!isDIGIT(s[1]) && s[1] != '_')))
11189 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11190 "Misplaced _ in number");
11198 We try to do an integer conversion first if no characters
11199 indicating "float" have been found.
11204 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11206 if (flags == IS_NUMBER_IN_UV) {
11208 sv = newSViv(uv); /* Prefer IVs over UVs. */
11211 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11212 if (uv <= (UV) IV_MIN)
11213 sv = newSViv(-(IV)uv);
11220 /* terminate the string */
11222 nv = Atof(PL_tokenbuf);
11227 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11228 const char *const key = floatit ? "float" : "integer";
11229 const STRLEN keylen = floatit ? 5 : 7;
11230 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11231 key, keylen, sv, NULL, NULL, 0);
11235 /* if it starts with a v, it could be a v-string */
11238 sv = newSV(5); /* preallocate storage space */
11239 ENTER_with_name("scan_vstring");
11241 s = scan_vstring(s, PL_bufend, sv);
11242 SvREFCNT_inc_simple_void_NN(sv);
11243 LEAVE_with_name("scan_vstring");
11247 /* make the op for the constant and return */
11250 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11252 lvalp->opval = NULL;
11258 S_scan_formline(pTHX_ char *s)
11263 SV * const stuff = newSVpvs("");
11264 bool needargs = FALSE;
11265 bool eofmt = FALSE;
11267 char *tokenstart = s;
11268 SV* savewhite = NULL;
11270 if (PL_madskills) {
11271 savewhite = PL_thiswhite;
11276 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11278 while (!needargs) {
11281 #ifdef PERL_STRICT_CR
11282 while (SPACE_OR_TAB(*t))
11285 while (SPACE_OR_TAB(*t) || *t == '\r')
11288 if (*t == '\n' || t == PL_bufend) {
11293 eol = (char *) memchr(s,'\n',PL_bufend-s);
11297 for (t = s; t < eol; t++) {
11298 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11300 goto enough; /* ~~ must be first line in formline */
11302 if (*t == '@' || *t == '^')
11306 sv_catpvn(stuff, s, eol-s);
11307 #ifndef PERL_STRICT_CR
11308 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11309 char *end = SvPVX(stuff) + SvCUR(stuff);
11312 SvCUR_set(stuff, SvCUR(stuff) - 1);
11320 if ((PL_rsfp || PL_parser->filtered)
11321 && PL_parser->form_lex_state == LEX_NORMAL) {
11324 if (PL_madskills) {
11326 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11328 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11331 PL_bufptr = PL_bufend;
11332 COPLINE_INC_WITH_HERELINES;
11333 got_some = lex_next_chunk(0);
11334 CopLINE_dec(PL_curcop);
11337 tokenstart = PL_bufptr;
11345 if (!SvCUR(stuff) || needargs)
11346 PL_lex_state = PL_parser->form_lex_state;
11347 if (SvCUR(stuff)) {
11348 PL_expect = XSTATE;
11350 start_force(PL_curforce);
11351 NEXTVAL_NEXTTOKE.ival = 0;
11352 force_next(FORMLBRACK);
11355 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11357 else if (PL_encoding)
11358 sv_recode_to_utf8(stuff, PL_encoding);
11360 start_force(PL_curforce);
11361 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11365 SvREFCNT_dec(stuff);
11367 PL_lex_formbrack = 0;
11370 if (PL_madskills) {
11372 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11374 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11375 PL_thiswhite = savewhite;
11382 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11385 const I32 oldsavestack_ix = PL_savestack_ix;
11386 CV* const outsidecv = PL_compcv;
11388 SAVEI32(PL_subline);
11389 save_item(PL_subname);
11390 SAVESPTR(PL_compcv);
11392 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11393 CvFLAGS(PL_compcv) |= flags;
11395 PL_subline = CopLINE(PL_curcop);
11396 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11397 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11398 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11399 if (outsidecv && CvPADLIST(outsidecv))
11400 CvPADLIST(PL_compcv)->xpadl_outid =
11401 PadlistNAMES(CvPADLIST(outsidecv));
11403 return oldsavestack_ix;
11407 #pragma segment Perl_yylex
11410 S_yywarn(pTHX_ const char *const s, U32 flags)
11414 PERL_ARGS_ASSERT_YYWARN;
11416 PL_in_eval |= EVAL_WARNONLY;
11417 yyerror_pv(s, flags);
11418 PL_in_eval &= ~EVAL_WARNONLY;
11423 Perl_yyerror(pTHX_ const char *const s)
11425 PERL_ARGS_ASSERT_YYERROR;
11426 return yyerror_pvn(s, strlen(s), 0);
11430 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11432 PERL_ARGS_ASSERT_YYERROR_PV;
11433 return yyerror_pvn(s, strlen(s), flags);
11437 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11440 const char *context = NULL;
11443 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11444 int yychar = PL_parser->yychar;
11446 PERL_ARGS_ASSERT_YYERROR_PVN;
11448 if (!yychar || (yychar == ';' && !PL_rsfp))
11449 sv_catpvs(where_sv, "at EOF");
11450 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11451 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11452 PL_oldbufptr != PL_bufptr) {
11455 The code below is removed for NetWare because it abends/crashes on NetWare
11456 when the script has error such as not having the closing quotes like:
11457 if ($var eq "value)
11458 Checking of white spaces is anyway done in NetWare code.
11461 while (isSPACE(*PL_oldoldbufptr))
11464 context = PL_oldoldbufptr;
11465 contlen = PL_bufptr - PL_oldoldbufptr;
11467 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11468 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11471 The code below is removed for NetWare because it abends/crashes on NetWare
11472 when the script has error such as not having the closing quotes like:
11473 if ($var eq "value)
11474 Checking of white spaces is anyway done in NetWare code.
11477 while (isSPACE(*PL_oldbufptr))
11480 context = PL_oldbufptr;
11481 contlen = PL_bufptr - PL_oldbufptr;
11483 else if (yychar > 255)
11484 sv_catpvs(where_sv, "next token ???");
11485 else if (yychar == -2) { /* YYEMPTY */
11486 if (PL_lex_state == LEX_NORMAL ||
11487 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11488 sv_catpvs(where_sv, "at end of line");
11489 else if (PL_lex_inpat)
11490 sv_catpvs(where_sv, "within pattern");
11492 sv_catpvs(where_sv, "within string");
11495 sv_catpvs(where_sv, "next char ");
11497 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11498 else if (isPRINT_LC(yychar)) {
11499 const char string = yychar;
11500 sv_catpvn(where_sv, &string, 1);
11503 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11505 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11506 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11507 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11509 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11510 UTF8fARG(UTF, contlen, context));
11512 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11513 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11514 Perl_sv_catpvf(aTHX_ msg,
11515 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11516 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11519 if (PL_in_eval & EVAL_WARNONLY) {
11520 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11524 if (PL_error_count >= 10) {
11526 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11527 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11528 SVfARG(errsv), OutCopFILE(PL_curcop));
11530 Perl_croak(aTHX_ "%s has too many errors.\n",
11531 OutCopFILE(PL_curcop));
11534 PL_in_my_stash = NULL;
11538 #pragma segment Main
11542 S_swallow_bom(pTHX_ U8 *s)
11545 const STRLEN slen = SvCUR(PL_linestr);
11547 PERL_ARGS_ASSERT_SWALLOW_BOM;
11551 if (s[1] == 0xFE) {
11552 /* UTF-16 little-endian? (or UTF-32LE?) */
11553 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11554 /* diag_listed_as: Unsupported script encoding %s */
11555 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11556 #ifndef PERL_NO_UTF16_FILTER
11557 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11559 if (PL_bufend > (char*)s) {
11560 s = add_utf16_textfilter(s, TRUE);
11563 /* diag_listed_as: Unsupported script encoding %s */
11564 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11569 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11570 #ifndef PERL_NO_UTF16_FILTER
11571 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11573 if (PL_bufend > (char *)s) {
11574 s = add_utf16_textfilter(s, FALSE);
11577 /* diag_listed_as: Unsupported script encoding %s */
11578 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11583 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11584 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11585 s += 3; /* UTF-8 */
11591 if (s[2] == 0xFE && s[3] == 0xFF) {
11592 /* UTF-32 big-endian */
11593 /* diag_listed_as: Unsupported script encoding %s */
11594 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11597 else if (s[2] == 0 && s[3] != 0) {
11600 * are a good indicator of UTF-16BE. */
11601 #ifndef PERL_NO_UTF16_FILTER
11602 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11603 s = add_utf16_textfilter(s, FALSE);
11605 /* diag_listed_as: Unsupported script encoding %s */
11606 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11612 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11613 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11614 s += 4; /* UTF-8 */
11620 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11623 * are a good indicator of UTF-16LE. */
11624 #ifndef PERL_NO_UTF16_FILTER
11625 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11626 s = add_utf16_textfilter(s, TRUE);
11628 /* diag_listed_as: Unsupported script encoding %s */
11629 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11637 #ifndef PERL_NO_UTF16_FILTER
11639 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11642 SV *const filter = FILTER_DATA(idx);
11643 /* We re-use this each time round, throwing the contents away before we
11645 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11646 SV *const utf8_buffer = filter;
11647 IV status = IoPAGE(filter);
11648 const bool reverse = cBOOL(IoLINES(filter));
11651 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11653 /* As we're automatically added, at the lowest level, and hence only called
11654 from this file, we can be sure that we're not called in block mode. Hence
11655 don't bother writing code to deal with block mode. */
11657 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11660 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11662 DEBUG_P(PerlIO_printf(Perl_debug_log,
11663 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11664 FPTR2DPTR(void *, S_utf16_textfilter),
11665 reverse ? 'l' : 'b', idx, maxlen, status,
11666 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11673 /* First, look in our buffer of existing UTF-8 data: */
11674 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11678 } else if (status == 0) {
11680 IoPAGE(filter) = 0;
11681 nl = SvEND(utf8_buffer);
11684 STRLEN got = nl - SvPVX(utf8_buffer);
11685 /* Did we have anything to append? */
11687 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11688 /* Everything else in this code works just fine if SVp_POK isn't
11689 set. This, however, needs it, and we need it to work, else
11690 we loop infinitely because the buffer is never consumed. */
11691 sv_chop(utf8_buffer, nl);
11695 /* OK, not a complete line there, so need to read some more UTF-16.
11696 Read an extra octect if the buffer currently has an odd number. */
11700 if (SvCUR(utf16_buffer) >= 2) {
11701 /* Location of the high octet of the last complete code point.
11702 Gosh, UTF-16 is a pain. All the benefits of variable length,
11703 *coupled* with all the benefits of partial reads and
11705 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11706 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11708 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11712 /* We have the first half of a surrogate. Read more. */
11713 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11716 status = FILTER_READ(idx + 1, utf16_buffer,
11717 160 + (SvCUR(utf16_buffer) & 1));
11718 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11719 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11722 IoPAGE(filter) = status;
11727 chars = SvCUR(utf16_buffer) >> 1;
11728 have = SvCUR(utf8_buffer);
11729 SvGROW(utf8_buffer, have + chars * 3 + 1);
11732 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11733 (U8*)SvPVX_const(utf8_buffer) + have,
11734 chars * 2, &newlen);
11736 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11737 (U8*)SvPVX_const(utf8_buffer) + have,
11738 chars * 2, &newlen);
11740 SvCUR_set(utf8_buffer, have + newlen);
11743 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11744 it's private to us, and utf16_to_utf8{,reversed} take a
11745 (pointer,length) pair, rather than a NUL-terminated string. */
11746 if(SvCUR(utf16_buffer) & 1) {
11747 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11748 SvCUR_set(utf16_buffer, 1);
11750 SvCUR_set(utf16_buffer, 0);
11753 DEBUG_P(PerlIO_printf(Perl_debug_log,
11754 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11756 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11757 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11762 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11764 SV *filter = filter_add(S_utf16_textfilter, NULL);
11766 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11768 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11769 sv_setpvs(filter, "");
11770 IoLINES(filter) = reversed;
11771 IoPAGE(filter) = 1; /* Not EOF */
11773 /* Sadly, we have to return a valid pointer, come what may, so we have to
11774 ignore any error return from this. */
11775 SvCUR_set(PL_linestr, 0);
11776 if (FILTER_READ(0, PL_linestr, 0)) {
11777 SvUTF8_on(PL_linestr);
11779 SvUTF8_on(PL_linestr);
11781 PL_bufend = SvEND(PL_linestr);
11782 return (U8*)SvPVX(PL_linestr);
11787 Returns a pointer to the next character after the parsed
11788 vstring, as well as updating the passed in sv.
11790 Function must be called like
11792 sv = sv_2mortal(newSV(5));
11793 s = scan_vstring(s,e,sv);
11795 where s and e are the start and end of the string.
11796 The sv should already be large enough to store the vstring
11797 passed in, for performance reasons.
11799 This function may croak if fatal warnings are enabled in the
11800 calling scope, hence the sv_2mortal in the example (to prevent
11801 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11807 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11810 const char *pos = s;
11811 const char *start = s;
11813 PERL_ARGS_ASSERT_SCAN_VSTRING;
11815 if (*pos == 'v') pos++; /* get past 'v' */
11816 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11818 if ( *pos != '.') {
11819 /* this may not be a v-string if followed by => */
11820 const char *next = pos;
11821 while (next < e && isSPACE(*next))
11823 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11824 /* return string not v-string */
11825 sv_setpvn(sv,(char *)s,pos-s);
11826 return (char *)pos;
11830 if (!isALPHA(*pos)) {
11831 U8 tmpbuf[UTF8_MAXBYTES+1];
11834 s++; /* get past 'v' */
11839 /* this is atoi() that tolerates underscores */
11842 const char *end = pos;
11844 while (--end >= s) {
11846 const UV orev = rev;
11847 rev += (*end - '0') * mult;
11850 /* diag_listed_as: Integer overflow in %s number */
11851 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11852 "Integer overflow in decimal number");
11856 if (rev > 0x7FFFFFFF)
11857 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11859 /* Append native character for the rev point */
11860 tmpend = uvchr_to_utf8(tmpbuf, rev);
11861 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11862 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11864 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11870 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11874 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11881 Perl_keyword_plugin_standard(pTHX_
11882 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11884 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11885 PERL_UNUSED_CONTEXT;
11886 PERL_UNUSED_ARG(keyword_ptr);
11887 PERL_UNUSED_ARG(keyword_len);
11888 PERL_UNUSED_ARG(op_ptr);
11889 return KEYWORD_PLUGIN_DECLINE;
11892 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11894 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11896 SAVEI32(PL_lex_brackets);
11897 if (PL_lex_brackets > 100)
11898 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11899 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11900 SAVEI32(PL_lex_allbrackets);
11901 PL_lex_allbrackets = 0;
11902 SAVEI8(PL_lex_fakeeof);
11903 PL_lex_fakeeof = (U8)fakeeof;
11904 if(yyparse(gramtype) && !PL_parser->error_count)
11905 qerror(Perl_mess(aTHX_ "Parse error"));
11908 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11910 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11914 SAVEVPTR(PL_eval_root);
11915 PL_eval_root = NULL;
11916 parse_recdescent(gramtype, fakeeof);
11922 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11924 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11927 if (flags & ~PARSE_OPTIONAL)
11928 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11929 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11930 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11931 if (!PL_parser->error_count)
11932 qerror(Perl_mess(aTHX_ "Parse error"));
11933 exprop = newOP(OP_NULL, 0);
11939 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11941 Parse a Perl arithmetic expression. This may contain operators of precedence
11942 down to the bit shift operators. The expression must be followed (and thus
11943 terminated) either by a comparison or lower-precedence operator or by
11944 something that would normally terminate an expression such as semicolon.
11945 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11946 otherwise it is mandatory. It is up to the caller to ensure that the
11947 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11948 the source of the code to be parsed and the lexical context for the
11951 The op tree representing the expression is returned. If an optional
11952 expression is absent, a null pointer is returned, otherwise the pointer
11955 If an error occurs in parsing or compilation, in most cases a valid op
11956 tree is returned anyway. The error is reflected in the parser state,
11957 normally resulting in a single exception at the top level of parsing
11958 which covers all the compilation errors that occurred. Some compilation
11959 errors, however, will throw an exception immediately.
11965 Perl_parse_arithexpr(pTHX_ U32 flags)
11967 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11971 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11973 Parse a Perl term expression. This may contain operators of precedence
11974 down to the assignment operators. The expression must be followed (and thus
11975 terminated) either by a comma or lower-precedence operator or by
11976 something that would normally terminate an expression such as semicolon.
11977 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11978 otherwise it is mandatory. It is up to the caller to ensure that the
11979 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11980 the source of the code to be parsed and the lexical context for the
11983 The op tree representing the expression is returned. If an optional
11984 expression is absent, a null pointer is returned, otherwise the pointer
11987 If an error occurs in parsing or compilation, in most cases a valid op
11988 tree is returned anyway. The error is reflected in the parser state,
11989 normally resulting in a single exception at the top level of parsing
11990 which covers all the compilation errors that occurred. Some compilation
11991 errors, however, will throw an exception immediately.
11997 Perl_parse_termexpr(pTHX_ U32 flags)
11999 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12003 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12005 Parse a Perl list expression. This may contain operators of precedence
12006 down to the comma operator. The expression must be followed (and thus
12007 terminated) either by a low-precedence logic operator such as C<or> or by
12008 something that would normally terminate an expression such as semicolon.
12009 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12010 otherwise it is mandatory. It is up to the caller to ensure that the
12011 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12012 the source of the code to be parsed and the lexical context for the
12015 The op tree representing the expression is returned. If an optional
12016 expression is absent, a null pointer is returned, otherwise the pointer
12019 If an error occurs in parsing or compilation, in most cases a valid op
12020 tree is returned anyway. The error is reflected in the parser state,
12021 normally resulting in a single exception at the top level of parsing
12022 which covers all the compilation errors that occurred. Some compilation
12023 errors, however, will throw an exception immediately.
12029 Perl_parse_listexpr(pTHX_ U32 flags)
12031 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12035 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12037 Parse a single complete Perl expression. This allows the full
12038 expression grammar, including the lowest-precedence operators such
12039 as C<or>. The expression must be followed (and thus terminated) by a
12040 token that an expression would normally be terminated by: end-of-file,
12041 closing bracketing punctuation, semicolon, or one of the keywords that
12042 signals a postfix expression-statement modifier. If I<flags> includes
12043 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12044 mandatory. It is up to the caller to ensure that the dynamic parser
12045 state (L</PL_parser> et al) is correctly set to reflect the source of
12046 the code to be parsed and the lexical context for the expression.
12048 The op tree representing the expression is returned. If an optional
12049 expression is absent, a null pointer is returned, otherwise the pointer
12052 If an error occurs in parsing or compilation, in most cases a valid op
12053 tree is returned anyway. The error is reflected in the parser state,
12054 normally resulting in a single exception at the top level of parsing
12055 which covers all the compilation errors that occurred. Some compilation
12056 errors, however, will throw an exception immediately.
12062 Perl_parse_fullexpr(pTHX_ U32 flags)
12064 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12068 =for apidoc Amx|OP *|parse_block|U32 flags
12070 Parse a single complete Perl code block. This consists of an opening
12071 brace, a sequence of statements, and a closing brace. The block
12072 constitutes a lexical scope, so C<my> variables and various compile-time
12073 effects can be contained within it. It is up to the caller to ensure
12074 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12075 reflect the source of the code to be parsed and the lexical context for
12078 The op tree representing the code block is returned. This is always a
12079 real op, never a null pointer. It will normally be a C<lineseq> list,
12080 including C<nextstate> or equivalent ops. No ops to construct any kind
12081 of runtime scope are included by virtue of it being a block.
12083 If an error occurs in parsing or compilation, in most cases a valid op
12084 tree (most likely null) is returned anyway. The error is reflected in
12085 the parser state, normally resulting in a single exception at the top
12086 level of parsing which covers all the compilation errors that occurred.
12087 Some compilation errors, however, will throw an exception immediately.
12089 The I<flags> parameter is reserved for future use, and must always
12096 Perl_parse_block(pTHX_ U32 flags)
12099 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12100 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12104 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12106 Parse a single unadorned Perl statement. This may be a normal imperative
12107 statement or a declaration that has compile-time effect. It does not
12108 include any label or other affixture. It is up to the caller to ensure
12109 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12110 reflect the source of the code to be parsed and the lexical context for
12113 The op tree representing the statement is returned. This may be a
12114 null pointer if the statement is null, for example if it was actually
12115 a subroutine definition (which has compile-time side effects). If not
12116 null, it will be ops directly implementing the statement, suitable to
12117 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12118 equivalent op (except for those embedded in a scope contained entirely
12119 within the statement).
12121 If an error occurs in parsing or compilation, in most cases a valid op
12122 tree (most likely null) is returned anyway. The error is reflected in
12123 the parser state, normally resulting in a single exception at the top
12124 level of parsing which covers all the compilation errors that occurred.
12125 Some compilation errors, however, will throw an exception immediately.
12127 The I<flags> parameter is reserved for future use, and must always
12134 Perl_parse_barestmt(pTHX_ U32 flags)
12137 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12138 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12142 =for apidoc Amx|SV *|parse_label|U32 flags
12144 Parse a single label, possibly optional, of the type that may prefix a
12145 Perl statement. It is up to the caller to ensure that the dynamic parser
12146 state (L</PL_parser> et al) is correctly set to reflect the source of
12147 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12148 label is optional, otherwise it is mandatory.
12150 The name of the label is returned in the form of a fresh scalar. If an
12151 optional label is absent, a null pointer is returned.
12153 If an error occurs in parsing, which can only occur if the label is
12154 mandatory, a valid label is returned anyway. The error is reflected in
12155 the parser state, normally resulting in a single exception at the top
12156 level of parsing which covers all the compilation errors that occurred.
12162 Perl_parse_label(pTHX_ U32 flags)
12164 if (flags & ~PARSE_OPTIONAL)
12165 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12166 if (PL_lex_state == LEX_KNOWNEXT) {
12167 PL_parser->yychar = yylex();
12168 if (PL_parser->yychar == LABEL) {
12169 char * const lpv = pl_yylval.pval;
12170 STRLEN llen = strlen(lpv);
12171 PL_parser->yychar = YYEMPTY;
12172 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12179 STRLEN wlen, bufptr_pos;
12182 if (!isIDFIRST_lazy_if(s, UTF))
12184 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12185 if (word_takes_any_delimeter(s, wlen))
12187 bufptr_pos = s - SvPVX(PL_linestr);
12189 lex_read_space(LEX_KEEP_PREVIOUS);
12191 s = SvPVX(PL_linestr) + bufptr_pos;
12192 if (t[0] == ':' && t[1] != ':') {
12193 PL_oldoldbufptr = PL_oldbufptr;
12196 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12200 if (flags & PARSE_OPTIONAL) {
12203 qerror(Perl_mess(aTHX_ "Parse error"));
12204 return newSVpvs("x");
12211 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12213 Parse a single complete Perl statement. This may be a normal imperative
12214 statement or a declaration that has compile-time effect, and may include
12215 optional labels. It is up to the caller to ensure that the dynamic
12216 parser state (L</PL_parser> et al) is correctly set to reflect the source
12217 of the code to be parsed and the lexical context for the statement.
12219 The op tree representing the statement is returned. This may be a
12220 null pointer if the statement is null, for example if it was actually
12221 a subroutine definition (which has compile-time side effects). If not
12222 null, it will be the result of a L</newSTATEOP> call, normally including
12223 a C<nextstate> or equivalent op.
12225 If an error occurs in parsing or compilation, in most cases a valid op
12226 tree (most likely null) is returned anyway. The error is reflected in
12227 the parser state, normally resulting in a single exception at the top
12228 level of parsing which covers all the compilation errors that occurred.
12229 Some compilation errors, however, will throw an exception immediately.
12231 The I<flags> parameter is reserved for future use, and must always
12238 Perl_parse_fullstmt(pTHX_ U32 flags)
12241 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12242 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12246 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12248 Parse a sequence of zero or more Perl statements. These may be normal
12249 imperative statements, including optional labels, or declarations
12250 that have compile-time effect, or any mixture thereof. The statement
12251 sequence ends when a closing brace or end-of-file is encountered in a
12252 place where a new statement could have validly started. It is up to
12253 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12254 is correctly set to reflect the source of the code to be parsed and the
12255 lexical context for the statements.
12257 The op tree representing the statement sequence is returned. This may
12258 be a null pointer if the statements were all null, for example if there
12259 were no statements or if there were only subroutine definitions (which
12260 have compile-time side effects). If not null, it will be a C<lineseq>
12261 list, normally including C<nextstate> or equivalent ops.
12263 If an error occurs in parsing or compilation, in most cases a valid op
12264 tree is returned anyway. The error is reflected in the parser state,
12265 normally resulting in a single exception at the top level of parsing
12266 which covers all the compilation errors that occurred. Some compilation
12267 errors, however, will throw an exception immediately.
12269 The I<flags> parameter is reserved for future use, and must always
12276 Perl_parse_stmtseq(pTHX_ U32 flags)
12281 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12282 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12283 c = lex_peek_unichar(0);
12284 if (c != -1 && c != /*{*/'}')
12285 qerror(Perl_mess(aTHX_ "Parse error"));
12291 * c-indentation-style: bsd
12292 * c-basic-offset: 4
12293 * indent-tabs-mode: nil
12296 * ex: set ts=8 sts=4 sw=4 et: