3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
185 #include "keywords.h"
187 /* CLINE is a macro that ensures PL_copline has a sane value */
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
192 # define SKIPSPACE0(s) skipspace0(s)
193 # define SKIPSPACE1(s) skipspace1(s)
194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195 # define PEEKSPACE(s) skipspace2(s,0)
197 # define SKIPSPACE0(s) skipspace(s)
198 # define SKIPSPACE1(s) skipspace(s)
199 # define SKIPSPACE2(s,tsv) skipspace(s)
200 # define PEEKSPACE(s) skipspace(s)
204 * Convenience functions to return different tokens and prime the
205 * lexer for the next token. They all take an argument.
207 * TOKEN : generic token (used for '(', DOLSHARP, etc)
208 * OPERATOR : generic operator
209 * AOPERATOR : assignment operator
210 * PREBLOCK : beginning the block after an if, while, foreach, ...
211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212 * PREREF : *EXPR where EXPR is not a simple identifier
213 * TERM : expression term
214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
215 * LOOPX : loop exiting command (goto, last, dump, etc)
216 * FTST : file test operator
217 * FUN0 : zero-argument function
218 * FUN0OP : zero-argument function, with its op created in this file
219 * FUN1 : not used, except for not, which isn't a UNIOP
220 * BOop : bitwise or or xor
222 * SHop : shift operator
223 * PWop : power operator
224 * PMop : pattern-matching operator
225 * Aop : addition-level operator
226 * Mop : multiplication-level operator
227 * Eop : equality-testing operator
228 * Rop : relational operator <= != gt
230 * Also see LOP and lop() below.
233 #ifdef DEBUGGING /* Serve -DT. */
234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
236 # define REPORT(retval) (retval)
239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
262 /* This bit of chicanery makes a unary function followed by
263 * a parenthesis into a function with one argument, highest precedence.
264 * The UNIDOR macro is for unary functions that can be followed by the //
265 * operator (such as C<shift // 0>).
267 #define UNI3(f,x,have_x) { \
268 pl_yylval.ival = f; \
269 if (have_x) PL_expect = x; \
271 PL_last_uni = PL_oldbufptr; \
272 PL_last_lop_op = f; \
274 return REPORT( (int)FUNC1 ); \
276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 #define UNI(f) UNI3(f,XTERM,1)
279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
280 #define UNIPROTO(f,optional) { \
281 if (optional) PL_last_uni = PL_oldbufptr; \
285 #define UNIBRACK(f) UNI3(f,0,0)
287 /* grandfather return to old style */
290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
292 pl_yylval.ival = (f); \
298 #define COPLINE_INC_WITH_HERELINES \
300 CopLINE_inc(PL_curcop); \
301 if (PL_parser->herelines) \
302 CopLINE(PL_curcop) += PL_parser->herelines, \
303 PL_parser->herelines = 0; \
305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
306 * is no sublex_push to follow. */
307 #define COPLINE_SET_FROM_MULTI_END \
309 CopLINE_set(PL_curcop, PL_multi_end); \
310 if (PL_multi_end != PL_multi_start) \
311 PL_parser->herelines = 0; \
317 /* how to interpret the pl_yylval associated with the token */
321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 static struct debug_tokens {
328 enum token_type type;
330 } const debug_tokens[] =
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
355 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
363 { IF, TOKENTYPE_IVAL, "IF" },
364 { LABEL, TOKENTYPE_PVAL, "LABEL" },
365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
370 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
371 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
372 { MY, TOKENTYPE_IVAL, "MY" },
373 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
374 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
375 { OROP, TOKENTYPE_IVAL, "OROP" },
376 { OROR, TOKENTYPE_NONE, "OROR" },
377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
378 { PEG, TOKENTYPE_NONE, "PEG" },
379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
384 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
385 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
386 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
387 { PREINC, TOKENTYPE_NONE, "PREINC" },
388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
390 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
391 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
394 { SUB, TOKENTYPE_NONE, "SUB" },
395 { THING, TOKENTYPE_OPVAL, "THING" },
396 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
401 { USE, TOKENTYPE_IVAL, "USE" },
402 { WHEN, TOKENTYPE_IVAL, "WHEN" },
403 { WHILE, TOKENTYPE_IVAL, "WHILE" },
404 { WORD, TOKENTYPE_OPVAL, "WORD" },
405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
406 { 0, TOKENTYPE_NONE, NULL }
409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
416 PERL_ARGS_ASSERT_TOKEREPORT;
419 const char *name = NULL;
420 enum token_type type = TOKENTYPE_NONE;
421 const struct debug_tokens *p;
422 SV* const report = newSVpvs("<== ");
424 for (p = debug_tokens; p->token; p++) {
425 if (p->token == (int)rv) {
432 Perl_sv_catpv(aTHX_ report, name);
433 else if ((char)rv > ' ' && (char)rv <= '~')
435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
437 sv_catpvs(report, " (pending identifier)");
440 sv_catpvs(report, "EOF");
442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
449 case TOKENTYPE_OPNUM:
450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
451 PL_op_name[lvalp->ival]);
454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
456 case TOKENTYPE_OPVAL:
458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
459 PL_op_name[lvalp->opval->op_type]);
460 if (lvalp->opval->op_type == OP_CONST) {
461 Perl_sv_catpvf(aTHX_ report, " %s",
462 SvPEEK(cSVOPx_sv(lvalp->opval)));
467 sv_catpvs(report, "(opval=null)");
470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
476 /* print the buffer with suitable escapes */
479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
481 SV* const tmp = newSVpvs("");
483 PERL_ARGS_ASSERT_PRINTBUF;
485 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
486 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494 S_deprecate_commaless_var_list(pTHX) {
496 deprecate("comma-less variable list");
497 return REPORT(','); /* grandfather non-comma-format format */
503 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
504 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
508 S_ao(pTHX_ int toketype)
511 if (*PL_bufptr == '=') {
513 if (toketype == ANDAND)
514 pl_yylval.ival = OP_ANDASSIGN;
515 else if (toketype == OROR)
516 pl_yylval.ival = OP_ORASSIGN;
517 else if (toketype == DORDOR)
518 pl_yylval.ival = OP_DORASSIGN;
526 * When Perl expects an operator and finds something else, no_op
527 * prints the warning. It always prints "<something> found where
528 * operator expected. It prints "Missing semicolon on previous line?"
529 * if the surprise occurs at the start of the line. "do you need to
530 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
531 * where the compiler doesn't know if foo is a method call or a function.
532 * It prints "Missing operator before end of line" if there's nothing
533 * after the missing operator, or "... before <...>" if there is something
534 * after the missing operator.
538 S_no_op(pTHX_ const char *const what, char *s)
541 char * const oldbp = PL_bufptr;
542 const bool is_first = (PL_oldbufptr == PL_linestart);
544 PERL_ARGS_ASSERT_NO_OP;
550 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
551 if (ckWARN_d(WARN_SYNTAX)) {
553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
554 "\t(Missing semicolon on previous line?)\n");
555 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
557 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
558 t += UTF ? UTF8SKIP(t) : 1)
560 if (t < PL_bufptr && isSPACE(*t))
561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
562 "\t(Do you need to predeclare %"UTF8f"?)\n",
563 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
568 "\t(Missing operator before %"UTF8f"?)\n",
569 UTF8fARG(UTF, s - oldbp, oldbp));
577 * Complain about missing quote/regexp/heredoc terminator.
578 * If it's called with NULL then it cauterizes the line buffer.
579 * If we're in a delimited string and the delimiter is a control
580 * character, it's reformatted into a two-char sequence like ^C.
585 S_missingterm(pTHX_ char *s)
591 char * const nl = strrchr(s,'\n');
595 else if ((U8) PL_multi_close < 32) {
597 tmpbuf[1] = (char)toCTRL(PL_multi_close);
602 *tmpbuf = (char)PL_multi_close;
606 q = strchr(s,'"') ? '\'' : '"';
607 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
613 * Check whether the named feature is enabled.
616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
619 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
621 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
623 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
625 if (namelen > MAX_FEATURE_LEN)
627 memcpy(&he_name[8], name, namelen);
629 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
630 REFCOUNTED_HE_EXISTS));
634 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
635 * utf16-to-utf8-reversed.
638 #ifdef PERL_CR_FILTER
642 const char *s = SvPVX_const(sv);
643 const char * const e = s + SvCUR(sv);
645 PERL_ARGS_ASSERT_STRIP_RETURN;
647 /* outer loop optimized to do nothing if there are no CR-LFs */
649 if (*s++ == '\r' && *s == '\n') {
650 /* hit a CR-LF, need to copy the rest */
654 if (*s == '\r' && s[1] == '\n')
665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
667 const I32 count = FILTER_READ(idx+1, sv, maxlen);
668 if (count > 0 && !maxlen)
675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
677 Creates and initialises a new lexer/parser state object, supplying
678 a context in which to lex and parse from a new source of Perl code.
679 A pointer to the new state object is placed in L</PL_parser>. An entry
680 is made on the save stack so that upon unwinding the new state object
681 will be destroyed and the former value of L</PL_parser> will be restored.
682 Nothing else need be done to clean up the parsing context.
684 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
685 non-null, provides a string (in SV form) containing code to be parsed.
686 A copy of the string is made, so subsequent modification of I<line>
687 does not affect parsing. I<rsfp>, if non-null, provides an input stream
688 from which code will be read to be parsed. If both are non-null, the
689 code in I<line> comes first and must consist of complete lines of input,
690 and I<rsfp> supplies the remainder of the source.
692 The I<flags> parameter is reserved for future use. Currently it is only
693 used by perl internally, so extensions should always pass zero.
698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
699 can share filters with the current parser.
700 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
701 caller, hence isn't owned by the parser, so shouldn't be closed on parser
702 destruction. This is used to handle the case of defaulting to reading the
703 script from the standard input because no filename was given on the command
704 line (without getting confused by situation where STDIN has been closed, so
705 the script handle is opened on fd 0) */
708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
711 const char *s = NULL;
712 yy_parser *parser, *oparser;
713 if (flags && flags & ~LEX_START_FLAGS)
714 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
716 /* create and initialise a parser */
718 Newxz(parser, 1, yy_parser);
719 parser->old_parser = oparser = PL_parser;
722 parser->stack = NULL;
724 parser->stack_size = 0;
726 /* on scope exit, free this parser and restore any outer one */
728 parser->saved_curcop = PL_curcop;
730 /* initialise lexer state */
733 parser->curforce = -1;
735 parser->nexttoke = 0;
737 parser->error_count = oparser ? oparser->error_count : 0;
738 parser->copline = parser->preambling = NOLINE;
739 parser->lex_state = LEX_NORMAL;
740 parser->expect = XSTATE;
742 parser->rsfp_filters =
743 !(flags & LEX_START_SAME_FILTER) || !oparser
745 : MUTABLE_AV(SvREFCNT_inc(
746 oparser->rsfp_filters
747 ? oparser->rsfp_filters
748 : (oparser->rsfp_filters = newAV())
751 Newx(parser->lex_brackstack, 120, char);
752 Newx(parser->lex_casestack, 12, char);
753 *parser->lex_casestack = '\0';
754 Newxz(parser->lex_shared, 1, LEXSHARED);
758 s = SvPV_const(line, len);
759 parser->linestr = flags & LEX_START_COPIED
760 ? SvREFCNT_inc_simple_NN(line)
761 : newSVpvn_flags(s, len, SvUTF8(line));
762 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
764 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
766 parser->oldoldbufptr =
769 parser->linestart = SvPVX(parser->linestr);
770 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
771 parser->last_lop = parser->last_uni = NULL;
773 assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
774 |LEX_DONT_CLOSE_RSFP));
775 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
776 |LEX_DONT_CLOSE_RSFP));
778 parser->in_pod = parser->filtered = 0;
782 /* delete a parser object */
785 Perl_parser_free(pTHX_ const yy_parser *parser)
787 PERL_ARGS_ASSERT_PARSER_FREE;
789 PL_curcop = parser->saved_curcop;
790 SvREFCNT_dec(parser->linestr);
792 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
793 PerlIO_clearerr(parser->rsfp);
794 else if (parser->rsfp && (!parser->old_parser ||
795 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
796 PerlIO_close(parser->rsfp);
797 SvREFCNT_dec(parser->rsfp_filters);
798 SvREFCNT_dec(parser->lex_stuff);
799 SvREFCNT_dec(parser->sublex_info.repl);
801 Safefree(parser->lex_brackstack);
802 Safefree(parser->lex_casestack);
803 Safefree(parser->lex_shared);
804 PL_parser = parser->old_parser;
809 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
812 I32 nexttoke = parser->lasttoke;
814 I32 nexttoke = parser->nexttoke;
816 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
819 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
821 && parser->nexttoke[nexttoke].next_val.opval
822 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
823 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
824 op_free(parser->nexttoke[nexttoke].next_val.opval);
825 parser->nexttoke[nexttoke].next_val.opval = NULL;
828 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
829 && parser->nextval[nexttoke].opval
830 && parser->nextval[nexttoke].opval->op_slabbed
831 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
832 op_free(parser->nextval[nexttoke].opval);
833 parser->nextval[nexttoke].opval = NULL;
841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
843 Buffer scalar containing the chunk currently under consideration of the
844 text currently being lexed. This is always a plain string scalar (for
845 which C<SvPOK> is true). It is not intended to be used as a scalar by
846 normal scalar means; instead refer to the buffer directly by the pointer
847 variables described below.
849 The lexer maintains various C<char*> pointers to things in the
850 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
851 reallocated, all of these pointers must be updated. Don't attempt to
852 do this manually, but rather use L</lex_grow_linestr> if you need to
853 reallocate the buffer.
855 The content of the text chunk in the buffer is commonly exactly one
856 complete line of input, up to and including a newline terminator,
857 but there are situations where it is otherwise. The octets of the
858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
859 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
860 flag on this scalar, which may disagree with it.
862 For direct examination of the buffer, the variable
863 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
865 of these pointers is usually preferable to examination of the scalar
866 through normal scalar means.
868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
870 Direct pointer to the end of the chunk of text currently being lexed, the
871 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
872 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
873 always located at the end of the buffer, and does not count as part of
874 the buffer's contents.
876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
878 Points to the current position of lexing inside the lexer buffer.
879 Characters around this point may be freely examined, within
880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
881 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
884 Lexing code (whether in the Perl core or not) moves this pointer past
885 the characters that it consumes. It is also expected to perform some
886 bookkeeping whenever a newline character is consumed. This movement
887 can be more conveniently performed by the function L</lex_read_to>,
888 which handles newlines appropriately.
890 Interpretation of the buffer's octets can be abstracted out by
891 using the slightly higher-level functions L</lex_peek_unichar> and
892 L</lex_read_unichar>.
894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
896 Points to the start of the current line inside the lexer buffer.
897 This is useful for indicating at which column an error occurred, and
898 not much else. This must be updated by any lexing code that consumes
899 a newline; the function L</lex_read_to> handles this detail.
905 =for apidoc Amx|bool|lex_bufutf8
907 Indicates whether the octets in the lexer buffer
908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
909 of Unicode characters. If not, they should be interpreted as Latin-1
910 characters. This is analogous to the C<SvUTF8> flag for scalars.
912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
913 contains valid UTF-8. Lexing code must be robust in the face of invalid
916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
917 is significant, but not the whole story regarding the input character
918 encoding. Normally, when a file is being read, the scalar contains octets
919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
920 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
923 is in effect. This logic may change in the future; use this function
924 instead of implementing the logic yourself.
930 Perl_lex_bufutf8(pTHX)
936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
939 at least I<len> octets (including terminating NUL). Returns a
940 pointer to the reallocated buffer. This is necessary before making
941 any direct modification of the buffer that would increase its length.
942 L</lex_stuff_pvn> provides a more convenient way to insert text into
945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
946 this function updates all of the lexer's variables that point directly
953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
957 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
958 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
959 linestr = PL_parser->linestr;
960 buf = SvPVX(linestr);
961 if (len <= SvLEN(linestr))
963 bufend_pos = PL_parser->bufend - buf;
964 bufptr_pos = PL_parser->bufptr - buf;
965 oldbufptr_pos = PL_parser->oldbufptr - buf;
966 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
967 linestart_pos = PL_parser->linestart - buf;
968 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
969 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
970 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
971 PL_parser->lex_shared->re_eval_start - buf : 0;
973 buf = sv_grow(linestr, len);
975 PL_parser->bufend = buf + bufend_pos;
976 PL_parser->bufptr = buf + bufptr_pos;
977 PL_parser->oldbufptr = buf + oldbufptr_pos;
978 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
979 PL_parser->linestart = buf + linestart_pos;
980 if (PL_parser->last_uni)
981 PL_parser->last_uni = buf + last_uni_pos;
982 if (PL_parser->last_lop)
983 PL_parser->last_lop = buf + last_lop_pos;
984 if (PL_parser->lex_shared->re_eval_start)
985 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
994 reallocating the buffer if necessary. This means that lexing code that
995 runs later will see the characters as if they had appeared in the input.
996 It is not recommended to do this as part of normal parsing, and most
997 uses of this facility run the risk of the inserted characters being
998 interpreted in an unintended manner.
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1016 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017 if (flags & ~(LEX_STUFF_UTF8))
1018 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1020 if (flags & LEX_STUFF_UTF8) {
1023 STRLEN highhalf = 0; /* Count of variants */
1024 const char *p, *e = pv+len;
1025 for (p = pv; p != e; p++) {
1026 if (! UTF8_IS_INVARIANT(*p)) {
1032 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033 bufptr = PL_parser->bufptr;
1034 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035 SvCUR_set(PL_parser->linestr,
1036 SvCUR(PL_parser->linestr) + len+highhalf);
1037 PL_parser->bufend += len+highhalf;
1038 for (p = pv; p != e; p++) {
1040 if (! UTF8_IS_INVARIANT(c)) {
1041 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1044 *bufptr++ = (char)c;
1049 if (flags & LEX_STUFF_UTF8) {
1050 STRLEN highhalf = 0;
1051 const char *p, *e = pv+len;
1052 for (p = pv; p != e; p++) {
1054 if (UTF8_IS_ABOVE_LATIN1(c)) {
1055 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056 "non-Latin-1 character into Latin-1 input");
1057 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1060 } else if (! UTF8_IS_INVARIANT(c)) {
1061 /* malformed UTF-8 */
1063 SAVESPTR(PL_warnhook);
1064 PL_warnhook = PERL_WARNHOOK_FATAL;
1065 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1071 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072 bufptr = PL_parser->bufptr;
1073 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074 SvCUR_set(PL_parser->linestr,
1075 SvCUR(PL_parser->linestr) + len-highhalf);
1076 PL_parser->bufend += len-highhalf;
1079 if (UTF8_IS_INVARIANT(*p)) {
1085 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1091 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092 bufptr = PL_parser->bufptr;
1093 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095 PL_parser->bufend += len;
1096 Copy(pv, bufptr, len, char);
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary. This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul. These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>. The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1126 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127 lex_stuff_pvn(pv, strlen(pv), flags);
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary. This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1141 The string to be inserted is the string value of I<sv>. The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1155 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1157 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1159 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1170 This is not the normal way to consume lexed text. For that, use
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1181 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182 buf = PL_parser->bufptr;
1184 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187 bufend = PL_parser->bufend;
1189 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190 unstuff_len = ptr - buf;
1191 Move(ptr, buf, bufend+1-ptr, char);
1192 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193 PL_parser->bufend = bufend - unstuff_len;
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1212 Perl_lex_read_to(pTHX_ char *ptr)
1215 PERL_ARGS_ASSERT_LEX_READ_TO;
1216 s = PL_parser->bufptr;
1217 if (ptr < s || ptr > PL_parser->bufend)
1218 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219 for (; s != ptr; s++)
1221 COPLINE_INC_WITH_HERELINES;
1222 PL_parser->linestart = s+1;
1224 PL_parser->bufptr = ptr;
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>. The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately. I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it. However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1252 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253 buf = SvPVX(PL_parser->linestr);
1255 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1258 if (ptr > PL_parser->bufptr)
1259 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260 discard_len = ptr - buf;
1261 if (PL_parser->oldbufptr < ptr)
1262 PL_parser->oldbufptr = ptr;
1263 if (PL_parser->oldoldbufptr < ptr)
1264 PL_parser->oldoldbufptr = ptr;
1265 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266 PL_parser->last_uni = NULL;
1267 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268 PL_parser->last_lop = NULL;
1269 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271 PL_parser->bufend -= discard_len;
1272 PL_parser->bufptr -= discard_len;
1273 PL_parser->oldbufptr -= discard_len;
1274 PL_parser->oldoldbufptr -= discard_len;
1275 if (PL_parser->last_uni)
1276 PL_parser->last_uni -= discard_len;
1277 if (PL_parser->last_lop)
1278 PL_parser->last_lop -= discard_len;
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more. It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded. If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM 0x40000000
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1311 STRLEN old_bufend_pos, new_bufend_pos;
1312 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314 bool got_some_for_debugger = 0;
1316 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318 linestr = PL_parser->linestr;
1319 buf = SvPVX(linestr);
1320 if (!(flags & LEX_KEEP_PREVIOUS) &&
1321 PL_parser->bufptr == PL_parser->bufend) {
1322 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1324 if (PL_parser->last_uni != PL_parser->bufend)
1325 PL_parser->last_uni = NULL;
1326 if (PL_parser->last_lop != PL_parser->bufend)
1327 PL_parser->last_lop = NULL;
1328 last_uni_pos = last_lop_pos = 0;
1332 old_bufend_pos = PL_parser->bufend - buf;
1333 bufptr_pos = PL_parser->bufptr - buf;
1334 oldbufptr_pos = PL_parser->oldbufptr - buf;
1335 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1336 linestart_pos = PL_parser->linestart - buf;
1337 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1338 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1340 if (flags & LEX_FAKE_EOF) {
1342 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1344 } else if (filter_gets(linestr, old_bufend_pos)) {
1346 got_some_for_debugger = 1;
1347 } else if (flags & LEX_NO_TERM) {
1350 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1351 sv_setpvs(linestr, "");
1353 /* End of real input. Close filehandle (unless it was STDIN),
1354 * then add implicit termination.
1356 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1357 PerlIO_clearerr(PL_parser->rsfp);
1358 else if (PL_parser->rsfp)
1359 (void)PerlIO_close(PL_parser->rsfp);
1360 PL_parser->rsfp = NULL;
1361 PL_parser->in_pod = PL_parser->filtered = 0;
1363 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1366 if (!PL_in_eval && PL_minus_p) {
1368 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1369 PL_minus_n = PL_minus_p = 0;
1370 } else if (!PL_in_eval && PL_minus_n) {
1371 sv_catpvs(linestr, /*{*/";}");
1374 sv_catpvs(linestr, ";");
1377 buf = SvPVX(linestr);
1378 new_bufend_pos = SvCUR(linestr);
1379 PL_parser->bufend = buf + new_bufend_pos;
1380 PL_parser->bufptr = buf + bufptr_pos;
1381 PL_parser->oldbufptr = buf + oldbufptr_pos;
1382 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1383 PL_parser->linestart = buf + linestart_pos;
1384 if (PL_parser->last_uni)
1385 PL_parser->last_uni = buf + last_uni_pos;
1386 if (PL_parser->last_lop)
1387 PL_parser->last_lop = buf + last_lop_pos;
1388 if (PL_parser->preambling != NOLINE) {
1389 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1390 PL_parser->preambling = NOLINE;
1392 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1393 PL_curstash != PL_debstash) {
1394 /* debugger active and we're not compiling the debugger code,
1395 * so store the line into the debugger's array of lines
1397 update_debugger_info(NULL, buf+old_bufend_pos,
1398 new_bufend_pos-old_bufend_pos);
1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1406 Looks ahead one (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the next character,
1408 or -1 if lexing has reached the end of the input text. To consume the
1409 peeked character, use L</lex_read_unichar>.
1411 If the next character is in (or extends into) the next chunk of input
1412 text, the next chunk will be read in. Normally the current chunk will be
1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1414 then the current chunk will not be discarded.
1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1417 is encountered, an exception is generated.
1423 Perl_lex_peek_unichar(pTHX_ U32 flags)
1427 if (flags & ~(LEX_KEEP_PREVIOUS))
1428 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1429 s = PL_parser->bufptr;
1430 bufend = PL_parser->bufend;
1436 if (!lex_next_chunk(flags))
1438 s = PL_parser->bufptr;
1439 bufend = PL_parser->bufend;
1442 if (UTF8_IS_INVARIANT(head))
1444 if (UTF8_IS_START(head)) {
1445 len = UTF8SKIP(&head);
1446 while ((STRLEN)(bufend-s) < len) {
1447 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1453 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1454 if (retlen == (STRLEN)-1) {
1455 /* malformed UTF-8 */
1457 SAVESPTR(PL_warnhook);
1458 PL_warnhook = PERL_WARNHOOK_FATAL;
1459 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1465 if (!lex_next_chunk(flags))
1467 s = PL_parser->bufptr;
1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1476 Reads the next (Unicode) character in the text currently being lexed.
1477 Returns the codepoint (unsigned integer value) of the character read,
1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1479 if lexing has reached the end of the input text. To non-destructively
1480 examine the next character, use L</lex_peek_unichar> instead.
1482 If the next character is in (or extends into) the next chunk of input
1483 text, the next chunk will be read in. Normally the current chunk will be
1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1485 then the current chunk will not be discarded.
1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1488 is encountered, an exception is generated.
1494 Perl_lex_read_unichar(pTHX_ U32 flags)
1497 if (flags & ~(LEX_KEEP_PREVIOUS))
1498 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1499 c = lex_peek_unichar(flags);
1502 COPLINE_INC_WITH_HERELINES;
1504 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1506 ++(PL_parser->bufptr);
1512 =for apidoc Amx|void|lex_read_space|U32 flags
1514 Reads optional spaces, in Perl style, in the text currently being
1515 lexed. The spaces may include ordinary whitespace characters and
1516 Perl-style comments. C<#line> directives are processed if encountered.
1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1518 at a non-space character (or the end of the input text).
1520 If spaces extend into the next chunk of input text, the next chunk will
1521 be read in. Normally the current chunk will be discarded at the same
1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1523 chunk will not be discarded.
1528 #define LEX_NO_INCLINE 0x40000000
1529 #define LEX_NO_NEXT_CHUNK 0x80000000
1532 Perl_lex_read_space(pTHX_ U32 flags)
1535 const bool can_incline = !(flags & LEX_NO_INCLINE);
1536 bool need_incline = 0;
1537 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1538 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1541 sv_free(PL_skipwhite);
1542 PL_skipwhite = NULL;
1545 PL_skipwhite = newSVpvs("");
1546 #endif /* PERL_MAD */
1547 s = PL_parser->bufptr;
1548 bufend = PL_parser->bufend;
1554 } while (!(c == '\n' || (c == 0 && s == bufend)));
1555 } else if (c == '\n') {
1558 PL_parser->linestart = s;
1564 } else if (isSPACE(c)) {
1566 } else if (c == 0 && s == bufend) {
1571 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1572 #endif /* PERL_MAD */
1573 if (flags & LEX_NO_NEXT_CHUNK)
1575 PL_parser->bufptr = s;
1576 l = CopLINE(PL_curcop);
1577 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1578 got_more = lex_next_chunk(flags);
1579 CopLINE_set(PL_curcop, l);
1580 s = PL_parser->bufptr;
1581 bufend = PL_parser->bufend;
1584 if (can_incline && need_incline && PL_parser->rsfp) {
1594 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1595 #endif /* PERL_MAD */
1596 PL_parser->bufptr = s;
1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1603 This function performs syntax checking on a prototype, C<proto>.
1604 If C<warn> is true, any illegal characters or mismatched brackets
1605 will trigger illegalproto warnings, declaring that they were
1606 detected in the prototype for C<name>.
1608 The return value is C<true> if this is a valid prototype, and
1609 C<false> if it is not, regardless of whether C<warn> was C<true> or
1612 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1621 STRLEN len, origlen;
1622 char *p = proto ? SvPV(proto, len) : NULL;
1623 bool bad_proto = FALSE;
1624 bool in_brackets = FALSE;
1625 bool after_slash = FALSE;
1626 char greedy_proto = ' ';
1627 bool proto_after_greedy_proto = FALSE;
1628 bool must_be_last = FALSE;
1629 bool underscore = FALSE;
1630 bool bad_proto_after_underscore = FALSE;
1632 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1638 for (; len--; p++) {
1641 proto_after_greedy_proto = TRUE;
1643 if (!strchr(";@%", *p))
1644 bad_proto_after_underscore = TRUE;
1647 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1654 in_brackets = FALSE;
1655 else if ((*p == '@' || *p == '%') &&
1658 must_be_last = TRUE;
1667 after_slash = FALSE;
1672 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1675 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1676 origlen, UNI_DISPLAY_ISPRINT)
1677 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1679 if (proto_after_greedy_proto)
1680 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1681 "Prototype after '%c' for %"SVf" : %s",
1682 greedy_proto, SVfARG(name), p);
1684 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1685 "Missing ']' in prototype for %"SVf" : %s",
1688 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1689 "Illegal character in prototype for %"SVf" : %s",
1691 if (bad_proto_after_underscore)
1692 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1693 "Illegal character after '_' in prototype for %"SVf" : %s",
1697 return (! (proto_after_greedy_proto || bad_proto) );
1702 * This subroutine has nothing to do with tilting, whether at windmills
1703 * or pinball tables. Its name is short for "increment line". It
1704 * increments the current line number in CopLINE(PL_curcop) and checks
1705 * to see whether the line starts with a comment of the form
1706 * # line 500 "foo.pm"
1707 * If so, it sets the current line number and file to the values in the comment.
1711 S_incline(pTHX_ const char *s)
1719 PERL_ARGS_ASSERT_INCLINE;
1721 COPLINE_INC_WITH_HERELINES;
1722 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1723 && s+1 == PL_bufend && *s == ';') {
1724 /* fake newline in string eval */
1725 CopLINE_dec(PL_curcop);
1730 while (SPACE_OR_TAB(*s))
1732 if (strnEQ(s, "line", 4))
1736 if (SPACE_OR_TAB(*s))
1740 while (SPACE_OR_TAB(*s))
1748 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1750 while (SPACE_OR_TAB(*s))
1752 if (*s == '"' && (t = strchr(s+1, '"'))) {
1758 while (!isSPACE(*t))
1762 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1764 if (*e != '\n' && *e != '\0')
1765 return; /* false alarm */
1767 line_num = atoi(n)-1;
1770 const STRLEN len = t - s;
1772 if (!PL_rsfp && !PL_parser->filtered) {
1773 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1774 * to *{"::_<newfilename"} */
1775 /* However, the long form of evals is only turned on by the
1776 debugger - usually they're "(eval %lu)" */
1777 GV * const cfgv = CopFILEGV(PL_curcop);
1780 STRLEN tmplen2 = len;
1784 if (tmplen2 + 2 <= sizeof smallbuf)
1787 Newx(tmpbuf2, tmplen2 + 2, char);
1792 memcpy(tmpbuf2 + 2, s, tmplen2);
1795 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1797 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1798 /* adjust ${"::_<newfilename"} to store the new file name */
1799 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1800 /* The line number may differ. If that is the case,
1801 alias the saved lines that are in the array.
1802 Otherwise alias the whole array. */
1803 if (CopLINE(PL_curcop) == line_num) {
1804 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1805 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1807 else if (GvAV(cfgv)) {
1808 AV * const av = GvAV(cfgv);
1809 const I32 start = CopLINE(PL_curcop)+1;
1810 I32 items = AvFILLp(av) - start;
1812 AV * const av2 = GvAVn(gv2);
1813 SV **svp = AvARRAY(av) + start;
1814 I32 l = (I32)line_num+1;
1816 av_store(av2, l++, SvREFCNT_inc(*svp++));
1821 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1824 CopFILE_free(PL_curcop);
1825 CopFILE_setn(PL_curcop, s, len);
1827 CopLINE_set(PL_curcop, line_num);
1830 #define skipspace(s) skipspace_flags(s, 0)
1833 /* skip space before PL_thistoken */
1836 S_skipspace0(pTHX_ char *s)
1838 PERL_ARGS_ASSERT_SKIPSPACE0;
1845 PL_thiswhite = newSVpvs("");
1846 sv_catsv(PL_thiswhite, PL_skipwhite);
1847 sv_free(PL_skipwhite);
1850 PL_realtokenstart = s - SvPVX(PL_linestr);
1854 /* skip space after PL_thistoken */
1857 S_skipspace1(pTHX_ char *s)
1859 const char *start = s;
1860 I32 startoff = start - SvPVX(PL_linestr);
1862 PERL_ARGS_ASSERT_SKIPSPACE1;
1867 start = SvPVX(PL_linestr) + startoff;
1868 if (!PL_thistoken && PL_realtokenstart >= 0) {
1869 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1870 PL_thistoken = newSVpvn(tstart, start - tstart);
1872 PL_realtokenstart = -1;
1875 PL_nextwhite = newSVpvs("");
1876 sv_catsv(PL_nextwhite, PL_skipwhite);
1877 sv_free(PL_skipwhite);
1884 S_skipspace2(pTHX_ char *s, SV **svp)
1887 const I32 startoff = s - SvPVX(PL_linestr);
1889 PERL_ARGS_ASSERT_SKIPSPACE2;
1892 if (!PL_madskills || !svp)
1894 start = SvPVX(PL_linestr) + startoff;
1895 if (!PL_thistoken && PL_realtokenstart >= 0) {
1896 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1897 PL_thistoken = newSVpvn(tstart, start - tstart);
1898 PL_realtokenstart = -1;
1902 *svp = newSVpvs("");
1903 sv_setsv(*svp, PL_skipwhite);
1904 sv_free(PL_skipwhite);
1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1915 AV *av = CopFILEAVx(PL_curcop);
1918 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1920 sv = *av_fetch(av, 0, 1);
1921 SvUPGRADE(sv, SVt_PVMG);
1923 if (!SvPOK(sv)) sv_setpvs(sv,"");
1925 sv_catsv(sv, orig_sv);
1927 sv_catpvn(sv, buf, len);
1932 if (PL_parser->preambling == NOLINE)
1933 av_store(av, CopLINE(PL_curcop), sv);
1939 * Called to gobble the appropriate amount and type of whitespace.
1940 * Skips comments as well.
1944 S_skipspace_flags(pTHX_ char *s, U32 flags)
1948 #endif /* PERL_MAD */
1949 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1952 sv_free(PL_skipwhite);
1953 PL_skipwhite = NULL;
1955 #endif /* PERL_MAD */
1956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1957 while (s < PL_bufend && SPACE_OR_TAB(*s))
1960 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1962 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1963 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1964 LEX_NO_NEXT_CHUNK : 0));
1966 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1967 if (PL_linestart > PL_bufptr)
1968 PL_bufptr = PL_linestart;
1973 PL_skipwhite = newSVpvn(start, s-start);
1974 #endif /* PERL_MAD */
1980 * Check the unary operators to ensure there's no ambiguity in how they're
1981 * used. An ambiguous piece of code would be:
1983 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1984 * the +5 is its argument.
1994 if (PL_oldoldbufptr != PL_last_uni)
1996 while (isSPACE(*PL_last_uni))
1999 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2001 if ((t = strchr(s, '(')) && t < PL_bufptr)
2004 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2005 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2006 (int)(s - PL_last_uni), PL_last_uni);
2010 * LOP : macro to build a list operator. Its behaviour has been replaced
2011 * with a subroutine, S_lop() for which LOP is just another name.
2014 #define LOP(f,x) return lop(f,x,s)
2018 * Build a list operator (or something that might be one). The rules:
2019 * - if we have a next token, then it's a list operator [why?]
2020 * - if the next thing is an opening paren, then it's a function
2021 * - else it's a list operator
2025 S_lop(pTHX_ I32 f, int x, char *s)
2029 PERL_ARGS_ASSERT_LOP;
2035 PL_last_lop = PL_oldbufptr;
2036 PL_last_lop_op = (OPCODE)f;
2045 return REPORT(FUNC);
2048 return REPORT(FUNC);
2051 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2052 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2053 return REPORT(LSTOP);
2060 * Sets up for an eventual force_next(). start_force(0) basically does
2061 * an unshift, while start_force(-1) does a push. yylex removes items
2066 S_start_force(pTHX_ int where)
2070 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2071 where = PL_lasttoke;
2072 assert(PL_curforce < 0 || PL_curforce == where);
2073 if (PL_curforce != where) {
2074 for (i = PL_lasttoke; i > where; --i) {
2075 PL_nexttoke[i] = PL_nexttoke[i-1];
2079 if (PL_curforce < 0) /* in case of duplicate start_force() */
2080 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2081 PL_curforce = where;
2084 curmad('^', newSVpvs(""));
2085 CURMAD('_', PL_nextwhite);
2090 S_curmad(pTHX_ char slot, SV *sv)
2096 if (PL_curforce < 0)
2097 where = &PL_thismad;
2099 where = &PL_nexttoke[PL_curforce].next_mad;
2105 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2107 else if (PL_encoding) {
2108 sv_recode_to_utf8(sv, PL_encoding);
2113 /* keep a slot open for the head of the list? */
2114 if (slot != '_' && *where && (*where)->mad_key == '^') {
2115 (*where)->mad_key = slot;
2116 sv_free(MUTABLE_SV(((*where)->mad_val)));
2117 (*where)->mad_val = (void*)sv;
2120 addmad(newMADsv(slot, sv), where, 0);
2123 # define start_force(where) NOOP
2124 # define curmad(slot, sv) NOOP
2129 * When the lexer realizes it knows the next token (for instance,
2130 * it is reordering tokens for the parser) then it can call S_force_next
2131 * to know what token to return the next time the lexer is called. Caller
2132 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2133 * and possibly PL_expect to ensure the lexer handles the token correctly.
2137 S_force_next(pTHX_ I32 type)
2142 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2143 tokereport(type, &NEXTVAL_NEXTTOKE);
2147 if (PL_curforce < 0)
2148 start_force(PL_lasttoke);
2149 PL_nexttoke[PL_curforce].next_type = type;
2150 if (PL_lex_state != LEX_KNOWNEXT)
2151 PL_lex_defer = PL_lex_state;
2152 PL_lex_state = LEX_KNOWNEXT;
2153 PL_lex_expect = PL_expect;
2156 PL_nexttype[PL_nexttoke] = type;
2158 if (PL_lex_state != LEX_KNOWNEXT) {
2159 PL_lex_defer = PL_lex_state;
2160 PL_lex_expect = PL_expect;
2161 PL_lex_state = LEX_KNOWNEXT;
2169 * This subroutine handles postfix deref syntax after the arrow has already
2170 * been emitted. @* $* etc. are emitted as two separate token right here.
2171 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2172 * only the first, leaving yylex to find the next.
2176 S_postderef(pTHX_ int const funny, char const next)
2179 assert(funny == DOLSHARP || strchr("$@%&*", funny));
2180 assert(strchr("*[{", next));
2182 PL_expect = XOPERATOR;
2183 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2184 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2185 PL_lex_state = LEX_INTERPEND;
2186 start_force(PL_curforce);
2187 force_next(POSTJOIN);
2189 start_force(PL_curforce);
2194 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2195 && !PL_lex_brackets)
2197 PL_expect = XOPERATOR;
2206 int yyc = PL_parser->yychar;
2207 if (yyc != YYEMPTY) {
2210 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2212 PL_lex_allbrackets--;
2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215 } else if (yyc == '('/*)*/) {
2216 PL_lex_allbrackets--;
2221 PL_parser->yychar = YYEMPTY;
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2229 SV * const sv = newSVpvn_utf8(start, len,
2232 && !is_ascii_string((const U8*)start, len)
2233 && is_utf8_string((const U8*)start, len));
2239 * When the lexer knows the next thing is a word (for instance, it has
2240 * just seen -> and it knows that the next char is a word char, then
2241 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2245 * char *start : buffer position (must be within PL_linestr)
2246 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2247 * int check_keyword : if true, Perl checks to make sure the word isn't
2248 * a keyword (do this if the word is a label, e.g. goto FOO)
2249 * int allow_pack : if true, : characters will also be allowed (require,
2250 * use, etc. do this)
2251 * int allow_initial_tick : used by the "sub" lexer only.
2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2261 PERL_ARGS_ASSERT_FORCE_WORD;
2263 start = SKIPSPACE1(start);
2265 if (isIDFIRST_lazy_if(s,UTF) ||
2266 (allow_pack && *s == ':') )
2268 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2269 if (check_keyword) {
2270 char *s2 = PL_tokenbuf;
2271 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2273 if (keyword(s2, len, 0))
2276 start_force(PL_curforce);
2278 curmad('X', newSVpvn(start,s-start));
2279 if (token == METHOD) {
2284 PL_expect = XOPERATOR;
2288 curmad('g', newSVpvs( "forced" ));
2289 NEXTVAL_NEXTTOKE.opval
2290 = (OP*)newSVOP(OP_CONST,0,
2291 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2292 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2300 * Called when the lexer wants $foo *foo &foo etc, but the program
2301 * text only contains the "foo" portion. The first argument is a pointer
2302 * to the "foo", and the second argument is the type symbol to prefix.
2303 * Forces the next token to be a "WORD".
2304 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2308 S_force_ident(pTHX_ const char *s, int kind)
2312 PERL_ARGS_ASSERT_FORCE_IDENT;
2315 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2316 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2317 UTF ? SVf_UTF8 : 0));
2318 start_force(PL_curforce);
2319 NEXTVAL_NEXTTOKE.opval = o;
2322 o->op_private = OPpCONST_ENTERED;
2323 /* XXX see note in pp_entereval() for why we forgo typo
2324 warnings if the symbol must be introduced in an eval.
2326 gv_fetchpvn_flags(s, len,
2327 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2328 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2329 kind == '$' ? SVt_PV :
2330 kind == '@' ? SVt_PVAV :
2331 kind == '%' ? SVt_PVHV :
2339 S_force_ident_maybe_lex(pTHX_ char pit)
2341 start_force(PL_curforce);
2342 NEXTVAL_NEXTTOKE.ival = pit;
2347 Perl_str_to_version(pTHX_ SV *sv)
2352 const char *start = SvPV_const(sv,len);
2353 const char * const end = start + len;
2354 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2356 PERL_ARGS_ASSERT_STR_TO_VERSION;
2358 while (start < end) {
2362 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2367 retval += ((NV)n)/nshift;
2376 * Forces the next token to be a version number.
2377 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2378 * and if "guessing" is TRUE, then no new token is created (and the caller
2379 * must use an alternative parsing method).
2383 S_force_version(pTHX_ char *s, int guessing)
2389 I32 startoff = s - SvPVX(PL_linestr);
2392 PERL_ARGS_ASSERT_FORCE_VERSION;
2400 while (isDIGIT(*d) || *d == '_' || *d == '.')
2404 start_force(PL_curforce);
2405 curmad('X', newSVpvn(s,d-s));
2408 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2410 s = scan_num(s, &pl_yylval);
2411 version = pl_yylval.opval;
2412 ver = cSVOPx(version)->op_sv;
2413 if (SvPOK(ver) && !SvNIOK(ver)) {
2414 SvUPGRADE(ver, SVt_PVNV);
2415 SvNV_set(ver, str_to_version(ver));
2416 SvNOK_on(ver); /* hint that it is a version */
2419 else if (guessing) {
2422 sv_free(PL_nextwhite); /* let next token collect whitespace */
2424 s = SvPVX(PL_linestr) + startoff;
2432 if (PL_madskills && !version) {
2433 sv_free(PL_nextwhite); /* let next token collect whitespace */
2435 s = SvPVX(PL_linestr) + startoff;
2438 /* NOTE: The parser sees the package name and the VERSION swapped */
2439 start_force(PL_curforce);
2440 NEXTVAL_NEXTTOKE.opval = version;
2447 * S_force_strict_version
2448 * Forces the next token to be a version number using strict syntax rules.
2452 S_force_strict_version(pTHX_ char *s)
2457 I32 startoff = s - SvPVX(PL_linestr);
2459 const char *errstr = NULL;
2461 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2463 while (isSPACE(*s)) /* leading whitespace */
2466 if (is_STRICT_VERSION(s,&errstr)) {
2468 s = (char *)scan_version(s, ver, 0);
2469 version = newSVOP(OP_CONST, 0, ver);
2471 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2472 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2476 yyerror(errstr); /* version required */
2481 if (PL_madskills && !version) {
2482 sv_free(PL_nextwhite); /* let next token collect whitespace */
2484 s = SvPVX(PL_linestr) + startoff;
2487 /* NOTE: The parser sees the package name and the VERSION swapped */
2488 start_force(PL_curforce);
2489 NEXTVAL_NEXTTOKE.opval = version;
2497 * Tokenize a quoted string passed in as an SV. It finds the next
2498 * chunk, up to end of string or a backslash. It may make a new
2499 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2504 S_tokeq(pTHX_ SV *sv)
2512 PERL_ARGS_ASSERT_TOKEQ;
2516 assert (!SvIsCOW(sv));
2517 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2521 /* This is relying on the SV being "well formed" with a trailing '\0' */
2522 while (s < send && !(*s == '\\' && s[1] == '\\'))
2527 if ( PL_hints & HINT_NEW_STRING ) {
2528 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2529 SVs_TEMP | SvUTF8(sv));
2533 if (s + 1 < send && (s[1] == '\\'))
2534 s++; /* all that, just for this */
2539 SvCUR_set(sv, d - SvPVX_const(sv));
2541 if ( PL_hints & HINT_NEW_STRING )
2542 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2547 * Now come three functions related to double-quote context,
2548 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2549 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2550 * interact with PL_lex_state, and create fake ( ... ) argument lists
2551 * to handle functions and concatenation.
2555 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2560 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2562 * Pattern matching will set PL_lex_op to the pattern-matching op to
2563 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2565 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2567 * Everything else becomes a FUNC.
2569 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2570 * had an OP_CONST or OP_READLINE). This just sets us up for a
2571 * call to S_sublex_push().
2575 S_sublex_start(pTHX)
2578 const I32 op_type = pl_yylval.ival;
2580 if (op_type == OP_NULL) {
2581 pl_yylval.opval = PL_lex_op;
2585 if (op_type == OP_CONST) {
2586 SV *sv = tokeq(PL_lex_stuff);
2588 if (SvTYPE(sv) == SVt_PVIV) {
2589 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2591 const char * const p = SvPV_const(sv, len);
2592 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2596 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2597 PL_lex_stuff = NULL;
2601 PL_sublex_info.super_state = PL_lex_state;
2602 PL_sublex_info.sub_inwhat = (U16)op_type;
2603 PL_sublex_info.sub_op = PL_lex_op;
2604 PL_lex_state = LEX_INTERPPUSH;
2608 pl_yylval.opval = PL_lex_op;
2618 * Create a new scope to save the lexing state. The scope will be
2619 * ended in S_sublex_done. Returns a '(', starting the function arguments
2620 * to the uc, lc, etc. found before.
2621 * Sets PL_lex_state to LEX_INTERPCONCAT.
2629 const bool is_heredoc = PL_multi_close == '<';
2632 PL_lex_state = PL_sublex_info.super_state;
2633 SAVEI8(PL_lex_dojoin);
2634 SAVEI32(PL_lex_brackets);
2635 SAVEI32(PL_lex_allbrackets);
2636 SAVEI32(PL_lex_formbrack);
2637 SAVEI8(PL_lex_fakeeof);
2638 SAVEI32(PL_lex_casemods);
2639 SAVEI32(PL_lex_starts);
2640 SAVEI8(PL_lex_state);
2641 SAVESPTR(PL_lex_repl);
2642 SAVEVPTR(PL_lex_inpat);
2643 SAVEI16(PL_lex_inwhat);
2646 SAVECOPLINE(PL_curcop);
2647 SAVEI32(PL_multi_end);
2648 SAVEI32(PL_parser->herelines);
2649 PL_parser->herelines = 0;
2651 SAVEI8(PL_multi_close);
2652 SAVEPPTR(PL_bufptr);
2653 SAVEPPTR(PL_bufend);
2654 SAVEPPTR(PL_oldbufptr);
2655 SAVEPPTR(PL_oldoldbufptr);
2656 SAVEPPTR(PL_last_lop);
2657 SAVEPPTR(PL_last_uni);
2658 SAVEPPTR(PL_linestart);
2659 SAVESPTR(PL_linestr);
2660 SAVEGENERICPV(PL_lex_brackstack);
2661 SAVEGENERICPV(PL_lex_casestack);
2662 SAVEGENERICPV(PL_parser->lex_shared);
2663 SAVEBOOL(PL_parser->lex_re_reparsing);
2664 SAVEI32(PL_copline);
2666 /* The here-doc parser needs to be able to peek into outer lexing
2667 scopes to find the body of the here-doc. So we put PL_linestr and
2668 PL_bufptr into lex_shared, to ‘share’ those values.
2670 PL_parser->lex_shared->ls_linestr = PL_linestr;
2671 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2673 PL_linestr = PL_lex_stuff;
2674 PL_lex_repl = PL_sublex_info.repl;
2675 PL_lex_stuff = NULL;
2676 PL_sublex_info.repl = NULL;
2678 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2679 = SvPVX(PL_linestr);
2680 PL_bufend += SvCUR(PL_linestr);
2681 PL_last_lop = PL_last_uni = NULL;
2682 SAVEFREESV(PL_linestr);
2683 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2685 PL_lex_dojoin = FALSE;
2686 PL_lex_brackets = PL_lex_formbrack = 0;
2687 PL_lex_allbrackets = 0;
2688 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2689 Newx(PL_lex_brackstack, 120, char);
2690 Newx(PL_lex_casestack, 12, char);
2691 PL_lex_casemods = 0;
2692 *PL_lex_casestack = '\0';
2694 PL_lex_state = LEX_INTERPCONCAT;
2696 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2697 PL_copline = NOLINE;
2699 Newxz(shared, 1, LEXSHARED);
2700 shared->ls_prev = PL_parser->lex_shared;
2701 PL_parser->lex_shared = shared;
2703 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2704 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2705 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2706 PL_lex_inpat = PL_sublex_info.sub_op;
2708 PL_lex_inpat = NULL;
2710 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2711 PL_in_eval &= ~EVAL_RE_REPARSING;
2718 * Restores lexer state after a S_sublex_push.
2725 if (!PL_lex_starts++) {
2726 SV * const sv = newSVpvs("");
2727 if (SvUTF8(PL_linestr))
2729 PL_expect = XOPERATOR;
2730 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2734 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2735 PL_lex_state = LEX_INTERPCASEMOD;
2739 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2740 assert(PL_lex_inwhat != OP_TRANSR);
2742 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2743 PL_linestr = PL_lex_repl;
2745 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2746 PL_bufend += SvCUR(PL_linestr);
2747 PL_last_lop = PL_last_uni = NULL;
2748 PL_lex_dojoin = FALSE;
2749 PL_lex_brackets = 0;
2750 PL_lex_allbrackets = 0;
2751 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2752 PL_lex_casemods = 0;
2753 *PL_lex_casestack = '\0';
2755 if (SvEVALED(PL_lex_repl)) {
2756 PL_lex_state = LEX_INTERPNORMAL;
2758 /* we don't clear PL_lex_repl here, so that we can check later
2759 whether this is an evalled subst; that means we rely on the
2760 logic to ensure sublex_done() is called again only via the
2761 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2764 PL_lex_state = LEX_INTERPCONCAT;
2767 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2768 CopLINE(PL_curcop) +=
2769 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2770 + PL_parser->herelines;
2771 PL_parser->herelines = 0;
2776 const line_t l = CopLINE(PL_curcop);
2781 PL_endwhite = newSVpvs("");
2782 sv_catsv(PL_endwhite, PL_thiswhite);
2786 sv_setpvs(PL_thistoken,"");
2788 PL_realtokenstart = -1;
2792 if (PL_multi_close == '<')
2793 PL_parser->herelines += l - PL_multi_end;
2794 PL_bufend = SvPVX(PL_linestr);
2795 PL_bufend += SvCUR(PL_linestr);
2796 PL_expect = XOPERATOR;
2797 PL_sublex_info.sub_inwhat = 0;
2802 PERL_STATIC_INLINE SV*
2803 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2805 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2806 * interior, hence to the "}". Finds what the name resolves to, returning
2807 * an SV* containing it; NULL if no valid one found */
2809 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2816 const U8* first_bad_char_loc;
2817 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2819 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2821 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2823 &first_bad_char_loc))
2825 /* If warnings are on, this will print a more detailed analysis of what
2826 * is wrong than the error message below */
2827 utf8n_to_uvchr(first_bad_char_loc,
2828 e - ((char *) first_bad_char_loc),
2831 /* We deliberately don't try to print the malformed character, which
2832 * might not print very well; it also may be just the first of many
2833 * malformations, so don't print what comes after it */
2834 yyerror(Perl_form(aTHX_
2835 "Malformed UTF-8 character immediately after '%.*s'",
2836 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2840 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2841 /* include the <}> */
2842 e - backslash_ptr + 1);
2844 SvREFCNT_dec_NN(res);
2848 /* See if the charnames handler is the Perl core's, and if so, we can skip
2849 * the validation needed for a user-supplied one, as Perl's does its own
2851 table = GvHV(PL_hintgv); /* ^H */
2852 cvp = hv_fetchs(table, "charnames", FALSE);
2853 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2854 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2856 const char * const name = HvNAME(stash);
2857 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2858 && strEQ(name, "_charnames")) {
2863 /* Here, it isn't Perl's charname handler. We can't rely on a
2864 * user-supplied handler to validate the input name. For non-ut8 input,
2865 * look to see that the first character is legal. Then loop through the
2866 * rest checking that each is a continuation */
2868 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2872 if (! isALPHAU(*s)) {
2877 if (! isCHARNAME_CONT(*s)) {
2880 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2881 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2882 "A sequence of multiple spaces in a charnames "
2883 "alias definition is deprecated");
2887 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2888 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2889 "Trailing white-space in a charnames alias "
2890 "definition is deprecated");
2894 /* Similarly for utf8. For invariants can check directly; for other
2895 * Latin1, can calculate their code point and check; otherwise use a
2897 if (UTF8_IS_INVARIANT(*s)) {
2898 if (! isALPHAU(*s)) {
2902 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2903 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2909 if (! PL_utf8_charname_begin) {
2910 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2911 PL_utf8_charname_begin = _core_swash_init("utf8",
2912 "_Perl_Charname_Begin",
2914 1, 0, NULL, &flags);
2916 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2923 if (UTF8_IS_INVARIANT(*s)) {
2924 if (! isCHARNAME_CONT(*s)) {
2927 if (*s == ' ' && *(s-1) == ' '
2928 && ckWARN_d(WARN_DEPRECATED)) {
2929 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2930 "A sequence of multiple spaces in a charnam"
2931 "es alias definition is deprecated");
2935 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2936 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2943 if (! PL_utf8_charname_continue) {
2944 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2945 PL_utf8_charname_continue = _core_swash_init("utf8",
2946 "_Perl_Charname_Continue",
2948 1, 0, NULL, &flags);
2950 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2956 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2957 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2958 "Trailing white-space in a charnames alias "
2959 "definition is deprecated");
2963 if (SvUTF8(res)) { /* Don't accept malformed input */
2964 const U8* first_bad_char_loc;
2966 const char* const str = SvPV_const(res, len);
2967 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2968 /* If warnings are on, this will print a more detailed analysis of
2969 * what is wrong than the error message below */
2970 utf8n_to_uvchr(first_bad_char_loc,
2971 (char *) first_bad_char_loc - str,
2974 /* We deliberately don't try to print the malformed character,
2975 * which might not print very well; it also may be just the first
2976 * of many malformations, so don't print what comes after it */
2979 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2980 (int) (e - backslash_ptr + 1), backslash_ptr,
2981 (int) ((char *) first_bad_char_loc - str), str
2991 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2993 /* The final %.*s makes sure that should the trailing NUL be missing
2994 * that this print won't run off the end of the string */
2997 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2998 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2999 (int)(e - s + bad_char_size), s + bad_char_size
3001 UTF ? SVf_UTF8 : 0);
3009 Extracts the next constant part of a pattern, double-quoted string,
3010 or transliteration. This is terrifying code.
3012 For example, in parsing the double-quoted string "ab\x63$d", it would
3013 stop at the '$' and return an OP_CONST containing 'abc'.
3015 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3016 processing a pattern (PL_lex_inpat is true), a transliteration
3017 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3019 Returns a pointer to the character scanned up to. If this is
3020 advanced from the start pointer supplied (i.e. if anything was
3021 successfully parsed), will leave an OP_CONST for the substring scanned
3022 in pl_yylval. Caller must intuit reason for not parsing further
3023 by looking at the next characters herself.
3027 \N{FOO} => \N{U+hex_for_character_FOO}
3028 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3031 all other \-char, including \N and \N{ apart from \N{ABC}
3034 @ and $ where it appears to be a var, but not for $ as tail anchor
3039 In transliterations:
3040 characters are VERY literal, except for - not at the start or end
3041 of the string, which indicates a range. If the range is in bytes,
3042 scan_const expands the range to the full set of intermediate
3043 characters. If the range is in utf8, the hyphen is replaced with
3044 a certain range mark which will be handled by pmtrans() in op.c.
3046 In double-quoted strings:
3048 double-quoted style: \r and \n
3049 constants: \x31, etc.
3050 deprecated backrefs: \1 (in substitution replacements)
3051 case and quoting: \U \Q \E
3054 scan_const does *not* construct ops to handle interpolated strings.
3055 It stops processing as soon as it finds an embedded $ or @ variable
3056 and leaves it to the caller to work out what's going on.
3058 embedded arrays (whether in pattern or not) could be:
3059 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3061 $ in double-quoted strings must be the symbol of an embedded scalar.
3063 $ in pattern could be $foo or could be tail anchor. Assumption:
3064 it's a tail anchor if $ is the last thing in the string, or if it's
3065 followed by one of "()| \r\n\t"
3067 \1 (backreferences) are turned into $1 in substitutions
3069 The structure of the code is
3070 while (there's a character to process) {
3071 handle transliteration ranges
3072 skip regexp comments /(?#comment)/ and codes /(?{code})/
3073 skip #-initiated comments in //x patterns
3074 check for embedded arrays
3075 check for embedded scalars
3077 deprecate \1 in substitution replacements
3078 handle string-changing backslashes \l \U \Q \E, etc.
3079 switch (what was escaped) {
3080 handle \- in a transliteration (becomes a literal -)
3081 if a pattern and not \N{, go treat as regular character
3082 handle \132 (octal characters)
3083 handle \x15 and \x{1234} (hex characters)
3084 handle \N{name} (named characters, also \N{3,5} in a pattern)
3085 handle \cV (control characters)
3086 handle printf-style backslashes (\f, \r, \n, etc)
3089 } (end if backslash)
3090 handle regular character
3091 } (end while character to read)
3096 S_scan_const(pTHX_ char *start)
3099 char *send = PL_bufend; /* end of the constant */
3100 SV *sv = newSV(send - start); /* sv for the constant. See
3101 note below on sizing. */
3102 char *s = start; /* start of the constant */
3103 char *d = SvPVX(sv); /* destination for copies */
3104 bool dorange = FALSE; /* are we in a translit range? */
3105 bool didrange = FALSE; /* did we just finish a range? */
3106 bool in_charclass = FALSE; /* within /[...]/ */
3107 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3108 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3109 to be UTF8? But, this can
3110 show as true when the source
3111 isn't utf8, as for example
3112 when it is entirely composed
3114 SV *res; /* result from charnames */
3116 /* Note on sizing: The scanned constant is placed into sv, which is
3117 * initialized by newSV() assuming one byte of output for every byte of
3118 * input. This routine expects newSV() to allocate an extra byte for a
3119 * trailing NUL, which this routine will append if it gets to the end of
3120 * the input. There may be more bytes of input than output (eg., \N{LATIN
3121 * CAPITAL LETTER A}), or more output than input if the constant ends up
3122 * recoded to utf8, but each time a construct is found that might increase
3123 * the needed size, SvGROW() is called. Its size parameter each time is
3124 * based on the best guess estimate at the time, namely the length used so
3125 * far, plus the length the current construct will occupy, plus room for
3126 * the trailing NUL, plus one byte for every input byte still unscanned */
3128 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3131 UV literal_endpoint = 0;
3132 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3135 PERL_ARGS_ASSERT_SCAN_CONST;
3137 assert(PL_lex_inwhat != OP_TRANSR);
3138 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3139 /* If we are doing a trans and we know we want UTF8 set expectation */
3140 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3141 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3144 /* Protect sv from errors and fatal warnings. */
3145 ENTER_with_name("scan_const");
3148 while (s < send || dorange) {
3150 /* get transliterations out of the way (they're most literal) */
3151 if (PL_lex_inwhat == OP_TRANS) {
3152 /* expand a range A-Z to the full set of characters. AIE! */
3154 I32 i; /* current expanded character */
3155 I32 min; /* first character in range */
3156 I32 max; /* last character in range */
3167 char * const c = (char*)utf8_hop((U8*)d, -1);
3171 *c = (char) ILLEGAL_UTF8_BYTE;
3172 /* mark the range as done, and continue */
3178 i = d - SvPVX_const(sv); /* remember current offset */
3181 SvLEN(sv) + (has_utf8 ?
3182 (512 - UTF_CONTINUATION_MARK +
3185 /* How many two-byte within 0..255: 128 in UTF-8,
3186 * 96 in UTF-8-mod. */
3188 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3190 d = SvPVX(sv) + i; /* refresh d after realloc */
3194 for (j = 0; j <= 1; j++) {
3195 char * const c = (char*)utf8_hop((U8*)d, -1);
3196 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3202 max = (U8)0xff; /* only to \xff */
3203 uvmax = uv; /* \x{100} to uvmax */
3205 d = c; /* eat endpoint chars */
3210 d -= 2; /* eat the first char and the - */
3211 min = (U8)*d; /* first char in range */
3212 max = (U8)d[1]; /* last char in range */
3219 "Invalid range \"%c-%c\" in transliteration operator",
3220 (char)min, (char)max);
3224 if (literal_endpoint == 2 &&
3225 ((isLOWER_A(min) && isLOWER_A(max)) ||
3226 (isUPPER_A(min) && isUPPER_A(max))))
3228 for (i = min; i <= max; i++) {
3235 for (i = min; i <= max; i++)
3238 append_utf8_from_native_byte(i, &d);
3246 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3248 *d++ = (char) ILLEGAL_UTF8_BYTE;
3250 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3254 /* mark the range as done, and continue */
3258 literal_endpoint = 0;
3263 /* range begins (ignore - as first or last char) */
3264 else if (*s == '-' && s+1 < send && s != start) {
3266 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3273 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3283 literal_endpoint = 0;
3284 native_range = TRUE;
3289 /* if we get here, we're not doing a transliteration */
3291 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3294 while (s1 >= start && *s1-- == '\\')
3297 in_charclass = TRUE;
3300 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3303 while (s1 >= start && *s1-- == '\\')
3306 in_charclass = FALSE;
3309 /* skip for regexp comments /(?#comment)/, except for the last
3310 * char, which will be done separately.
3311 * Stop on (?{..}) and friends */
3313 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3315 while (s+1 < send && *s != ')')
3318 else if (!PL_lex_casemods &&
3319 ( s[2] == '{' /* This should match regcomp.c */
3320 || (s[2] == '?' && s[3] == '{')))
3326 /* likewise skip #-initiated comments in //x patterns */
3327 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3328 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3329 while (s+1 < send && *s != '\n')
3333 /* no further processing of single-quoted regex */
3334 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3335 goto default_action;
3337 /* check for embedded arrays
3338 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3340 else if (*s == '@' && s[1]) {
3341 if (isWORDCHAR_lazy_if(s+1,UTF))
3343 if (strchr(":'{$", s[1]))
3345 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3346 break; /* in regexp, neither @+ nor @- are interpolated */
3349 /* check for embedded scalars. only stop if we're sure it's a
3352 else if (*s == '$') {
3353 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3355 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3357 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3358 "Possible unintended interpolation of $\\ in regex");
3360 break; /* in regexp, $ might be tail anchor */
3364 /* End of else if chain - OP_TRANS rejoin rest */
3367 if (*s == '\\' && s+1 < send) {
3368 char* e; /* Can be used for ending '}', etc. */
3372 /* warn on \1 - \9 in substitution replacements, but note that \11
3373 * is an octal; and \19 is \1 followed by '9' */
3374 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3375 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3377 /* diag_listed_as: \%d better written as $%d */
3378 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3383 /* string-change backslash escapes */
3384 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3388 /* In a pattern, process \N, but skip any other backslash escapes.
3389 * This is because we don't want to translate an escape sequence
3390 * into a meta symbol and have the regex compiler use the meta
3391 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3392 * in spite of this, we do have to process \N here while the proper
3393 * charnames handler is in scope. See bugs #56444 and #62056.
3394 * There is a complication because \N in a pattern may also stand
3395 * for 'match a non-nl', and not mean a charname, in which case its
3396 * processing should be deferred to the regex compiler. To be a
3397 * charname it must be followed immediately by a '{', and not look
3398 * like \N followed by a curly quantifier, i.e., not something like
3399 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3401 else if (PL_lex_inpat
3404 || regcurly(s + 1, FALSE)))
3407 goto default_action;
3412 /* quoted - in transliterations */
3414 if (PL_lex_inwhat == OP_TRANS) {
3421 if ((isALPHANUMERIC(*s)))
3422 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3423 "Unrecognized escape \\%c passed through",
3425 /* default action is to copy the quoted character */
3426 goto default_action;
3429 /* eg. \132 indicates the octal constant 0132 */
3430 case '0': case '1': case '2': case '3':
3431 case '4': case '5': case '6': case '7':
3433 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3435 uv = grok_oct(s, &len, &flags, NULL);
3437 if (len < 3 && s < send && isDIGIT(*s)
3438 && ckWARN(WARN_MISC))
3440 Perl_warner(aTHX_ packWARN(WARN_MISC),
3441 "%s", form_short_octal_warning(s, len));
3444 goto NUM_ESCAPE_INSERT;
3446 /* eg. \o{24} indicates the octal constant \024 */
3451 bool valid = grok_bslash_o(&s, &uv, &error,
3452 TRUE, /* Output warning */
3453 FALSE, /* Not strict */
3454 TRUE, /* Output warnings for
3461 goto NUM_ESCAPE_INSERT;
3464 /* eg. \x24 indicates the hex constant 0x24 */
3469 bool valid = grok_bslash_x(&s, &uv, &error,
3470 TRUE, /* Output warning */
3471 FALSE, /* Not strict */
3472 TRUE, /* Output warnings for
3482 /* Insert oct or hex escaped character. There will always be
3483 * enough room in sv since such escapes will be longer than any
3484 * UTF-8 sequence they can end up as, except if they force us
3485 * to recode the rest of the string into utf8 */
3487 /* Here uv is the ordinal of the next character being added */
3488 if (!UVCHR_IS_INVARIANT(uv)) {
3489 if (!has_utf8 && uv > 255) {
3490 /* Might need to recode whatever we have accumulated so
3491 * far if it contains any chars variant in utf8 or
3494 SvCUR_set(sv, d - SvPVX_const(sv));
3497 /* See Note on sizing above. */
3498 sv_utf8_upgrade_flags_grow(sv,
3499 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3500 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3501 d = SvPVX(sv) + SvCUR(sv);
3506 d = (char*)uvchr_to_utf8((U8*)d, uv);
3507 if (PL_lex_inwhat == OP_TRANS &&
3508 PL_sublex_info.sub_op) {
3509 PL_sublex_info.sub_op->op_private |=
3510 (PL_lex_repl ? OPpTRANS_FROM_UTF
3514 if (uv > 255 && !dorange)
3515 native_range = FALSE;
3528 /* In a non-pattern \N must be a named character, like \N{LATIN
3529 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3530 * mean to match a non-newline. For non-patterns, named
3531 * characters are converted to their string equivalents. In
3532 * patterns, named characters are not converted to their
3533 * ultimate forms for the same reasons that other escapes
3534 * aren't. Instead, they are converted to the \N{U+...} form
3535 * to get the value from the charnames that is in effect right
3536 * now, while preserving the fact that it was a named character
3537 * so that the regex compiler knows this */
3539 /* The structure of this section of code (besides checking for
3540 * errors and upgrading to utf8) is:
3541 * Further disambiguate between the two meanings of \N, and if
3542 * not a charname, go process it elsewhere
3543 * If of form \N{U+...}, pass it through if a pattern;
3544 * otherwise convert to utf8
3545 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3546 * pattern; otherwise convert to utf8 */
3548 /* Here, s points to the 'N'; the test below is guaranteed to
3549 * succeed if we are being called on a pattern as we already
3550 * know from a test above that the next character is a '{'.
3551 * On a non-pattern \N must mean 'named sequence, which
3552 * requires braces */
3555 yyerror("Missing braces on \\N{}");
3560 /* If there is no matching '}', it is an error. */
3561 if (! (e = strchr(s, '}'))) {
3562 if (! PL_lex_inpat) {
3563 yyerror("Missing right brace on \\N{}");
3565 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3570 /* Here it looks like a named character */
3572 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3573 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3574 | PERL_SCAN_DISALLOW_PREFIX;
3577 /* For \N{U+...}, the '...' is a unicode value even on
3578 * EBCDIC machines */
3579 s += 2; /* Skip to next char after the 'U+' */
3581 uv = grok_hex(s, &len, &flags, NULL);
3582 if (len == 0 || len != (STRLEN)(e - s)) {
3583 yyerror("Invalid hexadecimal number in \\N{U+...}");
3590 /* On non-EBCDIC platforms, pass through to the regex
3591 * compiler unchanged. The reason we evaluated the
3592 * number above is to make sure there wasn't a syntax
3593 * error. But on EBCDIC we convert to native so
3594 * downstream code can continue to assume it's native
3596 s -= 5; /* Include the '\N{U+' */
3598 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3601 (unsigned int) UNI_TO_NATIVE(uv));
3603 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3607 else { /* Not a pattern: convert the hex to string */
3609 /* If destination is not in utf8, unconditionally
3610 * recode it to be so. This is because \N{} implies
3611 * Unicode semantics, and scalars have to be in utf8
3612 * to guarantee those semantics */
3614 SvCUR_set(sv, d - SvPVX_const(sv));
3617 /* See Note on sizing above. */
3618 sv_utf8_upgrade_flags_grow(
3620 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3621 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3622 d = SvPVX(sv) + SvCUR(sv);
3626 /* Add the (Unicode) code point to the output. */
3627 if (UNI_IS_INVARIANT(uv)) {
3628 *d++ = (char) LATIN1_TO_NATIVE(uv);
3631 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3635 else /* Here is \N{NAME} but not \N{U+...}. */
3636 if ((res = get_and_check_backslash_N_name(s, e)))
3639 const char *str = SvPV_const(res, len);
3642 if (! len) { /* The name resolved to an empty string */
3643 Copy("\\N{}", d, 4, char);
3647 /* In order to not lose information for the regex
3648 * compiler, pass the result in the specially made
3649 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3650 * the code points in hex of each character
3651 * returned by charnames */
3653 const char *str_end = str + len;
3654 const STRLEN off = d - SvPVX_const(sv);
3656 if (! SvUTF8(res)) {
3657 /* For the non-UTF-8 case, we can determine the
3658 * exact length needed without having to parse
3659 * through the string. Each character takes up
3660 * 2 hex digits plus either a trailing dot or
3662 d = off + SvGROW(sv, off
3664 + 6 /* For the "\N{U+", and
3666 + (STRLEN)(send - e));
3667 Copy("\\N{U+", d, 5, char);
3669 while (str < str_end) {
3671 my_snprintf(hex_string, sizeof(hex_string),
3672 "%02X.", (U8) *str);
3673 Copy(hex_string, d, 3, char);
3677 d--; /* We will overwrite below the final
3678 dot with a right brace */
3681 STRLEN char_length; /* cur char's byte length */
3683 /* and the number of bytes after this is
3684 * translated into hex digits */
3685 STRLEN output_length;
3687 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3688 * for max('U+', '.'); and 1 for NUL */
3689 char hex_string[2 * UTF8_MAXBYTES + 5];
3691 /* Get the first character of the result. */
3692 U32 uv = utf8n_to_uvchr((U8 *) str,
3696 /* Convert first code point to hex, including
3697 * the boiler plate before it. */
3699 my_snprintf(hex_string, sizeof(hex_string),
3703 /* Make sure there is enough space to hold it */
3704 d = off + SvGROW(sv, off
3706 + (STRLEN)(send - e)
3707 + 2); /* '}' + NUL */
3709 Copy(hex_string, d, output_length, char);
3712 /* For each subsequent character, append dot and
3713 * its ordinal in hex */
3714 while ((str += char_length) < str_end) {
3715 const STRLEN off = d - SvPVX_const(sv);
3716 U32 uv = utf8n_to_uvchr((U8 *) str,
3721 my_snprintf(hex_string,
3726 d = off + SvGROW(sv, off
3728 + (STRLEN)(send - e)
3729 + 2); /* '}' + NUL */
3730 Copy(hex_string, d, output_length, char);
3735 *d++ = '}'; /* Done. Add the trailing brace */
3738 else { /* Here, not in a pattern. Convert the name to a
3741 /* If destination is not in utf8, unconditionally
3742 * recode it to be so. This is because \N{} implies
3743 * Unicode semantics, and scalars have to be in utf8
3744 * to guarantee those semantics */
3746 SvCUR_set(sv, d - SvPVX_const(sv));
3749 /* See Note on sizing above. */
3750 sv_utf8_upgrade_flags_grow(sv,
3751 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3752 len + (STRLEN)(send - s) + 1);
3753 d = SvPVX(sv) + SvCUR(sv);
3755 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3757 /* See Note on sizing above. (NOTE: SvCUR() is not
3758 * set correctly here). */
3759 const STRLEN off = d - SvPVX_const(sv);
3760 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3762 Copy(str, d, len, char);
3768 } /* End \N{NAME} */
3771 native_range = FALSE; /* \N{} is defined to be Unicode */
3773 s = e + 1; /* Point to just after the '}' */
3776 /* \c is a control character */
3780 *d++ = grok_bslash_c(*s++, 1);
3783 yyerror("Missing control char name in \\c");
3787 /* printf-style backslashes, formfeeds, newlines, etc */
3804 *d++ = ASCII_TO_NATIVE('\033');
3813 } /* end if (backslash) */
3820 /* If we started with encoded form, or already know we want it,
3821 then encode the next character */
3822 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3826 /* One might think that it is wasted effort in the case of the
3827 * source being utf8 (this_utf8 == TRUE) to take the next character
3828 * in the source, convert it to an unsigned value, and then convert
3829 * it back again. But the source has not been validated here. The
3830 * routine that does the conversion checks for errors like
3833 const UV nextuv = (this_utf8)
3834 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3836 const STRLEN need = UNISKIP(nextuv);
3838 SvCUR_set(sv, d - SvPVX_const(sv));
3841 /* See Note on sizing above. */
3842 sv_utf8_upgrade_flags_grow(sv,
3843 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3844 need + (STRLEN)(send - s) + 1);
3845 d = SvPVX(sv) + SvCUR(sv);
3847 } else if (need > len) {
3848 /* encoded value larger than old, may need extra space (NOTE:
3849 * SvCUR() is not set correctly here). See Note on sizing
3851 const STRLEN off = d - SvPVX_const(sv);
3852 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3856 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3858 if (uv > 255 && !dorange)
3859 native_range = FALSE;
3865 } /* while loop to process each character */
3867 /* terminate the string and set up the sv */
3869 SvCUR_set(sv, d - SvPVX_const(sv));
3870 if (SvCUR(sv) >= SvLEN(sv))
3871 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3872 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3875 if (PL_encoding && !has_utf8) {
3876 sv_recode_to_utf8(sv, PL_encoding);
3882 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3883 PL_sublex_info.sub_op->op_private |=
3884 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3888 /* shrink the sv if we allocated more than we used */
3889 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3890 SvPV_shrink_to_cur(sv);
3893 /* return the substring (via pl_yylval) only if we parsed anything */
3896 for (; s2 < s; s2++) {
3898 COPLINE_INC_WITH_HERELINES;
3900 SvREFCNT_inc_simple_void_NN(sv);
3901 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3902 && ! PL_parser->lex_re_reparsing)
3904 const char *const key = PL_lex_inpat ? "qr" : "q";
3905 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3909 if (PL_lex_inwhat == OP_TRANS) {
3912 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3915 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3923 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3926 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3928 LEAVE_with_name("scan_const");
3933 * Returns TRUE if there's more to the expression (e.g., a subscript),
3936 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3938 * ->[ and ->{ return TRUE
3939 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3940 * { and [ outside a pattern are always subscripts, so return TRUE
3941 * if we're outside a pattern and it's not { or [, then return FALSE
3942 * if we're in a pattern and the first char is a {
3943 * {4,5} (any digits around the comma) returns FALSE
3944 * if we're in a pattern and the first char is a [
3946 * [SOMETHING] has a funky algorithm to decide whether it's a
3947 * character class or not. It has to deal with things like
3948 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3949 * anything else returns TRUE
3952 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3955 S_intuit_more(pTHX_ char *s)
3959 PERL_ARGS_ASSERT_INTUIT_MORE;
3961 if (PL_lex_brackets)
3963 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3965 if (*s == '-' && s[1] == '>'
3966 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3967 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3968 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3970 if (*s != '{' && *s != '[')
3975 /* In a pattern, so maybe we have {n,m}. */
3977 if (regcurly(s, FALSE)) {
3983 /* On the other hand, maybe we have a character class */
3986 if (*s == ']' || *s == '^')
3989 /* this is terrifying, and it works */
3992 const char * const send = strchr(s,']');
3993 unsigned char un_char, last_un_char;
3994 char tmpbuf[sizeof PL_tokenbuf * 4];
3996 if (!send) /* has to be an expression */
3998 weight = 2; /* let's weigh the evidence */
4002 else if (isDIGIT(*s)) {
4004 if (isDIGIT(s[1]) && s[2] == ']')
4010 Zero(seen,256,char);
4012 for (; s < send; s++) {
4013 last_un_char = un_char;
4014 un_char = (unsigned char)*s;
4019 weight -= seen[un_char] * 10;
4020 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4022 char *tmp = PL_bufend;
4023 PL_bufend = (char*)send;
4024 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4026 len = (int)strlen(tmpbuf);
4027 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4028 UTF ? SVf_UTF8 : 0, SVt_PV))
4033 else if (*s == '$' && s[1] &&
4034 strchr("[#!%*<>()-=",s[1])) {
4035 if (/*{*/ strchr("])} =",s[2]))
4044 if (strchr("wds]",s[1]))
4046 else if (seen[(U8)'\''] || seen[(U8)'"'])
4048 else if (strchr("rnftbxcav",s[1]))
4050 else if (isDIGIT(s[1])) {
4052 while (s[1] && isDIGIT(s[1]))
4062 if (strchr("aA01! ",last_un_char))
4064 if (strchr("zZ79~",s[1]))
4066 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4067 weight -= 5; /* cope with negative subscript */
4070 if (!isWORDCHAR(last_un_char)
4071 && !(last_un_char == '$' || last_un_char == '@'
4072 || last_un_char == '&')
4073 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4078 if (keyword(tmpbuf, d - tmpbuf, 0))
4081 if (un_char == last_un_char + 1)
4083 weight -= seen[un_char];
4088 if (weight >= 0) /* probably a character class */
4098 * Does all the checking to disambiguate
4100 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4101 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4103 * First argument is the stuff after the first token, e.g. "bar".
4105 * Not a method if foo is a filehandle.
4106 * Not a method if foo is a subroutine prototyped to take a filehandle.
4107 * Not a method if it's really "Foo $bar"
4108 * Method if it's "foo $bar"
4109 * Not a method if it's really "print foo $bar"
4110 * Method if it's really "foo package::" (interpreted as package->foo)
4111 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4112 * Not a method if bar is a filehandle or package, but is quoted with
4117 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4120 char *s = start + (*start == '$');
4121 char tmpbuf[sizeof PL_tokenbuf];
4128 PERL_ARGS_ASSERT_INTUIT_METHOD;
4130 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4132 if (cv && SvPOK(cv)) {
4133 const char *proto = CvPROTO(cv);
4135 while (*proto && (isSPACE(*proto) || *proto == ';'))
4142 if (*start == '$') {
4143 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4144 isUPPER(*PL_tokenbuf))
4147 len = start - SvPVX(PL_linestr);
4151 start = SvPVX(PL_linestr) + len;
4155 return *s == '(' ? FUNCMETH : METHOD;
4158 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4159 /* start is the beginning of the possible filehandle/object,
4160 * and s is the end of it
4161 * tmpbuf is a copy of it (but with single quotes as double colons)
4164 if (!keyword(tmpbuf, len, 0)) {
4165 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4169 soff = s - SvPVX(PL_linestr);
4173 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4174 if (indirgv && GvCVu(indirgv))
4176 /* filehandle or package name makes it a method */
4177 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4179 soff = s - SvPVX(PL_linestr);
4182 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4183 return 0; /* no assumptions -- "=>" quotes bareword */
4185 start_force(PL_curforce);
4186 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4187 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4188 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4190 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4191 ( UTF ? SVf_UTF8 : 0 )));
4196 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4198 return *s == '(' ? FUNCMETH : METHOD;
4204 /* Encoded script support. filter_add() effectively inserts a
4205 * 'pre-processing' function into the current source input stream.
4206 * Note that the filter function only applies to the current source file
4207 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4209 * The datasv parameter (which may be NULL) can be used to pass
4210 * private data to this instance of the filter. The filter function
4211 * can recover the SV using the FILTER_DATA macro and use it to
4212 * store private buffers and state information.
4214 * The supplied datasv parameter is upgraded to a PVIO type
4215 * and the IoDIRP/IoANY field is used to store the function pointer,
4216 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4217 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4218 * private use must be set using malloc'd pointers.
4222 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4231 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4232 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4234 if (!PL_rsfp_filters)
4235 PL_rsfp_filters = newAV();
4238 SvUPGRADE(datasv, SVt_PVIO);
4239 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4240 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4241 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4242 FPTR2DPTR(void *, IoANY(datasv)),
4243 SvPV_nolen(datasv)));
4244 av_unshift(PL_rsfp_filters, 1);
4245 av_store(PL_rsfp_filters, 0, datasv) ;
4247 !PL_parser->filtered
4248 && PL_parser->lex_flags & LEX_EVALBYTES
4249 && PL_bufptr < PL_bufend
4251 const char *s = PL_bufptr;
4252 while (s < PL_bufend) {
4254 SV *linestr = PL_parser->linestr;
4255 char *buf = SvPVX(linestr);
4256 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4257 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4258 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4259 STRLEN const linestart_pos = PL_parser->linestart - buf;
4260 STRLEN const last_uni_pos =
4261 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4262 STRLEN const last_lop_pos =
4263 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4264 av_push(PL_rsfp_filters, linestr);
4265 PL_parser->linestr =
4266 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4267 buf = SvPVX(PL_parser->linestr);
4268 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4269 PL_parser->bufptr = buf + bufptr_pos;
4270 PL_parser->oldbufptr = buf + oldbufptr_pos;
4271 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4272 PL_parser->linestart = buf + linestart_pos;
4273 if (PL_parser->last_uni)
4274 PL_parser->last_uni = buf + last_uni_pos;
4275 if (PL_parser->last_lop)
4276 PL_parser->last_lop = buf + last_lop_pos;
4277 SvLEN(linestr) = SvCUR(linestr);
4278 SvCUR(linestr) = s-SvPVX(linestr);
4279 PL_parser->filtered = 1;
4289 /* Delete most recently added instance of this filter function. */
4291 Perl_filter_del(pTHX_ filter_t funcp)
4296 PERL_ARGS_ASSERT_FILTER_DEL;
4299 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4300 FPTR2DPTR(void*, funcp)));
4302 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4304 /* if filter is on top of stack (usual case) just pop it off */
4305 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4306 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4307 sv_free(av_pop(PL_rsfp_filters));
4311 /* we need to search for the correct entry and clear it */
4312 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4316 /* Invoke the idxth filter function for the current rsfp. */
4317 /* maxlen 0 = read one text line */
4319 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4324 /* This API is bad. It should have been using unsigned int for maxlen.
4325 Not sure if we want to change the API, but if not we should sanity
4326 check the value here. */
4327 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4329 PERL_ARGS_ASSERT_FILTER_READ;
4331 if (!PL_parser || !PL_rsfp_filters)
4333 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4334 /* Provide a default input filter to make life easy. */
4335 /* Note that we append to the line. This is handy. */
4336 DEBUG_P(PerlIO_printf(Perl_debug_log,
4337 "filter_read %d: from rsfp\n", idx));
4338 if (correct_length) {
4341 const int old_len = SvCUR(buf_sv);
4343 /* ensure buf_sv is large enough */
4344 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4345 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4346 correct_length)) <= 0) {
4347 if (PerlIO_error(PL_rsfp))
4348 return -1; /* error */
4350 return 0 ; /* end of file */
4352 SvCUR_set(buf_sv, old_len + len) ;
4353 SvPVX(buf_sv)[old_len + len] = '\0';
4356 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4357 if (PerlIO_error(PL_rsfp))
4358 return -1; /* error */
4360 return 0 ; /* end of file */
4363 return SvCUR(buf_sv);
4365 /* Skip this filter slot if filter has been deleted */
4366 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4367 DEBUG_P(PerlIO_printf(Perl_debug_log,
4368 "filter_read %d: skipped (filter deleted)\n",
4370 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4372 if (SvTYPE(datasv) != SVt_PVIO) {
4373 if (correct_length) {
4375 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4376 if (!remainder) return 0; /* eof */
4377 if (correct_length > remainder) correct_length = remainder;
4378 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4379 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4382 const char *s = SvEND(datasv);
4383 const char *send = SvPVX(datasv) + SvLEN(datasv);
4391 if (s == send) return 0; /* eof */
4392 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4393 SvCUR_set(datasv, s-SvPVX(datasv));
4395 return SvCUR(buf_sv);
4397 /* Get function pointer hidden within datasv */
4398 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4399 DEBUG_P(PerlIO_printf(Perl_debug_log,
4400 "filter_read %d: via function %p (%s)\n",
4401 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4402 /* Call function. The function is expected to */
4403 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4404 /* Return: <0:error, =0:eof, >0:not eof */
4405 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4409 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4413 PERL_ARGS_ASSERT_FILTER_GETS;
4415 #ifdef PERL_CR_FILTER
4416 if (!PL_rsfp_filters) {
4417 filter_add(S_cr_textfilter,NULL);
4420 if (PL_rsfp_filters) {
4422 SvCUR_set(sv, 0); /* start with empty line */
4423 if (FILTER_READ(0, sv, 0) > 0)
4424 return ( SvPVX(sv) ) ;
4429 return (sv_gets(sv, PL_rsfp, append));
4433 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4438 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4440 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4444 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4445 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4447 return GvHV(gv); /* Foo:: */
4450 /* use constant CLASS => 'MyClass' */
4451 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4452 if (gv && GvCV(gv)) {
4453 SV * const sv = cv_const_sv(GvCV(gv));
4455 pkgname = SvPV_const(sv, len);
4458 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4464 * The intent of this yylex wrapper is to minimize the changes to the
4465 * tokener when we aren't interested in collecting madprops. It remains
4466 * to be seen how successful this strategy will be...
4473 char *s = PL_bufptr;
4475 /* make sure PL_thiswhite is initialized */
4479 /* previous token ate up our whitespace? */
4480 if (!PL_lasttoke && PL_nextwhite) {
4481 PL_thiswhite = PL_nextwhite;
4485 /* isolate the token, and figure out where it is without whitespace */
4486 PL_realtokenstart = -1;
4490 assert(PL_curforce < 0);
4492 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4493 if (!PL_thistoken) {
4494 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4495 PL_thistoken = newSVpvs("");
4497 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4498 PL_thistoken = newSVpvn(tstart, s - tstart);
4501 if (PL_thismad) /* install head */
4502 CURMAD('X', PL_thistoken);
4505 /* last whitespace of a sublex? */
4506 if (optype == ')' && PL_endwhite) {
4507 CURMAD('X', PL_endwhite);
4512 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4513 if (!PL_thiswhite && !PL_endwhite && !optype) {
4514 sv_free(PL_thistoken);
4519 /* put off final whitespace till peg */
4520 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4521 PL_nextwhite = PL_thiswhite;
4524 else if (PL_thisopen) {
4525 CURMAD('q', PL_thisopen);
4527 sv_free(PL_thistoken);
4531 /* Store actual token text as madprop X */
4532 CURMAD('X', PL_thistoken);
4536 /* add preceding whitespace as madprop _ */
4537 CURMAD('_', PL_thiswhite);
4541 /* add quoted material as madprop = */
4542 CURMAD('=', PL_thisstuff);
4546 /* add terminating quote as madprop Q */
4547 CURMAD('Q', PL_thisclose);
4551 /* special processing based on optype */
4555 /* opval doesn't need a TOKEN since it can already store mp */
4565 if (pl_yylval.opval)
4566 append_madprops(PL_thismad, pl_yylval.opval, 0);
4574 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4587 /* remember any fake bracket that lexer is about to discard */
4588 if (PL_lex_brackets == 1 &&
4589 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4592 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4595 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4596 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4599 break; /* don't bother looking for trailing comment */
4608 /* attach a trailing comment to its statement instead of next token */
4612 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4614 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4616 if (*s == '\n' || *s == '#') {
4617 while (s < PL_bufend && *s != '\n')
4621 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4622 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4635 /* Create new token struct. Note: opvals return early above. */
4636 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4643 S_tokenize_use(pTHX_ int is_use, char *s) {
4646 PERL_ARGS_ASSERT_TOKENIZE_USE;
4648 if (PL_expect != XSTATE)
4649 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4650 is_use ? "use" : "no"));
4653 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4654 s = force_version(s, TRUE);
4655 if (*s == ';' || *s == '}'
4656 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4657 start_force(PL_curforce);
4658 NEXTVAL_NEXTTOKE.opval = NULL;
4661 else if (*s == 'v') {
4662 s = force_word(s,WORD,FALSE,TRUE);
4663 s = force_version(s, FALSE);
4667 s = force_word(s,WORD,FALSE,TRUE);
4668 s = force_version(s, FALSE);
4670 pl_yylval.ival = is_use;
4674 static const char* const exp_name[] =
4675 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4676 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4680 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4682 S_word_takes_any_delimeter(char *p, STRLEN len)
4684 return (len == 1 && strchr("msyq", p[0])) ||
4686 (p[0] == 't' && p[1] == 'r') ||
4687 (p[0] == 'q' && strchr("qwxr", p[1]))));
4691 S_check_scalar_slice(pTHX_ char *s)
4694 while (*s == ' ' || *s == '\t') s++;
4695 if (*s == 'q' && s[1] == 'w'
4696 && !isWORDCHAR_lazy_if(s+2,UTF))
4698 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4699 s += UTF ? UTF8SKIP(s) : 1;
4700 if (*s == '}' || *s == ']')
4701 pl_yylval.ival = OPpSLICEWARNING;
4707 Works out what to call the token just pulled out of the input
4708 stream. The yacc parser takes care of taking the ops we return and
4709 stitching them into a tree.
4712 The type of the next token
4715 Switch based on the current state:
4716 - if we already built the token before, use it
4717 - if we have a case modifier in a string, deal with that
4718 - handle other cases of interpolation inside a string
4719 - scan the next line if we are inside a format
4720 In the normal state switch on the next character:
4722 if alphabetic, go to key lookup
4723 unrecoginized character - croak
4724 - 0/4/26: handle end-of-line or EOF
4725 - cases for whitespace
4726 - \n and #: handle comments and line numbers
4727 - various operators, brackets and sigils
4730 - 'v': vstrings (or go to key lookup)
4731 - 'x' repetition operator (or go to key lookup)
4732 - other ASCII alphanumerics (key lookup begins here):
4735 scan built-in keyword (but do nothing with it yet)
4736 check for statement label
4737 check for lexical subs
4738 goto just_a_word if there is one
4739 see whether built-in keyword is overridden
4740 switch on keyword number:
4741 - default: just_a_word:
4742 not a built-in keyword; handle bareword lookup
4743 disambiguate between method and sub call
4744 fall back to bareword
4745 - cases for built-in keywords
4753 char *s = PL_bufptr;
4757 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4761 /* orig_keyword, gvp, and gv are initialized here because
4762 * jump to the label just_a_word_zero can bypass their
4763 * initialization later. */
4764 I32 orig_keyword = 0;
4769 SV* tmp = newSVpvs("");
4770 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4771 (IV)CopLINE(PL_curcop),
4772 lex_state_names[PL_lex_state],
4773 exp_name[PL_expect],
4774 pv_display(tmp, s, strlen(s), 0, 60));
4778 switch (PL_lex_state) {
4780 case LEX_INTERPNORMAL:
4783 /* when we've already built the next token, just pull it out of the queue */
4787 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4789 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4790 PL_nexttoke[PL_lasttoke].next_mad = 0;
4791 if (PL_thismad && PL_thismad->mad_key == '_') {
4792 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4793 PL_thismad->mad_val = 0;
4794 mad_free(PL_thismad);
4799 PL_lex_state = PL_lex_defer;
4800 PL_expect = PL_lex_expect;
4801 PL_lex_defer = LEX_NORMAL;
4802 if (!PL_nexttoke[PL_lasttoke].next_type)
4807 pl_yylval = PL_nextval[PL_nexttoke];
4809 PL_lex_state = PL_lex_defer;
4810 PL_expect = PL_lex_expect;
4811 PL_lex_defer = LEX_NORMAL;
4817 next_type = PL_nexttoke[PL_lasttoke].next_type;
4819 next_type = PL_nexttype[PL_nexttoke];
4821 if (next_type & (7<<24)) {
4822 if (next_type & (1<<24)) {
4823 if (PL_lex_brackets > 100)
4824 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4825 PL_lex_brackstack[PL_lex_brackets++] =
4826 (char) ((next_type >> 16) & 0xff);
4828 if (next_type & (2<<24))
4829 PL_lex_allbrackets++;
4830 if (next_type & (4<<24))
4831 PL_lex_allbrackets--;
4832 next_type &= 0xffff;
4834 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4837 /* interpolated case modifiers like \L \U, including \Q and \E.
4838 when we get here, PL_bufptr is at the \
4840 case LEX_INTERPCASEMOD:
4842 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4844 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4845 PL_bufptr, PL_bufend, *PL_bufptr);
4847 /* handle \E or end of string */
4848 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4850 if (PL_lex_casemods) {
4851 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4852 PL_lex_casestack[PL_lex_casemods] = '\0';
4854 if (PL_bufptr != PL_bufend
4855 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4856 || oldmod == 'F')) {
4858 PL_lex_state = LEX_INTERPCONCAT;
4861 PL_thistoken = newSVpvs("\\E");
4864 PL_lex_allbrackets--;
4867 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4868 /* Got an unpaired \E */
4869 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4870 "Useless use of \\E");
4873 while (PL_bufptr != PL_bufend &&
4874 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4877 PL_thiswhite = newSVpvs("");
4878 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4883 if (PL_bufptr != PL_bufend)
4886 PL_lex_state = LEX_INTERPCONCAT;
4890 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4891 "### Saw case modifier\n"); });
4893 if (s[1] == '\\' && s[2] == 'E') {
4897 PL_thiswhite = newSVpvs("");
4898 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4902 PL_lex_state = LEX_INTERPCONCAT;
4907 if (!PL_madskills) /* when just compiling don't need correct */
4908 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4909 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4910 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4911 (strchr(PL_lex_casestack, 'L')
4912 || strchr(PL_lex_casestack, 'U')
4913 || strchr(PL_lex_casestack, 'F'))) {
4914 PL_lex_casestack[--PL_lex_casemods] = '\0';
4915 PL_lex_allbrackets--;
4918 if (PL_lex_casemods > 10)
4919 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4920 PL_lex_casestack[PL_lex_casemods++] = *s;
4921 PL_lex_casestack[PL_lex_casemods] = '\0';
4922 PL_lex_state = LEX_INTERPCONCAT;
4923 start_force(PL_curforce);
4924 NEXTVAL_NEXTTOKE.ival = 0;
4925 force_next((2<<24)|'(');
4926 start_force(PL_curforce);
4928 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4930 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4932 NEXTVAL_NEXTTOKE.ival = OP_LC;
4934 NEXTVAL_NEXTTOKE.ival = OP_UC;
4936 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4938 NEXTVAL_NEXTTOKE.ival = OP_FC;
4940 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4942 SV* const tmpsv = newSVpvs("\\ ");
4943 /* replace the space with the character we want to escape
4945 SvPVX(tmpsv)[1] = *s;
4951 if (PL_lex_starts) {
4957 sv_free(PL_thistoken);
4958 PL_thistoken = newSVpvs("");
4961 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4962 if (PL_lex_casemods == 1 && PL_lex_inpat)
4971 case LEX_INTERPPUSH:
4972 return REPORT(sublex_push());
4974 case LEX_INTERPSTART:
4975 if (PL_bufptr == PL_bufend)
4976 return REPORT(sublex_done());
4977 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4978 "### Interpolated variable\n"); });
4980 /* for /@a/, we leave the joining for the regex engine to do
4981 * (unless we're within \Q etc) */
4982 PL_lex_dojoin = (*PL_bufptr == '@'
4983 && (!PL_lex_inpat || PL_lex_casemods));
4984 PL_lex_state = LEX_INTERPNORMAL;
4985 if (PL_lex_dojoin) {
4986 start_force(PL_curforce);
4987 NEXTVAL_NEXTTOKE.ival = 0;
4989 start_force(PL_curforce);
4990 force_ident("\"", '$');
4991 start_force(PL_curforce);
4992 NEXTVAL_NEXTTOKE.ival = 0;
4994 start_force(PL_curforce);
4995 NEXTVAL_NEXTTOKE.ival = 0;
4996 force_next((2<<24)|'(');
4997 start_force(PL_curforce);
4998 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5001 /* Convert (?{...}) and friends to 'do {...}' */
5002 if (PL_lex_inpat && *PL_bufptr == '(') {
5003 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5005 if (*PL_bufptr != '{')
5007 start_force(PL_curforce);
5008 /* XXX probably need a CURMAD(something) here */
5009 PL_expect = XTERMBLOCK;
5013 if (PL_lex_starts++) {
5018 sv_free(PL_thistoken);
5019 PL_thistoken = newSVpvs("");
5022 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5023 if (!PL_lex_casemods && PL_lex_inpat)
5030 case LEX_INTERPENDMAYBE:
5031 if (intuit_more(PL_bufptr)) {
5032 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5038 if (PL_lex_dojoin) {
5039 const U8 dojoin_was = PL_lex_dojoin;
5040 PL_lex_dojoin = FALSE;
5041 PL_lex_state = LEX_INTERPCONCAT;
5045 sv_free(PL_thistoken);
5046 PL_thistoken = newSVpvs("");
5049 PL_lex_allbrackets--;
5050 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5052 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5053 && SvEVALED(PL_lex_repl))
5055 if (PL_bufptr != PL_bufend)
5056 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5059 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5060 re_eval_str. If the here-doc body’s length equals the previous
5061 value of re_eval_start, re_eval_start will now be null. So
5062 check re_eval_str as well. */
5063 if (PL_parser->lex_shared->re_eval_start
5064 || PL_parser->lex_shared->re_eval_str) {
5066 if (*PL_bufptr != ')')
5067 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5069 /* having compiled a (?{..}) expression, return the original
5070 * text too, as a const */
5071 if (PL_parser->lex_shared->re_eval_str) {
5072 sv = PL_parser->lex_shared->re_eval_str;
5073 PL_parser->lex_shared->re_eval_str = NULL;
5075 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5076 SvPV_shrink_to_cur(sv);
5078 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5079 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5080 start_force(PL_curforce);
5081 /* XXX probably need a CURMAD(something) here */
5082 NEXTVAL_NEXTTOKE.opval =
5083 (OP*)newSVOP(OP_CONST, 0,
5086 PL_parser->lex_shared->re_eval_start = NULL;
5092 case LEX_INTERPCONCAT:
5094 if (PL_lex_brackets)
5095 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5096 (long) PL_lex_brackets);
5098 if (PL_bufptr == PL_bufend)
5099 return REPORT(sublex_done());
5101 /* m'foo' still needs to be parsed for possible (?{...}) */
5102 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5103 SV *sv = newSVsv(PL_linestr);
5105 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5109 s = scan_const(PL_bufptr);
5111 PL_lex_state = LEX_INTERPCASEMOD;
5113 PL_lex_state = LEX_INTERPSTART;
5116 if (s != PL_bufptr) {
5117 start_force(PL_curforce);
5119 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5121 NEXTVAL_NEXTTOKE = pl_yylval;
5124 if (PL_lex_starts++) {
5128 sv_free(PL_thistoken);
5129 PL_thistoken = newSVpvs("");
5132 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5133 if (!PL_lex_casemods && PL_lex_inpat)
5146 s = scan_formline(PL_bufptr);
5147 if (!PL_lex_formbrack)
5156 /* We really do *not* want PL_linestr ever becoming a COW. */
5157 assert (!SvIsCOW(PL_linestr));
5159 PL_oldoldbufptr = PL_oldbufptr;
5161 PL_parser->saw_infix_sigil = 0;
5166 sv_free(PL_thistoken);
5169 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5173 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5176 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5177 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5179 SVs_TEMP | SVf_UTF8),
5180 10, UNI_DISPLAY_ISPRINT)
5181 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5182 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5183 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5184 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5188 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5189 UTF8fARG(UTF, (s - d), d),
5194 goto fake_eof; /* emulate EOF on ^D or ^Z */
5200 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5203 if (PL_lex_brackets &&
5204 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5205 yyerror((const char *)
5207 ? "Format not terminated"
5208 : "Missing right curly or square bracket"));
5210 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5211 "### Tokener got EOF\n");
5215 if (s++ < PL_bufend)
5216 goto retry; /* ignore stray nulls */
5219 if (!PL_in_eval && !PL_preambled) {
5220 PL_preambled = TRUE;
5226 /* Generate a string of Perl code to load the debugger.
5227 * If PERL5DB is set, it will return the contents of that,
5228 * otherwise a compile-time require of perl5db.pl. */
5230 const char * const pdb = PerlEnv_getenv("PERL5DB");
5233 sv_setpv(PL_linestr, pdb);
5234 sv_catpvs(PL_linestr,";");
5236 SETERRNO(0,SS_NORMAL);
5237 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5239 PL_parser->preambling = CopLINE(PL_curcop);
5241 sv_setpvs(PL_linestr,"");
5242 if (PL_preambleav) {
5243 SV **svp = AvARRAY(PL_preambleav);
5244 SV **const end = svp + AvFILLp(PL_preambleav);
5246 sv_catsv(PL_linestr, *svp);
5248 sv_catpvs(PL_linestr, ";");
5250 sv_free(MUTABLE_SV(PL_preambleav));
5251 PL_preambleav = NULL;
5254 sv_catpvs(PL_linestr,
5255 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5256 if (PL_minus_n || PL_minus_p) {
5257 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5259 sv_catpvs(PL_linestr,"chomp;");
5262 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5263 || *PL_splitstr == '"')
5264 && strchr(PL_splitstr + 1, *PL_splitstr))
5265 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5267 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5268 bytes can be used as quoting characters. :-) */
5269 const char *splits = PL_splitstr;
5270 sv_catpvs(PL_linestr, "our @F=split(q\0");
5273 if (*splits == '\\')
5274 sv_catpvn(PL_linestr, splits, 1);
5275 sv_catpvn(PL_linestr, splits, 1);
5276 } while (*splits++);
5277 /* This loop will embed the trailing NUL of
5278 PL_linestr as the last thing it does before
5280 sv_catpvs(PL_linestr, ");");
5284 sv_catpvs(PL_linestr,"our @F=split(' ');");
5287 sv_catpvs(PL_linestr, "\n");
5288 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5289 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5290 PL_last_lop = PL_last_uni = NULL;
5291 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5292 update_debugger_info(PL_linestr, NULL, 0);
5297 bof = PL_rsfp ? TRUE : FALSE;
5300 fake_eof = LEX_FAKE_EOF;
5302 PL_bufptr = PL_bufend;
5303 COPLINE_INC_WITH_HERELINES;
5304 if (!lex_next_chunk(fake_eof)) {
5305 CopLINE_dec(PL_curcop);
5307 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5309 CopLINE_dec(PL_curcop);
5312 PL_realtokenstart = -1;
5315 /* If it looks like the start of a BOM or raw UTF-16,
5316 * check if it in fact is. */
5317 if (bof && PL_rsfp &&
5319 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5322 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5323 bof = (offset == (Off_t)SvCUR(PL_linestr));
5324 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5325 /* offset may include swallowed CR */
5327 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5330 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5331 s = swallow_bom((U8*)s);
5334 if (PL_parser->in_pod) {
5335 /* Incest with pod. */
5338 sv_catsv(PL_thiswhite, PL_linestr);
5340 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5341 sv_setpvs(PL_linestr, "");
5342 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5343 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5344 PL_last_lop = PL_last_uni = NULL;
5345 PL_parser->in_pod = 0;
5348 if (PL_rsfp || PL_parser->filtered)
5350 } while (PL_parser->in_pod);
5351 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5352 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5353 PL_last_lop = PL_last_uni = NULL;
5354 if (CopLINE(PL_curcop) == 1) {
5355 while (s < PL_bufend && isSPACE(*s))
5357 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5361 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5365 if (*s == '#' && *(s+1) == '!')
5367 #ifdef ALTERNATE_SHEBANG
5369 static char const as[] = ALTERNATE_SHEBANG;
5370 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5371 d = s + (sizeof(as) - 1);
5373 #endif /* ALTERNATE_SHEBANG */
5382 while (*d && !isSPACE(*d))
5386 #ifdef ARG_ZERO_IS_SCRIPT
5387 if (ipathend > ipath) {
5389 * HP-UX (at least) sets argv[0] to the script name,
5390 * which makes $^X incorrect. And Digital UNIX and Linux,
5391 * at least, set argv[0] to the basename of the Perl
5392 * interpreter. So, having found "#!", we'll set it right.
5394 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5396 assert(SvPOK(x) || SvGMAGICAL(x));
5397 if (sv_eq(x, CopFILESV(PL_curcop))) {
5398 sv_setpvn(x, ipath, ipathend - ipath);
5404 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5405 const char * const lstart = SvPV_const(x,llen);
5407 bstart += blen - llen;
5408 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5409 sv_setpvn(x, ipath, ipathend - ipath);
5414 TAINT_NOT; /* $^X is always tainted, but that's OK */
5416 #endif /* ARG_ZERO_IS_SCRIPT */
5421 d = instr(s,"perl -");
5423 d = instr(s,"perl");
5425 /* avoid getting into infinite loops when shebang
5426 * line contains "Perl" rather than "perl" */
5428 for (d = ipathend-4; d >= ipath; --d) {
5429 if ((*d == 'p' || *d == 'P')
5430 && !ibcmp(d, "perl", 4))
5440 #ifdef ALTERNATE_SHEBANG
5442 * If the ALTERNATE_SHEBANG on this system starts with a
5443 * character that can be part of a Perl expression, then if
5444 * we see it but not "perl", we're probably looking at the
5445 * start of Perl code, not a request to hand off to some
5446 * other interpreter. Similarly, if "perl" is there, but
5447 * not in the first 'word' of the line, we assume the line
5448 * contains the start of the Perl program.
5450 if (d && *s != '#') {
5451 const char *c = ipath;
5452 while (*c && !strchr("; \t\r\n\f\v#", *c))
5455 d = NULL; /* "perl" not in first word; ignore */
5457 *s = '#'; /* Don't try to parse shebang line */
5459 #endif /* ALTERNATE_SHEBANG */
5464 !instr(s,"indir") &&
5465 instr(PL_origargv[0],"perl"))
5472 while (s < PL_bufend && isSPACE(*s))
5474 if (s < PL_bufend) {
5475 Newx(newargv,PL_origargc+3,char*);
5477 while (s < PL_bufend && !isSPACE(*s))
5480 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5483 newargv = PL_origargv;
5486 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5488 Perl_croak(aTHX_ "Can't exec %s", ipath);
5491 while (*d && !isSPACE(*d))
5493 while (SPACE_OR_TAB(*d))
5497 const bool switches_done = PL_doswitches;
5498 const U32 oldpdb = PL_perldb;
5499 const bool oldn = PL_minus_n;
5500 const bool oldp = PL_minus_p;
5504 bool baduni = FALSE;
5506 const char *d2 = d1 + 1;
5507 if (parse_unicode_opts((const char **)&d2)
5511 if (baduni || *d1 == 'M' || *d1 == 'm') {
5512 const char * const m = d1;
5513 while (*d1 && !isSPACE(*d1))
5515 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5518 d1 = moreswitches(d1);
5520 if (PL_doswitches && !switches_done) {
5521 int argc = PL_origargc;
5522 char **argv = PL_origargv;
5525 } while (argc && argv[0][0] == '-' && argv[0][1]);
5526 init_argv_symbols(argc,argv);
5528 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5529 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5530 /* if we have already added "LINE: while (<>) {",
5531 we must not do it again */
5533 sv_setpvs(PL_linestr, "");
5534 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5535 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5536 PL_last_lop = PL_last_uni = NULL;
5537 PL_preambled = FALSE;
5538 if (PERLDB_LINE || PERLDB_SAVESRC)
5539 (void)gv_fetchfile(PL_origfilename);
5546 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5547 PL_lex_state = LEX_FORMLINE;
5548 start_force(PL_curforce);
5549 NEXTVAL_NEXTTOKE.ival = 0;
5550 force_next(FORMRBRACK);
5555 #ifdef PERL_STRICT_CR
5556 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5558 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5560 case ' ': case '\t': case '\f': case 013:
5562 PL_realtokenstart = -1;
5565 PL_thiswhite = newSVpvs("");
5566 sv_catpvn(PL_thiswhite, s, 1);
5574 PL_realtokenstart = -1;
5578 if (PL_lex_state != LEX_NORMAL ||
5579 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5580 if (*s == '#' && s == PL_linestart && PL_in_eval
5581 && !PL_rsfp && !PL_parser->filtered) {
5582 /* handle eval qq[#line 1 "foo"\n ...] */
5583 CopLINE_dec(PL_curcop);
5586 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5588 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5592 const bool in_comment = *s == '#';
5594 while (d < PL_bufend && *d != '\n')
5598 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5599 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5603 PL_thiswhite = newSVpvn(s, d - s);
5606 if (in_comment && d == PL_bufend
5607 && PL_lex_state == LEX_INTERPNORMAL
5608 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5609 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5612 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5613 PL_lex_state = LEX_FORMLINE;
5614 start_force(PL_curforce);
5615 NEXTVAL_NEXTTOKE.ival = 0;
5616 force_next(FORMRBRACK);
5622 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5623 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5626 TOKEN(PEG); /* make sure any #! line is accessible */
5632 if (PL_madskills) d = s;
5633 while (s < PL_bufend && *s != '\n')
5641 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5642 Perl_croak(aTHX_ "panic: input overflow");
5644 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5646 PL_thiswhite = newSVpvs("");
5647 if (CopLINE(PL_curcop) == 1) {
5648 sv_setpvs(PL_thiswhite, "");
5651 sv_catpvn(PL_thiswhite, d, s - d);
5658 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5666 while (s < PL_bufend && SPACE_OR_TAB(*s))
5669 if (strnEQ(s,"=>",2)) {
5670 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5671 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5672 OPERATOR('-'); /* unary minus */
5675 case 'r': ftst = OP_FTEREAD; break;
5676 case 'w': ftst = OP_FTEWRITE; break;
5677 case 'x': ftst = OP_FTEEXEC; break;
5678 case 'o': ftst = OP_FTEOWNED; break;
5679 case 'R': ftst = OP_FTRREAD; break;
5680 case 'W': ftst = OP_FTRWRITE; break;
5681 case 'X': ftst = OP_FTREXEC; break;
5682 case 'O': ftst = OP_FTROWNED; break;
5683 case 'e': ftst = OP_FTIS; break;
5684 case 'z': ftst = OP_FTZERO; break;
5685 case 's': ftst = OP_FTSIZE; break;
5686 case 'f': ftst = OP_FTFILE; break;
5687 case 'd': ftst = OP_FTDIR; break;
5688 case 'l': ftst = OP_FTLINK; break;
5689 case 'p': ftst = OP_FTPIPE; break;
5690 case 'S': ftst = OP_FTSOCK; break;
5691 case 'u': ftst = OP_FTSUID; break;
5692 case 'g': ftst = OP_FTSGID; break;
5693 case 'k': ftst = OP_FTSVTX; break;
5694 case 'b': ftst = OP_FTBLK; break;
5695 case 'c': ftst = OP_FTCHR; break;
5696 case 't': ftst = OP_FTTTY; break;
5697 case 'T': ftst = OP_FTTEXT; break;
5698 case 'B': ftst = OP_FTBINARY; break;
5699 case 'M': case 'A': case 'C':
5700 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5702 case 'M': ftst = OP_FTMTIME; break;
5703 case 'A': ftst = OP_FTATIME; break;
5704 case 'C': ftst = OP_FTCTIME; break;
5712 PL_last_uni = PL_oldbufptr;
5713 PL_last_lop_op = (OPCODE)ftst;
5714 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5715 "### Saw file test %c\n", (int)tmp);
5720 /* Assume it was a minus followed by a one-letter named
5721 * subroutine call (or a -bareword), then. */
5722 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5723 "### '-%c' looked like a file test but was not\n",
5730 const char tmp = *s++;
5733 if (PL_expect == XOPERATOR)
5738 else if (*s == '>') {
5741 if (FEATURE_POSTDEREF_IS_ENABLED && (
5742 ((*s == '$' || *s == '&') && s[1] == '*')
5743 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5744 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5745 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5748 Perl_ck_warner_d(aTHX_
5749 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5750 "Postfix dereference is experimental"
5752 PL_expect = XPOSTDEREF;
5755 if (isIDFIRST_lazy_if(s,UTF)) {
5756 s = force_word(s,METHOD,FALSE,TRUE);
5764 if (PL_expect == XOPERATOR) {
5765 if (*s == '=' && !PL_lex_allbrackets &&
5766 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5773 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5775 OPERATOR('-'); /* unary minus */
5781 const char tmp = *s++;
5784 if (PL_expect == XOPERATOR)
5789 if (PL_expect == XOPERATOR) {
5790 if (*s == '=' && !PL_lex_allbrackets &&
5791 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5798 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5805 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5806 if (PL_expect != XOPERATOR) {
5807 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5808 PL_expect = XOPERATOR;
5809 force_ident(PL_tokenbuf, '*');
5817 if (*s == '=' && !PL_lex_allbrackets &&
5818 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5824 if (*s == '=' && !PL_lex_allbrackets &&
5825 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5829 PL_parser->saw_infix_sigil = 1;
5834 if (PL_expect == XOPERATOR) {
5835 if (s[1] == '=' && !PL_lex_allbrackets &&
5836 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5839 PL_parser->saw_infix_sigil = 1;
5842 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5843 PL_tokenbuf[0] = '%';
5844 s = scan_ident(s, PL_tokenbuf + 1,
5845 sizeof PL_tokenbuf - 1, FALSE);
5847 if (!PL_tokenbuf[1]) {
5850 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5852 PL_tokenbuf[0] = '@';
5854 PL_expect = XOPERATOR;
5855 force_ident_maybe_lex('%');
5859 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5860 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5865 if (PL_lex_brackets > 100)
5866 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5867 PL_lex_brackstack[PL_lex_brackets++] = 0;
5868 PL_lex_allbrackets++;
5870 const char tmp = *s++;
5875 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5877 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5880 Perl_ck_warner_d(aTHX_
5881 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5882 "Smartmatch is experimental");
5888 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5895 goto just_a_word_zero_gv;
5898 switch (PL_expect) {
5904 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5906 PL_bufptr = s; /* update in case we back off */
5909 "Use of := for an empty attribute list is not allowed");
5916 PL_expect = XTERMBLOCK;
5919 stuffstart = s - SvPVX(PL_linestr) - 1;
5923 while (isIDFIRST_lazy_if(s,UTF)) {
5926 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5927 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5928 if (tmp < 0) tmp = -tmp;
5943 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5945 d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5946 COPLINE_SET_FROM_MULTI_END;
5948 /* MUST advance bufptr here to avoid bogus
5949 "at end of line" context messages from yyerror().
5951 PL_bufptr = s + len;
5952 yyerror("Unterminated attribute parameter in attribute list");
5956 return REPORT(0); /* EOF indicator */
5960 sv_catsv(sv, PL_lex_stuff);
5961 attrs = op_append_elem(OP_LIST, attrs,
5962 newSVOP(OP_CONST, 0, sv));
5963 SvREFCNT_dec(PL_lex_stuff);
5964 PL_lex_stuff = NULL;
5967 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5969 if (PL_in_my == KEY_our) {
5970 deprecate(":unique");
5973 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5976 /* NOTE: any CV attrs applied here need to be part of
5977 the CVf_BUILTIN_ATTRS define in cv.h! */
5978 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5980 CvLVALUE_on(PL_compcv);
5982 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5984 deprecate(":locked");
5986 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5988 CvMETHOD_on(PL_compcv);
5990 /* After we've set the flags, it could be argued that
5991 we don't need to do the attributes.pm-based setting
5992 process, and shouldn't bother appending recognized
5993 flags. To experiment with that, uncomment the
5994 following "else". (Note that's already been
5995 uncommented. That keeps the above-applied built-in
5996 attributes from being intercepted (and possibly
5997 rejected) by a package's attribute routines, but is
5998 justified by the performance win for the common case
5999 of applying only built-in attributes.) */
6001 attrs = op_append_elem(OP_LIST, attrs,
6002 newSVOP(OP_CONST, 0,
6006 if (*s == ':' && s[1] != ':')
6009 break; /* require real whitespace or :'s */
6010 /* XXX losing whitespace on sequential attributes here */
6013 if (*s != ';' && *s != '}' &&
6014 !(PL_expect == XOPERATOR
6015 ? (*s == '=' || *s == ')')
6016 : (*s == '{' || *s == '('))) {
6017 const char q = ((*s == '\'') ? '"' : '\'');
6018 /* If here for an expression, and parsed no attrs, back
6020 if (PL_expect == XOPERATOR && !attrs) {
6024 /* MUST advance bufptr here to avoid bogus "at end of line"
6025 context messages from yyerror().
6028 yyerror( (const char *)
6030 ? Perl_form(aTHX_ "Invalid separator character "
6031 "%c%c%c in attribute list", q, *s, q)
6032 : "Unterminated attribute list" ) );
6040 start_force(PL_curforce);
6041 NEXTVAL_NEXTTOKE.opval = attrs;
6042 CURMAD('_', PL_nextwhite);
6047 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6048 (s - SvPVX(PL_linestr)) - stuffstart);
6053 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6057 PL_lex_allbrackets--;
6061 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6062 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6066 PL_lex_allbrackets++;
6069 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6075 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6078 PL_lex_allbrackets--;
6084 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6087 if (PL_lex_brackets <= 0)
6088 /* diag_listed_as: Unmatched right %s bracket */
6089 yyerror("Unmatched right square bracket");
6092 PL_lex_allbrackets--;
6093 if (PL_lex_state == LEX_INTERPNORMAL) {
6094 if (PL_lex_brackets == 0) {
6095 if (*s == '-' && s[1] == '>')
6096 PL_lex_state = LEX_INTERPENDMAYBE;
6097 else if (*s != '[' && *s != '{')
6098 PL_lex_state = LEX_INTERPEND;
6105 if (PL_lex_brackets > 100) {
6106 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6108 switch (PL_expect) {
6110 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6111 PL_lex_allbrackets++;
6112 OPERATOR(HASHBRACK);
6114 while (s < PL_bufend && SPACE_OR_TAB(*s))
6117 PL_tokenbuf[0] = '\0';
6118 if (d < PL_bufend && *d == '-') {
6119 PL_tokenbuf[0] = '-';
6121 while (d < PL_bufend && SPACE_OR_TAB(*d))
6124 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6125 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6127 while (d < PL_bufend && SPACE_OR_TAB(*d))
6130 const char minus = (PL_tokenbuf[0] == '-');
6131 s = force_word(s + minus, WORD, FALSE, TRUE);
6139 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6140 PL_lex_allbrackets++;
6145 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6146 PL_lex_allbrackets++;
6151 if (PL_oldoldbufptr == PL_last_lop)
6152 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6154 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6155 PL_lex_allbrackets++;
6158 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6160 /* This hack is to get the ${} in the message. */
6162 yyerror("syntax error");
6165 OPERATOR(HASHBRACK);
6167 /* This hack serves to disambiguate a pair of curlies
6168 * as being a block or an anon hash. Normally, expectation
6169 * determines that, but in cases where we're not in a
6170 * position to expect anything in particular (like inside
6171 * eval"") we have to resolve the ambiguity. This code
6172 * covers the case where the first term in the curlies is a
6173 * quoted string. Most other cases need to be explicitly
6174 * disambiguated by prepending a "+" before the opening
6175 * curly in order to force resolution as an anon hash.
6177 * XXX should probably propagate the outer expectation
6178 * into eval"" to rely less on this hack, but that could
6179 * potentially break current behavior of eval"".
6183 if (*s == '\'' || *s == '"' || *s == '`') {
6184 /* common case: get past first string, handling escapes */
6185 for (t++; t < PL_bufend && *t != *s;)
6186 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6190 else if (*s == 'q') {
6193 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6194 && !isWORDCHAR(*t))))
6196 /* skip q//-like construct */
6198 char open, close, term;
6201 while (t < PL_bufend && isSPACE(*t))
6203 /* check for q => */
6204 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6205 OPERATOR(HASHBRACK);
6209 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6213 for (t++; t < PL_bufend; t++) {
6214 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6216 else if (*t == open)
6220 for (t++; t < PL_bufend; t++) {
6221 if (*t == '\\' && t+1 < PL_bufend)
6223 else if (*t == close && --brackets <= 0)
6225 else if (*t == open)
6232 /* skip plain q word */
6233 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6236 else if (isWORDCHAR_lazy_if(t,UTF)) {
6238 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6241 while (t < PL_bufend && isSPACE(*t))
6243 /* if comma follows first term, call it an anon hash */
6244 /* XXX it could be a comma expression with loop modifiers */
6245 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6246 || (*t == '=' && t[1] == '>')))
6247 OPERATOR(HASHBRACK);
6248 if (PL_expect == XREF)
6251 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6257 pl_yylval.ival = CopLINE(PL_curcop);
6258 if (isSPACE(*s) || *s == '#')
6259 PL_copline = NOLINE; /* invalidate current command line number */
6260 TOKEN(formbrack ? '=' : '{');
6262 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6266 if (PL_lex_brackets <= 0)
6267 /* diag_listed_as: Unmatched right %s bracket */
6268 yyerror("Unmatched right curly bracket");
6270 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6271 PL_lex_allbrackets--;
6272 if (PL_lex_state == LEX_INTERPNORMAL) {
6273 if (PL_lex_brackets == 0) {
6274 if (PL_expect & XFAKEBRACK) {
6275 PL_expect &= XENUMMASK;
6276 PL_lex_state = LEX_INTERPEND;
6281 PL_thiswhite = newSVpvs("");
6282 sv_catpvs(PL_thiswhite,"}");
6285 return yylex(); /* ignore fake brackets */
6287 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6288 && SvEVALED(PL_lex_repl))
6289 PL_lex_state = LEX_INTERPEND;
6290 else if (*s == '-' && s[1] == '>')
6291 PL_lex_state = LEX_INTERPENDMAYBE;
6292 else if (*s != '[' && *s != '{')
6293 PL_lex_state = LEX_INTERPEND;
6296 if (PL_expect & XFAKEBRACK) {
6297 PL_expect &= XENUMMASK;
6299 return yylex(); /* ignore fake brackets */
6301 start_force(PL_curforce);
6303 curmad('X', newSVpvn(s-1,1));
6304 CURMAD('_', PL_thiswhite);
6306 force_next(formbrack ? '.' : '}');
6307 if (formbrack) LEAVE;
6309 if (PL_madskills && !PL_thistoken)
6310 PL_thistoken = newSVpvs("");
6312 if (formbrack == 2) { /* means . where arguments were expected */
6313 start_force(PL_curforce);
6319 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6322 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6323 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6330 if (PL_expect == XOPERATOR) {
6331 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6332 && isIDFIRST_lazy_if(s,UTF))
6334 CopLINE_dec(PL_curcop);
6335 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6336 CopLINE_inc(PL_curcop);
6338 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6339 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6343 PL_parser->saw_infix_sigil = 1;
6347 PL_tokenbuf[0] = '&';
6348 s = scan_ident(s - 1, PL_tokenbuf + 1,
6349 sizeof PL_tokenbuf - 1, TRUE);
6350 if (PL_tokenbuf[1]) {
6351 PL_expect = XOPERATOR;
6352 force_ident_maybe_lex('&');
6356 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6362 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6363 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6370 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6371 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6379 const char tmp = *s++;
6381 if (!PL_lex_allbrackets &&
6382 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6389 if (!PL_lex_allbrackets &&
6390 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6398 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6399 && strchr("+-*/%.^&|<",tmp))
6400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6401 "Reversed %c= operator",(int)tmp);
6403 if (PL_expect == XSTATE && isALPHA(tmp) &&
6404 (s == PL_linestart+1 || s[-2] == '\n') )
6406 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6407 || PL_lex_state != LEX_NORMAL) {
6412 if (strnEQ(s,"=cut",4)) {
6428 PL_thiswhite = newSVpvs("");
6429 sv_catpvn(PL_thiswhite, PL_linestart,
6430 PL_bufend - PL_linestart);
6434 PL_parser->in_pod = 1;
6438 if (PL_expect == XBLOCK) {
6440 #ifdef PERL_STRICT_CR
6441 while (SPACE_OR_TAB(*t))
6443 while (SPACE_OR_TAB(*t) || *t == '\r')
6446 if (*t == '\n' || *t == '#') {
6449 SAVEI8(PL_parser->form_lex_state);
6450 SAVEI32(PL_lex_formbrack);
6451 PL_parser->form_lex_state = PL_lex_state;
6452 PL_lex_formbrack = PL_lex_brackets + 1;
6456 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6465 const char tmp = *s++;
6467 /* was this !=~ where !~ was meant?
6468 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6470 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6471 const char *t = s+1;
6473 while (t < PL_bufend && isSPACE(*t))
6476 if (*t == '/' || *t == '?' ||
6477 ((*t == 'm' || *t == 's' || *t == 'y')
6478 && !isWORDCHAR(t[1])) ||
6479 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6480 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6481 "!=~ should be !~");
6483 if (!PL_lex_allbrackets &&
6484 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6496 if (PL_expect != XOPERATOR) {
6497 if (s[1] != '<' && !strchr(s,'>'))
6500 s = scan_heredoc(s);
6502 s = scan_inputsymbol(s);
6503 PL_expect = XOPERATOR;
6504 TOKEN(sublex_start());
6510 if (*s == '=' && !PL_lex_allbrackets &&
6511 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6515 SHop(OP_LEFT_SHIFT);
6520 if (!PL_lex_allbrackets &&
6521 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6528 if (!PL_lex_allbrackets &&
6529 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6537 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6545 const char tmp = *s++;
6547 if (*s == '=' && !PL_lex_allbrackets &&
6548 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6552 SHop(OP_RIGHT_SHIFT);
6554 else if (tmp == '=') {
6555 if (!PL_lex_allbrackets &&
6556 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6564 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6573 if (PL_expect == XOPERATOR) {
6574 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6575 return deprecate_commaless_var_list();
6578 else if (PL_expect == XPOSTDEREF) {
6581 POSTDEREF(DOLSHARP);
6586 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6587 PL_tokenbuf[0] = '@';
6588 s = scan_ident(s + 1, PL_tokenbuf + 1,
6589 sizeof PL_tokenbuf - 1, FALSE);
6590 if (PL_expect == XOPERATOR)
6591 no_op("Array length", s);
6592 if (!PL_tokenbuf[1])
6594 PL_expect = XOPERATOR;
6595 force_ident_maybe_lex('#');
6599 PL_tokenbuf[0] = '$';
6600 s = scan_ident(s, PL_tokenbuf + 1,
6601 sizeof PL_tokenbuf - 1, FALSE);
6602 if (PL_expect == XOPERATOR)
6604 if (!PL_tokenbuf[1]) {
6606 yyerror("Final $ should be \\$ or $name");
6612 const char tmp = *s;
6613 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6616 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6617 && intuit_more(s)) {
6619 PL_tokenbuf[0] = '@';
6620 if (ckWARN(WARN_SYNTAX)) {
6623 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6626 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6627 while (t < PL_bufend && *t != ']')
6629 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6630 "Multidimensional syntax %.*s not supported",
6631 (int)((t - PL_bufptr) + 1), PL_bufptr);
6635 else if (*s == '{') {
6637 PL_tokenbuf[0] = '%';
6638 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6639 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6641 char tmpbuf[sizeof PL_tokenbuf];
6644 } while (isSPACE(*t));
6645 if (isIDFIRST_lazy_if(t,UTF)) {
6647 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6652 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6653 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6654 "You need to quote \"%"UTF8f"\"",
6655 UTF8fARG(UTF, len, tmpbuf));
6661 PL_expect = XOPERATOR;
6662 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6663 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6664 if (!islop || PL_last_lop_op == OP_GREPSTART)
6665 PL_expect = XOPERATOR;
6666 else if (strchr("$@\"'`q", *s))
6667 PL_expect = XTERM; /* e.g. print $fh "foo" */
6668 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6669 PL_expect = XTERM; /* e.g. print $fh &sub */
6670 else if (isIDFIRST_lazy_if(s,UTF)) {
6671 char tmpbuf[sizeof PL_tokenbuf];
6673 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6674 if ((t2 = keyword(tmpbuf, len, 0))) {
6675 /* binary operators exclude handle interpretations */
6687 PL_expect = XTERM; /* e.g. print $fh length() */
6692 PL_expect = XTERM; /* e.g. print $fh subr() */
6695 else if (isDIGIT(*s))
6696 PL_expect = XTERM; /* e.g. print $fh 3 */
6697 else if (*s == '.' && isDIGIT(s[1]))
6698 PL_expect = XTERM; /* e.g. print $fh .3 */
6699 else if ((*s == '?' || *s == '-' || *s == '+')
6700 && !isSPACE(s[1]) && s[1] != '=')
6701 PL_expect = XTERM; /* e.g. print $fh -1 */
6702 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6704 PL_expect = XTERM; /* e.g. print $fh /.../
6705 XXX except DORDOR operator
6707 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6709 PL_expect = XTERM; /* print $fh <<"EOF" */
6712 force_ident_maybe_lex('$');
6716 if (PL_expect == XOPERATOR)
6718 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6719 PL_tokenbuf[0] = '@';
6720 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6722 if (!PL_tokenbuf[1]) {
6725 if (PL_lex_state == LEX_NORMAL)
6727 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6729 PL_tokenbuf[0] = '%';
6731 /* Warn about @ where they meant $. */
6732 if (*s == '[' || *s == '{') {
6733 if (ckWARN(WARN_SYNTAX)) {
6734 S_check_scalar_slice(aTHX_ s);
6738 PL_expect = XOPERATOR;
6739 force_ident_maybe_lex('@');
6742 case '/': /* may be division, defined-or, or pattern */
6743 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6744 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6745 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6750 case '?': /* may either be conditional or pattern */
6751 if (PL_expect == XOPERATOR) {
6754 if (!PL_lex_allbrackets &&
6755 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6759 PL_lex_allbrackets++;
6765 /* A // operator. */
6766 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6767 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6768 LEX_FAKEEOF_LOGIC)) {
6776 if (*s == '=' && !PL_lex_allbrackets &&
6777 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6786 /* Disable warning on "study /blah/" */
6787 if (PL_oldoldbufptr == PL_last_uni
6788 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6789 || memNE(PL_last_uni, "study", 5)
6790 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6794 deprecate("?PATTERN? without explicit operator");
6795 s = scan_pat(s,OP_MATCH);
6796 TERM(sublex_start());
6800 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6801 #ifdef PERL_STRICT_CR
6804 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6806 && (s == PL_linestart || s[-1] == '\n') )
6809 formbrack = 2; /* dot seen where arguments expected */
6812 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6816 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6819 if (!PL_lex_allbrackets &&
6820 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6827 pl_yylval.ival = OPf_SPECIAL;
6833 if (*s == '=' && !PL_lex_allbrackets &&
6834 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6841 case '0': case '1': case '2': case '3': case '4':
6842 case '5': case '6': case '7': case '8': case '9':
6843 s = scan_num(s, &pl_yylval);
6844 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6845 if (PL_expect == XOPERATOR)
6850 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6851 COPLINE_SET_FROM_MULTI_END;
6852 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6853 if (PL_expect == XOPERATOR) {
6854 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6855 return deprecate_commaless_var_list();
6862 pl_yylval.ival = OP_CONST;
6863 TERM(sublex_start());
6866 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6869 printbuf("### Saw string before %s\n", s);
6871 PerlIO_printf(Perl_debug_log,
6872 "### Saw unterminated string\n");
6874 if (PL_expect == XOPERATOR) {
6875 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6876 return deprecate_commaless_var_list();
6883 pl_yylval.ival = OP_CONST;
6884 /* FIXME. I think that this can be const if char *d is replaced by
6885 more localised variables. */
6886 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6887 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6888 pl_yylval.ival = OP_STRINGIFY;
6892 if (pl_yylval.ival == OP_CONST)
6893 COPLINE_SET_FROM_MULTI_END;
6894 TERM(sublex_start());
6897 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6898 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6899 if (PL_expect == XOPERATOR)
6900 no_op("Backticks",s);
6903 pl_yylval.ival = OP_BACKTICK;
6904 TERM(sublex_start());
6908 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6910 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6912 if (PL_expect == XOPERATOR)
6913 no_op("Backslash",s);
6917 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6918 char *start = s + 2;
6919 while (isDIGIT(*start) || *start == '_')
6921 if (*start == '.' && isDIGIT(start[1])) {
6922 s = scan_num(s, &pl_yylval);
6925 else if ((*start == ':' && start[1] == ':')
6926 || (PL_expect == XSTATE && *start == ':'))
6928 else if (PL_expect == XSTATE) {
6930 while (d < PL_bufend && isSPACE(*d)) d++;
6931 if (*d == ':') goto keylookup;
6933 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6934 if (!isALPHA(*start) && (PL_expect == XTERM
6935 || PL_expect == XREF || PL_expect == XSTATE
6936 || PL_expect == XTERMORDORDOR)) {
6937 GV *const gv = gv_fetchpvn_flags(s, start - s,
6938 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6940 s = scan_num(s, &pl_yylval);
6947 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7000 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7002 /* Some keywords can be followed by any delimiter, including ':' */
7003 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7005 /* x::* is just a word, unless x is "CORE" */
7006 if (!anydelim && *s == ':' && s[1] == ':') {
7007 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7012 while (d < PL_bufend && isSPACE(*d))
7013 d++; /* no comments skipped here, or s### is misparsed */
7015 /* Is this a word before a => operator? */
7016 if (*d == '=' && d[1] == '>') {
7020 = (OP*)newSVOP(OP_CONST, 0,
7021 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7022 pl_yylval.opval->op_private = OPpCONST_BARE;
7026 /* Check for plugged-in keyword */
7030 char *saved_bufptr = PL_bufptr;
7032 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7034 if (result == KEYWORD_PLUGIN_DECLINE) {
7035 /* not a plugged-in keyword */
7036 PL_bufptr = saved_bufptr;
7037 } else if (result == KEYWORD_PLUGIN_STMT) {
7038 pl_yylval.opval = o;
7041 return REPORT(PLUGSTMT);
7042 } else if (result == KEYWORD_PLUGIN_EXPR) {
7043 pl_yylval.opval = o;
7045 PL_expect = XOPERATOR;
7046 return REPORT(PLUGEXPR);
7048 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7053 /* Check for built-in keyword */
7054 tmp = keyword(PL_tokenbuf, len, 0);
7056 /* Is this a label? */
7057 if (!anydelim && PL_expect == XSTATE
7058 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7060 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7061 pl_yylval.pval[len] = '\0';
7062 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7067 /* Check for lexical sub */
7068 if (PL_expect != XOPERATOR) {
7069 char tmpbuf[sizeof PL_tokenbuf + 1];
7071 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7072 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7073 if (off != NOT_IN_PAD) {
7074 assert(off); /* we assume this is boolean-true below */
7075 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7076 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7077 HEK * const stashname = HvNAME_HEK(stash);
7078 sv = newSVhek(stashname);
7079 sv_catpvs(sv, "::");
7080 sv_catpvn_flags(sv, PL_tokenbuf, len,
7081 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7082 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7092 rv2cv_op = newOP(OP_PADANY, 0);
7093 rv2cv_op->op_targ = off;
7094 cv = find_lexical_cv(off);
7102 if (tmp < 0) { /* second-class keyword? */
7103 GV *ogv = NULL; /* override (winner) */
7104 GV *hgv = NULL; /* hidden (loser) */
7105 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7107 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7108 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7112 if (GvIMPORTED_CV(gv))
7114 else if (! CvMETHOD(cv))
7118 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7122 ? GvCVu(gv) && GvIMPORTED_CV(gv)
7123 : SvPCS_IMPORTED(gv)
7124 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7133 tmp = 0; /* overridden by import or by GLOBAL */
7136 && -tmp==KEY_lock /* XXX generalizable kludge */
7139 tmp = 0; /* any sub overrides "weak" keyword */
7141 else { /* no override */
7143 if (tmp == KEY_dump) {
7144 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7145 "dump() better written as CORE::dump()");
7149 if (hgv && tmp != KEY_x) /* never ambiguous */
7150 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7151 "Ambiguous call resolved as CORE::%s(), "
7152 "qualify as such or use &",
7157 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7158 && (!anydelim || *s != '#')) {
7159 /* no override, and not s### either; skipspace is safe here
7160 * check for => on following line */
7162 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7163 STRLEN soff = s - SvPVX(PL_linestr);
7164 s = skipspace_flags(s, LEX_NO_INCLINE);
7165 arrow = *s == '=' && s[1] == '>';
7166 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7167 s = SvPVX(PL_linestr) + soff;
7175 default: /* not a keyword */
7176 /* Trade off - by using this evil construction we can pull the
7177 variable gv into the block labelled keylookup. If not, then
7178 we have to give it function scope so that the goto from the
7179 earlier ':' case doesn't bypass the initialisation. */
7181 just_a_word_zero_gv:
7193 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7194 const char penultchar =
7195 lastchar && PL_bufptr - 2 >= PL_linestart
7199 SV *nextPL_nextwhite = 0;
7203 /* Get the rest if it looks like a package qualifier */
7205 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7207 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7210 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7211 UTF8fARG(UTF, len, PL_tokenbuf),
7212 *s == '\'' ? "'" : "::");
7217 if (PL_expect == XOPERATOR) {
7218 if (PL_bufptr == PL_linestart) {
7219 CopLINE_dec(PL_curcop);
7220 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7221 CopLINE_inc(PL_curcop);
7224 no_op("Bareword",s);
7227 /* Look for a subroutine with this name in current package,
7228 unless this is a lexical sub, or name is "Foo::",
7229 in which case Foo is a bareword
7230 (and a package name). */
7232 if (len > 2 && !PL_madskills &&
7233 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7235 if (ckWARN(WARN_BAREWORD)
7236 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7237 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7238 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7239 UTF8fARG(UTF, len, PL_tokenbuf));
7241 PL_tokenbuf[len] = '\0';
7247 /* Mustn't actually add anything to a symbol table.
7248 But also don't want to "initialise" any placeholder
7249 constants that might already be there into full
7250 blown PVGVs with attached PVCV. */
7251 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7252 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7258 /* if we saw a global override before, get the right name */
7261 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7262 len ? len : strlen(PL_tokenbuf));
7264 SV * const tmp_sv = sv;
7265 sv = newSVpvs("CORE::GLOBAL::");
7266 sv_catsv(sv, tmp_sv);
7267 SvREFCNT_dec(tmp_sv);
7271 if (PL_madskills && !PL_thistoken) {
7272 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7273 PL_thistoken = newSVpvn(start,s - start);
7274 PL_realtokenstart = s - SvPVX(PL_linestr);
7278 /* Presume this is going to be a bareword of some sort. */
7280 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7281 pl_yylval.opval->op_private = OPpCONST_BARE;
7283 /* And if "Foo::", then that's what it certainly is. */
7289 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7290 const_op->op_private = OPpCONST_BARE;
7291 rv2cv_op = newCVREF(0, const_op);
7292 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7295 /* See if it's the indirect object for a list operator. */
7297 if (PL_oldoldbufptr &&
7298 PL_oldoldbufptr < PL_bufptr &&
7299 (PL_oldoldbufptr == PL_last_lop
7300 || PL_oldoldbufptr == PL_last_uni) &&
7301 /* NO SKIPSPACE BEFORE HERE! */
7302 (PL_expect == XREF ||
7303 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7305 bool immediate_paren = *s == '(';
7307 /* (Now we can afford to cross potential line boundary.) */
7308 s = SKIPSPACE2(s,nextPL_nextwhite);
7310 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7313 /* Two barewords in a row may indicate method call. */
7315 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7316 (tmp = intuit_method(s, gv, cv))) {
7318 if (tmp == METHOD && !PL_lex_allbrackets &&
7319 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7320 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7324 /* If not a declared subroutine, it's an indirect object. */
7325 /* (But it's an indir obj regardless for sort.) */
7326 /* Also, if "_" follows a filetest operator, it's a bareword */
7329 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7331 (PL_last_lop_op != OP_MAPSTART &&
7332 PL_last_lop_op != OP_GREPSTART))))
7333 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7334 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7337 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7342 PL_expect = XOPERATOR;
7345 s = SKIPSPACE2(s,nextPL_nextwhite);
7346 PL_nextwhite = nextPL_nextwhite;
7351 /* Is this a word before a => operator? */
7352 if (*s == '=' && s[1] == '>' && !pkgname) {
7355 /* This is our own scalar, created a few lines above,
7357 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7358 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7359 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7360 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7361 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7365 /* If followed by a paren, it's certainly a subroutine. */
7370 while (SPACE_OR_TAB(*d))
7372 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7379 PL_nextwhite = PL_thiswhite;
7382 start_force(PL_curforce);
7384 NEXTVAL_NEXTTOKE.opval =
7385 off ? rv2cv_op : pl_yylval.opval;
7386 PL_expect = XOPERATOR;
7389 PL_nextwhite = nextPL_nextwhite;
7390 curmad('X', PL_thistoken);
7391 PL_thistoken = newSVpvs("");
7395 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7396 else op_free(rv2cv_op), force_next(WORD);
7401 /* If followed by var or block, call it a method (unless sub) */
7403 if ((*s == '$' || *s == '{') && !cv) {
7405 PL_last_lop = PL_oldbufptr;
7406 PL_last_lop_op = OP_METHOD;
7407 if (!PL_lex_allbrackets &&
7408 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7409 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7413 /* If followed by a bareword, see if it looks like indir obj. */
7416 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7417 && (tmp = intuit_method(s, gv, cv))) {
7419 if (tmp == METHOD && !PL_lex_allbrackets &&
7420 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7421 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7425 /* Not a method, so call it a subroutine (if defined) */
7428 if (lastchar == '-' && penultchar != '-') {
7429 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7430 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7431 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7432 UTF8fARG(UTF, l, PL_tokenbuf),
7433 UTF8fARG(UTF, l, PL_tokenbuf));
7435 /* Check for a constant sub */
7436 if ((sv = cv_const_sv_or_av(cv))) {
7439 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7440 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7441 if (SvTYPE(sv) == SVt_PVAV)
7442 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7445 pl_yylval.opval->op_private = 0;
7446 pl_yylval.opval->op_folded = 1;
7447 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7452 op_free(pl_yylval.opval);
7454 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7455 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7456 PL_last_lop = PL_oldbufptr;
7457 PL_last_lop_op = OP_ENTERSUB;
7458 /* Is there a prototype? */
7465 STRLEN protolen = CvPROTOLEN(cv);
7466 const char *proto = CvPROTO(cv);
7468 proto = S_strip_spaces(aTHX_ proto, &protolen);
7471 if ((optional = *proto == ';'))
7474 while (*proto == ';');
7478 *proto == '$' || *proto == '_'
7479 || *proto == '*' || *proto == '+'
7484 *proto == '\\' && proto[1] && proto[2] == '\0'
7487 UNIPROTO(UNIOPSUB,optional);
7488 if (*proto == '\\' && proto[1] == '[') {
7489 const char *p = proto + 2;
7490 while(*p && *p != ']')
7492 if(*p == ']' && !p[1])
7493 UNIPROTO(UNIOPSUB,optional);
7495 if (*proto == '&' && *s == '{') {
7497 sv_setpvs(PL_subname, "__ANON__");
7499 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7500 if (!PL_lex_allbrackets &&
7501 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7502 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7509 PL_nextwhite = PL_thiswhite;
7512 start_force(PL_curforce);
7513 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7516 PL_nextwhite = nextPL_nextwhite;
7517 curmad('X', PL_thistoken);
7518 PL_thistoken = newSVpvs("");
7520 force_next(off ? PRIVATEREF : WORD);
7521 if (!PL_lex_allbrackets &&
7522 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7523 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7528 /* Guess harder when madskills require "best effort". */
7529 if (PL_madskills && (!gv || !GvCVu(gv))) {
7530 int probable_sub = 0;
7531 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7533 else if (isALPHA(*s)) {
7537 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7538 if (!keyword(tmpbuf, tmplen, 0))
7541 while (d < PL_bufend && isSPACE(*d))
7543 if (*d == '=' && d[1] == '>')
7548 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7550 op_free(pl_yylval.opval);
7552 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7553 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7554 PL_last_lop = PL_oldbufptr;
7555 PL_last_lop_op = OP_ENTERSUB;
7556 PL_nextwhite = PL_thiswhite;
7558 start_force(PL_curforce);
7559 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7561 PL_nextwhite = nextPL_nextwhite;
7562 curmad('X', PL_thistoken);
7563 PL_thistoken = newSVpvs("");
7564 force_next(off ? PRIVATEREF : WORD);
7565 if (!PL_lex_allbrackets &&
7566 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7567 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7571 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7573 force_next(off ? PRIVATEREF : WORD);
7574 if (!PL_lex_allbrackets &&
7575 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7576 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7581 /* Call it a bare word */
7583 if (PL_hints & HINT_STRICT_SUBS)
7584 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7587 /* after "print" and similar functions (corresponding to
7588 * "F? L" in opcode.pl), whatever wasn't already parsed as
7589 * a filehandle should be subject to "strict subs".
7590 * Likewise for the optional indirect-object argument to system
7591 * or exec, which can't be a bareword */
7592 if ((PL_last_lop_op == OP_PRINT
7593 || PL_last_lop_op == OP_PRTF
7594 || PL_last_lop_op == OP_SAY
7595 || PL_last_lop_op == OP_SYSTEM
7596 || PL_last_lop_op == OP_EXEC)
7597 && (PL_hints & HINT_STRICT_SUBS))
7598 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7599 if (lastchar != '-') {
7600 if (ckWARN(WARN_RESERVED)) {
7604 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7606 /* PL_warn_reserved is constant */
7607 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7608 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7618 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7619 && saw_infix_sigil) {
7620 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7621 "Operator or semicolon missing before %c%"UTF8f,
7623 UTF8fARG(UTF, strlen(PL_tokenbuf),
7625 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7626 "Ambiguous use of %c resolved as operator %c",
7627 lastchar, lastchar);
7634 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7639 (OP*)newSVOP(OP_CONST, 0,
7640 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7643 case KEY___PACKAGE__:
7645 (OP*)newSVOP(OP_CONST, 0,
7647 ? newSVhek(HvNAME_HEK(PL_curstash))
7654 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7655 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7658 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7660 gv_init(gv,stash,"DATA",4,0);
7663 GvIOp(gv) = newIO();
7664 IoIFP(GvIOp(gv)) = PL_rsfp;
7665 #if defined(HAS_FCNTL) && defined(F_SETFD)
7667 const int fd = PerlIO_fileno(PL_rsfp);
7668 fcntl(fd,F_SETFD,fd >= 3);
7671 /* Mark this internal pseudo-handle as clean */
7672 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7673 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7674 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7676 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7677 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7678 /* if the script was opened in binmode, we need to revert
7679 * it to text mode for compatibility; but only iff it has CRs
7680 * XXX this is a questionable hack at best. */
7681 if (PL_bufend-PL_bufptr > 2
7682 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7685 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7686 loc = PerlIO_tell(PL_rsfp);
7687 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7690 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7692 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7693 #endif /* NETWARE */
7695 PerlIO_seek(PL_rsfp, loc, 0);
7699 #ifdef PERLIO_LAYERS
7702 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7703 else if (PL_encoding) {
7709 XPUSHs(PL_encoding);
7711 call_method("name", G_SCALAR);
7715 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7716 Perl_form(aTHX_ ":encoding(%"SVf")",
7725 if (PL_realtokenstart >= 0) {
7726 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7728 PL_endwhite = newSVpvs("");
7729 sv_catsv(PL_endwhite, PL_thiswhite);
7731 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7732 PL_realtokenstart = -1;
7734 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7744 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7753 if (PL_expect == XSTATE) {
7764 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7765 if ((*s == ':' && s[1] == ':')
7766 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7770 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7774 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7775 UTF8fARG(UTF, len, PL_tokenbuf));
7778 else if (tmp == KEY_require || tmp == KEY_do
7780 /* that's a way to remember we saw "CORE::" */
7792 LOP(OP_ACCEPT,XTERM);
7795 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7800 LOP(OP_ATAN2,XTERM);
7806 LOP(OP_BINMODE,XTERM);
7809 LOP(OP_BLESS,XTERM);
7818 /* We have to disambiguate the two senses of
7819 "continue". If the next token is a '{' then
7820 treat it as the start of a continue block;
7821 otherwise treat it as a control operator.
7831 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7841 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7850 if (!PL_cryptseen) {
7851 PL_cryptseen = TRUE;
7855 LOP(OP_CRYPT,XTERM);
7858 LOP(OP_CHMOD,XTERM);
7861 LOP(OP_CHOWN,XTERM);
7864 LOP(OP_CONNECT,XTERM);
7884 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7886 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7887 && !keyword(PL_tokenbuf + 1, len, 0)) {
7890 force_ident_maybe_lex('&');
7895 if (orig_keyword == KEY_do) {
7904 PL_hints |= HINT_BLOCK_SCOPE;
7914 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7915 STR_WITH_LEN("NDBM_File::"),
7916 STR_WITH_LEN("DB_File::"),
7917 STR_WITH_LEN("GDBM_File::"),
7918 STR_WITH_LEN("SDBM_File::"),
7919 STR_WITH_LEN("ODBM_File::"),
7921 LOP(OP_DBMOPEN,XTERM);
7927 PL_expect = XOPERATOR;
7928 s = force_word(s,WORD,TRUE,FALSE);
7935 pl_yylval.ival = CopLINE(PL_curcop);
7939 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7953 if (*s == '{') { /* block eval */
7954 PL_expect = XTERMBLOCK;
7955 UNIBRACK(OP_ENTERTRY);
7957 else { /* string eval */
7959 UNIBRACK(OP_ENTEREVAL);
7964 UNIBRACK(-OP_ENTEREVAL);
7978 case KEY_endhostent:
7984 case KEY_endservent:
7987 case KEY_endprotoent:
7998 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8000 pl_yylval.ival = CopLINE(PL_curcop);
8002 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8005 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8008 if ((PL_bufend - p) >= 3 &&
8009 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8011 else if ((PL_bufend - p) >= 4 &&
8012 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8015 /* skip optional package name, as in "for my abc $x (..)" */
8016 if (isIDFIRST_lazy_if(p,UTF)) {
8017 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8021 Perl_croak(aTHX_ "Missing $ on loop variable");
8023 s = SvPVX(PL_linestr) + soff;
8029 LOP(OP_FORMLINE,XTERM);
8038 LOP(OP_FCNTL,XTERM);
8044 LOP(OP_FLOCK,XTERM);
8047 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8052 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8057 LOP(OP_GREPSTART, XREF);
8060 PL_expect = XOPERATOR;
8061 s = force_word(s,WORD,TRUE,FALSE);
8076 case KEY_getpriority:
8077 LOP(OP_GETPRIORITY,XTERM);
8079 case KEY_getprotobyname:
8082 case KEY_getprotobynumber:
8083 LOP(OP_GPBYNUMBER,XTERM);
8085 case KEY_getprotoent:
8097 case KEY_getpeername:
8098 UNI(OP_GETPEERNAME);
8100 case KEY_gethostbyname:
8103 case KEY_gethostbyaddr:
8104 LOP(OP_GHBYADDR,XTERM);
8106 case KEY_gethostent:
8109 case KEY_getnetbyname:
8112 case KEY_getnetbyaddr:
8113 LOP(OP_GNBYADDR,XTERM);
8118 case KEY_getservbyname:
8119 LOP(OP_GSBYNAME,XTERM);
8121 case KEY_getservbyport:
8122 LOP(OP_GSBYPORT,XTERM);
8124 case KEY_getservent:
8127 case KEY_getsockname:
8128 UNI(OP_GETSOCKNAME);
8130 case KEY_getsockopt:
8131 LOP(OP_GSOCKOPT,XTERM);
8146 pl_yylval.ival = CopLINE(PL_curcop);
8147 Perl_ck_warner_d(aTHX_
8148 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8149 "given is experimental");
8154 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8162 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8164 pl_yylval.ival = CopLINE(PL_curcop);
8168 LOP(OP_INDEX,XTERM);
8174 LOP(OP_IOCTL,XTERM);
8186 PL_expect = XOPERATOR;
8187 s = force_word(s,WORD,TRUE,FALSE);
8204 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8209 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8223 LOP(OP_LISTEN,XTERM);
8232 s = scan_pat(s,OP_MATCH);
8233 TERM(sublex_start());
8236 LOP(OP_MAPSTART, XREF);
8239 LOP(OP_MKDIR,XTERM);
8242 LOP(OP_MSGCTL,XTERM);
8245 LOP(OP_MSGGET,XTERM);
8248 LOP(OP_MSGRCV,XTERM);
8251 LOP(OP_MSGSND,XTERM);
8256 PL_in_my = (U16)tmp;
8258 if (isIDFIRST_lazy_if(s,UTF)) {
8262 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8263 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8265 if (!FEATURE_LEXSUBS_IS_ENABLED)
8267 "Experimental \"%s\" subs not enabled",
8268 tmp == KEY_my ? "my" :
8269 tmp == KEY_state ? "state" : "our");
8270 Perl_ck_warner_d(aTHX_
8271 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8272 "The lexical_subs feature is experimental");
8275 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8276 if (!PL_in_my_stash) {
8279 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8280 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8283 if (PL_madskills) { /* just add type to declarator token */
8284 sv_catsv(PL_thistoken, PL_nextwhite);
8286 sv_catpvn(PL_thistoken, start, s - start);
8294 PL_expect = XOPERATOR;
8295 s = force_word(s,WORD,TRUE,FALSE);
8299 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8304 s = tokenize_use(0, s);
8308 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8311 if (!PL_lex_allbrackets &&
8312 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8313 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8319 if (isIDFIRST_lazy_if(s,UTF)) {
8321 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8323 for (t=d; isSPACE(*t);)
8325 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8327 && !(t[0] == '=' && t[1] == '>')
8328 && !(t[0] == ':' && t[1] == ':')
8329 && !keyword(s, d-s, 0)
8331 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8332 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8333 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8339 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8341 pl_yylval.ival = OP_OR;
8351 LOP(OP_OPEN_DIR,XTERM);
8354 checkcomma(s,PL_tokenbuf,"filehandle");
8358 checkcomma(s,PL_tokenbuf,"filehandle");
8377 s = force_word(s,WORD,FALSE,TRUE);
8379 s = force_strict_version(s);
8380 PL_lex_expect = XBLOCK;
8384 LOP(OP_PIPE_OP,XTERM);
8387 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8388 COPLINE_SET_FROM_MULTI_END;
8391 pl_yylval.ival = OP_CONST;
8392 TERM(sublex_start());
8399 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8400 COPLINE_SET_FROM_MULTI_END;
8403 PL_expect = XOPERATOR;
8404 if (SvCUR(PL_lex_stuff)) {
8405 int warned_comma = !ckWARN(WARN_QW);
8406 int warned_comment = warned_comma;
8407 d = SvPV_force(PL_lex_stuff, len);
8409 for (; isSPACE(*d) && len; --len, ++d)
8414 if (!warned_comma || !warned_comment) {
8415 for (; !isSPACE(*d) && len; --len, ++d) {
8416 if (!warned_comma && *d == ',') {
8417 Perl_warner(aTHX_ packWARN(WARN_QW),
8418 "Possible attempt to separate words with commas");
8421 else if (!warned_comment && *d == '#') {
8422 Perl_warner(aTHX_ packWARN(WARN_QW),
8423 "Possible attempt to put comments in qw() list");
8429 for (; !isSPACE(*d) && len; --len, ++d)
8432 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8433 words = op_append_elem(OP_LIST, words,
8434 newSVOP(OP_CONST, 0, tokeq(sv)));
8439 words = newNULLLIST();
8441 SvREFCNT_dec(PL_lex_stuff);
8442 PL_lex_stuff = NULL;
8444 PL_expect = XOPERATOR;
8445 pl_yylval.opval = sawparens(words);
8450 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8453 pl_yylval.ival = OP_STRINGIFY;
8454 if (SvIVX(PL_lex_stuff) == '\'')
8455 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8456 TERM(sublex_start());
8459 s = scan_pat(s,OP_QR);
8460 TERM(sublex_start());
8463 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8466 pl_yylval.ival = OP_BACKTICK;
8467 TERM(sublex_start());
8474 PL_expect = XOPERATOR;
8476 s = force_version(s, FALSE);
8478 else if (*s != 'v' || !isDIGIT(s[1])
8479 || (s = force_version(s, TRUE), *s == 'v'))
8481 *PL_tokenbuf = '\0';
8482 s = force_word(s,WORD,TRUE,TRUE);
8483 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8484 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8485 GV_ADD | (UTF ? SVf_UTF8 : 0));
8487 yyerror("<> should be quotes");
8489 if (orig_keyword == KEY_require) {
8497 PL_last_uni = PL_oldbufptr;
8498 PL_last_lop_op = OP_REQUIRE;
8500 return REPORT( (int)REQUIRE );
8506 PL_expect = XOPERATOR;
8507 s = force_word(s,WORD,TRUE,FALSE);
8511 LOP(OP_RENAME,XTERM);
8520 LOP(OP_RINDEX,XTERM);
8529 UNIDOR(OP_READLINE);
8532 UNIDOR(OP_BACKTICK);
8541 LOP(OP_REVERSE,XTERM);
8544 UNIDOR(OP_READLINK);
8551 if (pl_yylval.opval)
8552 TERM(sublex_start());
8554 TOKEN(1); /* force error */
8557 checkcomma(s,PL_tokenbuf,"filehandle");
8567 LOP(OP_SELECT,XTERM);
8573 LOP(OP_SEMCTL,XTERM);
8576 LOP(OP_SEMGET,XTERM);
8579 LOP(OP_SEMOP,XTERM);
8585 LOP(OP_SETPGRP,XTERM);
8587 case KEY_setpriority:
8588 LOP(OP_SETPRIORITY,XTERM);
8590 case KEY_sethostent:
8596 case KEY_setservent:
8599 case KEY_setprotoent:
8609 LOP(OP_SEEKDIR,XTERM);
8611 case KEY_setsockopt:
8612 LOP(OP_SSOCKOPT,XTERM);
8618 LOP(OP_SHMCTL,XTERM);
8621 LOP(OP_SHMGET,XTERM);
8624 LOP(OP_SHMREAD,XTERM);
8627 LOP(OP_SHMWRITE,XTERM);
8630 LOP(OP_SHUTDOWN,XTERM);
8639 LOP(OP_SOCKET,XTERM);
8641 case KEY_socketpair:
8642 LOP(OP_SOCKPAIR,XTERM);
8645 checkcomma(s,PL_tokenbuf,"subroutine name");
8648 s = force_word(s,WORD,TRUE,TRUE);
8652 LOP(OP_SPLIT,XTERM);
8655 LOP(OP_SPRINTF,XTERM);
8658 LOP(OP_SPLICE,XTERM);
8673 LOP(OP_SUBSTR,XTERM);
8679 char * const tmpbuf = PL_tokenbuf + 1;
8680 expectation attrful;
8681 bool have_name, have_proto;
8682 const int key = tmp;
8684 SV *format_name = NULL;
8690 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8691 SV *subtoken = PL_madskills
8692 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8697 s = SKIPSPACE2(s,tmpwhite);
8703 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8704 (*s == ':' && s[1] == ':'))
8707 SV *nametoke = NULL;
8711 attrful = XATTRBLOCK;
8712 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8716 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8718 if (key == KEY_format)
8719 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8722 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8724 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8726 sv_setpvn(PL_subname, tmpbuf, len);
8728 sv_setsv(PL_subname,PL_curstname);
8729 sv_catpvs(PL_subname,"::");
8730 sv_catpvn(PL_subname,tmpbuf,len);
8732 if (SvUTF8(PL_linestr))
8733 SvUTF8_on(PL_subname);
8739 CURMAD('X', nametoke);
8740 CURMAD('_', tmpwhite);
8741 force_ident_maybe_lex('&');
8743 s = SKIPSPACE2(d,tmpwhite);
8749 if (key == KEY_my || key == KEY_our || key==KEY_state)
8752 /* diag_listed_as: Missing name in "%s sub" */
8754 "Missing name in \"%s\"", PL_bufptr);
8756 PL_expect = XTERMBLOCK;
8757 attrful = XATTRTERM;
8758 sv_setpvs(PL_subname,"?");
8762 if (key == KEY_format) {
8764 PL_thistoken = subtoken;
8768 start_force(PL_curforce);
8769 NEXTVAL_NEXTTOKE.opval
8770 = (OP*)newSVOP(OP_CONST,0, format_name);
8771 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8778 /* Look for a prototype */
8779 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8780 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8781 COPLINE_SET_FROM_MULTI_END;
8783 Perl_croak(aTHX_ "Prototype not terminated");
8784 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8789 CURMAD('q', PL_thisopen);
8790 CURMAD('_', tmpwhite);
8791 CURMAD('=', PL_thisstuff);
8792 CURMAD('Q', PL_thisclose);
8793 NEXTVAL_NEXTTOKE.opval =
8794 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8795 PL_lex_stuff = NULL;
8798 s = SKIPSPACE2(s,tmpwhite);
8806 if (*s == ':' && s[1] != ':')
8807 PL_expect = attrful;
8808 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8810 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8811 else if (*s != ';' && *s != '}')
8812 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8819 curmad('^', newSVpvs(""));
8820 CURMAD('_', tmpwhite);
8824 PL_thistoken = subtoken;
8825 PERL_UNUSED_VAR(have_proto);
8828 NEXTVAL_NEXTTOKE.opval =
8829 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8830 PL_lex_stuff = NULL;
8836 sv_setpvs(PL_subname, "__ANON__");
8838 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8842 force_ident_maybe_lex('&');
8848 LOP(OP_SYSTEM,XREF);
8851 LOP(OP_SYMLINK,XTERM);
8854 LOP(OP_SYSCALL,XTERM);
8857 LOP(OP_SYSOPEN,XTERM);
8860 LOP(OP_SYSSEEK,XTERM);
8863 LOP(OP_SYSREAD,XTERM);
8866 LOP(OP_SYSWRITE,XTERM);
8871 TERM(sublex_start());
8892 LOP(OP_TRUNCATE,XTERM);
8904 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8906 pl_yylval.ival = CopLINE(PL_curcop);
8910 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8912 pl_yylval.ival = CopLINE(PL_curcop);
8916 LOP(OP_UNLINK,XTERM);
8922 LOP(OP_UNPACK,XTERM);
8925 LOP(OP_UTIME,XTERM);
8931 LOP(OP_UNSHIFT,XTERM);
8934 s = tokenize_use(1, s);
8944 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8946 pl_yylval.ival = CopLINE(PL_curcop);
8947 Perl_ck_warner_d(aTHX_
8948 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8949 "when is experimental");
8953 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8955 pl_yylval.ival = CopLINE(PL_curcop);
8959 PL_hints |= HINT_BLOCK_SCOPE;
8966 LOP(OP_WAITPID,XTERM);
8972 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8973 * we use the same number on EBCDIC */
8974 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8978 if (PL_expect == XOPERATOR) {
8979 if (*s == '=' && !PL_lex_allbrackets &&
8980 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8988 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8990 pl_yylval.ival = OP_XOR;
8999 Looks up an identifier in the pad or in a package
9002 PRIVATEREF if this is a lexical name.
9003 WORD if this belongs to a package.
9006 if we're in a my declaration
9007 croak if they tried to say my($foo::bar)
9008 build the ops for a my() declaration
9009 if it's an access to a my() variable
9010 build ops for access to a my() variable
9011 if in a dq string, and they've said @foo and we can't find @foo
9013 build ops for a bareword
9017 S_pending_ident(pTHX)
9021 const char pit = (char)pl_yylval.ival;
9022 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9023 /* All routes through this function want to know if there is a colon. */
9024 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9026 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9027 "### Pending identifier '%s'\n", PL_tokenbuf); });
9029 /* if we're in a my(), we can't allow dynamics here.
9030 $foo'bar has already been turned into $foo::bar, so
9031 just check for colons.
9033 if it's a legal name, the OP is a PADANY.
9036 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9038 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9039 "variable %s in \"our\"",
9040 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9041 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9045 /* PL_no_myglob is constant */
9046 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9047 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9048 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9049 UTF ? SVf_UTF8 : 0);
9053 pl_yylval.opval = newOP(OP_PADANY, 0);
9054 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9055 UTF ? SVf_UTF8 : 0);
9061 build the ops for accesses to a my() variable.
9066 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9067 UTF ? SVf_UTF8 : 0);
9068 if (tmp != NOT_IN_PAD) {
9069 /* might be an "our" variable" */
9070 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9071 /* build ops for a bareword */
9072 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9073 HEK * const stashname = HvNAME_HEK(stash);
9074 SV * const sym = newSVhek(stashname);
9075 sv_catpvs(sym, "::");
9076 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9077 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9078 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9082 ? (GV_ADDMULTI | GV_ADDINEVAL)
9085 ((PL_tokenbuf[0] == '$') ? SVt_PV
9086 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9091 pl_yylval.opval = newOP(OP_PADANY, 0);
9092 pl_yylval.opval->op_targ = tmp;
9098 Whine if they've said @foo in a doublequoted string,
9099 and @foo isn't a variable we can find in the symbol
9102 if (ckWARN(WARN_AMBIGUOUS) &&
9103 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9104 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9105 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9106 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9107 /* DO NOT warn for @- and @+ */
9108 && !( PL_tokenbuf[2] == '\0' &&
9109 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9112 /* Downgraded from fatal to warning 20000522 mjd */
9113 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9114 "Possible unintended interpolation of %"UTF8f
9116 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9120 /* build ops for a bareword */
9121 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9122 newSVpvn_flags(PL_tokenbuf + 1,
9124 UTF ? SVf_UTF8 : 0 ));
9125 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9127 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9128 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9129 | ( UTF ? SVf_UTF8 : 0 ),
9130 ((PL_tokenbuf[0] == '$') ? SVt_PV
9131 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9137 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9141 PERL_ARGS_ASSERT_CHECKCOMMA;
9143 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9144 if (ckWARN(WARN_SYNTAX)) {
9147 for (w = s+2; *w && level; w++) {
9155 /* the list of chars below is for end of statements or
9156 * block / parens, boolean operators (&&, ||, //) and branch
9157 * constructs (or, and, if, until, unless, while, err, for).
9158 * Not a very solid hack... */
9159 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9160 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9161 "%s (...) interpreted as function",name);
9164 while (s < PL_bufend && isSPACE(*s))
9168 while (s < PL_bufend && isSPACE(*s))
9170 if (isIDFIRST_lazy_if(s,UTF)) {
9171 const char * const w = s;
9172 s += UTF ? UTF8SKIP(s) : 1;
9173 while (isWORDCHAR_lazy_if(s,UTF))
9174 s += UTF ? UTF8SKIP(s) : 1;
9175 while (s < PL_bufend && isSPACE(*s))
9179 if (keyword(w, s - w, 0))
9182 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9183 if (gv && GvCVu(gv))
9185 Perl_croak(aTHX_ "No comma allowed after %s", what);
9190 /* S_new_constant(): do any overload::constant lookup.
9192 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9193 Best used as sv=new_constant(..., sv, ...).
9194 If s, pv are NULL, calls subroutine with one argument,
9195 and <type> is used with error messages only.
9196 <type> is assumed to be well formed UTF-8 */
9199 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9200 SV *sv, SV *pv, const char *type, STRLEN typelen)
9203 HV * table = GvHV(PL_hintgv); /* ^H */
9208 const char *why1 = "", *why2 = "", *why3 = "";
9210 PERL_ARGS_ASSERT_NEW_CONSTANT;
9211 /* We assume that this is true: */
9212 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9215 /* charnames doesn't work well if there have been errors found */
9216 if (PL_error_count > 0 && *key == 'c')
9218 SvREFCNT_dec_NN(sv);
9219 return &PL_sv_undef;
9222 sv_2mortal(sv); /* Parent created it permanently */
9224 || ! (PL_hints & HINT_LOCALIZE_HH)
9225 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9230 /* Here haven't found what we're looking for. If it is charnames,
9231 * perhaps it needs to be loaded. Try doing that before giving up */
9233 Perl_load_module(aTHX_
9235 newSVpvs("_charnames"),
9236 /* version parameter; no need to specify it, as if
9237 * we get too early a version, will fail anyway,
9238 * not being able to find '_charnames' */
9243 assert(sp == PL_stack_sp);
9244 table = GvHV(PL_hintgv);
9246 && (PL_hints & HINT_LOCALIZE_HH)
9247 && (cvp = hv_fetch(table, key, keylen, FALSE))
9253 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9254 msg = Perl_form(aTHX_
9255 "Constant(%.*s) unknown",
9256 (int)(type ? typelen : len),
9262 why3 = "} is not defined";
9265 msg = Perl_form(aTHX_
9266 /* The +3 is for '\N{'; -4 for that, plus '}' */
9267 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9271 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9272 (int)(type ? typelen : len),
9273 (type ? type: s), why1, why2, why3);
9276 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9277 return SvREFCNT_inc_simple_NN(sv);
9282 pv = newSVpvn_flags(s, len, SVs_TEMP);
9284 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9286 typesv = &PL_sv_undef;
9288 PUSHSTACKi(PERLSI_OVERLOAD);
9300 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9304 /* Check the eval first */
9305 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9307 const char * errstr;
9308 sv_catpvs(errsv, "Propagated");
9309 errstr = SvPV_const(errsv, errlen);
9310 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9312 res = SvREFCNT_inc_simple_NN(sv);
9316 SvREFCNT_inc_simple_void_NN(res);
9325 why1 = "Call to &{$^H{";
9327 why3 = "}} did not return a defined value";
9329 (void)sv_2mortal(sv);
9336 PERL_STATIC_INLINE void
9337 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9339 PERL_ARGS_ASSERT_PARSE_IDENT;
9343 Perl_croak(aTHX_ "%s", ident_too_long);
9344 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9345 /* The UTF-8 case must come first, otherwise things
9346 * like c\N{COMBINING TILDE} would start failing, as the
9347 * isWORDCHAR_A case below would gobble the 'c' up.
9350 char *t = *s + UTF8SKIP(*s);
9351 while (isIDCONT_utf8((U8*)t))
9353 if (*d + (t - *s) > e)
9354 Perl_croak(aTHX_ "%s", ident_too_long);
9355 Copy(*s, *d, t - *s, char);
9359 else if ( isWORDCHAR_A(**s) ) {
9362 } while (isWORDCHAR_A(**s) && *d < e);
9364 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9369 else if (allow_package && **s == ':' && (*s)[1] == ':'
9370 /* Disallow things like Foo::$bar. For the curious, this is
9371 * the code path that triggers the "Bad name after" warning
9372 * when looking for barewords.
9374 && (*s)[2] != '$') {
9384 /* Returns a NUL terminated string, with the length of the string written to
9388 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9392 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9393 bool is_utf8 = cBOOL(UTF);
9395 PERL_ARGS_ASSERT_SCAN_WORD;
9397 parse_ident(&s, &d, e, allow_package, is_utf8);
9404 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9407 I32 herelines = PL_parser->herelines;
9408 SSize_t bracket = -1;
9411 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9412 bool is_utf8 = cBOOL(UTF);
9413 I32 orig_copline = 0, tmp_copline = 0;
9415 PERL_ARGS_ASSERT_SCAN_IDENT;
9420 while (isDIGIT(*s)) {
9422 Perl_croak(aTHX_ "%s", ident_too_long);
9427 parse_ident(&s, &d, e, 1, is_utf8);
9432 /* Either a digit variable, or parse_ident() found an identifier
9433 (anything valid as a bareword), so job done and return. */
9434 if (PL_lex_state != LEX_NORMAL)
9435 PL_lex_state = LEX_INTERPENDMAYBE;
9438 if (*s == '$' && s[1] &&
9439 (isIDFIRST_lazy_if(s+1,is_utf8)
9440 || isDIGIT_A((U8)s[1])
9443 || strnEQ(s+1,"::",2)) )
9445 /* Dereferencing a value in a scalar variable.
9446 The alternatives are different syntaxes for a scalar variable.
9447 Using ' as a leading package separator isn't allowed. :: is. */
9450 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9452 bracket = s - SvPVX(PL_linestr);
9454 orig_copline = CopLINE(PL_curcop);
9455 if (s < PL_bufend && isSPACE(*s)) {
9460 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9461 * iff Unicode semantics are to be used. The legal ones are any of:
9463 * b) ASCII punctuation
9464 * c) When not under Unicode rules, any upper Latin1 character
9465 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9466 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
9468 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9469 || isDIGIT_A((U8)(d)) \
9470 || (!(u) && !isASCII((U8)(d))) \
9471 || ((((U8)(d)) < 32) \
9472 && (((((U8)(d)) >= 14) \
9473 || (((U8)(d)) <= 8 && (d) != 0) \
9474 || (((U8)(d)) == 13)))) \
9475 || (((U8)(d)) == toCTRL('?')))
9477 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9479 if ( isCNTRL_A((U8)*s) ) {
9480 deprecate("literal control characters in variable names");
9484 const STRLEN skip = UTF8SKIP(s);
9487 for ( i = 0; i < skip; i++ )
9495 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9496 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9500 /* Warn about ambiguous code after unary operators if {...} notation isn't
9501 used. There's no difference in ambiguity; it's merely a heuristic
9502 about when not to warn. */
9503 else if (ck_uni && bracket == -1)
9505 if (bracket != -1) {
9506 /* If we were processing {...} notation then... */
9507 if (isIDFIRST_lazy_if(d,is_utf8)) {
9508 /* if it starts as a valid identifier, assume that it is one.
9509 (the later check for } being at the expected point will trap
9510 cases where this doesn't pan out.) */
9511 d += is_utf8 ? UTF8SKIP(d) : 1;
9512 parse_ident(&s, &d, e, 1, is_utf8);
9514 tmp_copline = CopLINE(PL_curcop);
9515 if (s < PL_bufend && isSPACE(*s)) {
9518 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9519 /* ${foo[0]} and ${foo{bar}} notation. */
9520 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9521 const char * const brack =
9523 ((*s == '[') ? "[...]" : "{...}");
9524 orig_copline = CopLINE(PL_curcop);
9525 CopLINE_set(PL_curcop, tmp_copline);
9526 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9527 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9528 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9529 funny, dest, brack, funny, dest, brack);
9530 CopLINE_set(PL_curcop, orig_copline);
9533 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9534 PL_lex_allbrackets++;
9538 /* Handle extended ${^Foo} variables
9539 * 1999-02-27 mjd-perl-patch@plover.com */
9540 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9544 while (isWORDCHAR(*s) && d < e) {
9548 Perl_croak(aTHX_ "%s", ident_too_long);
9553 tmp_copline = CopLINE(PL_curcop);
9554 if (s < PL_bufend && isSPACE(*s)) {
9558 /* Expect to find a closing } after consuming any trailing whitespace.
9562 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9563 PL_lex_state = LEX_INTERPEND;
9566 if (PL_lex_state == LEX_NORMAL) {
9567 if (ckWARN(WARN_AMBIGUOUS) &&
9568 (keyword(dest, d - dest, 0)
9569 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9571 SV *tmp = newSVpvn_flags( dest, d - dest,
9572 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9575 orig_copline = CopLINE(PL_curcop);
9576 CopLINE_set(PL_curcop, tmp_copline);
9577 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9578 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9579 funny, tmp, funny, tmp);
9580 CopLINE_set(PL_curcop, orig_copline);
9585 /* Didn't find the closing } at the point we expected, so restore
9586 state such that the next thing to process is the opening { and */
9587 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9588 CopLINE_set(PL_curcop, orig_copline);
9589 PL_parser->herelines = herelines;
9593 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9594 PL_lex_state = LEX_INTERPEND;
9599 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9601 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9602 * the parse starting at 's', based on the subset that are valid in this
9603 * context input to this routine in 'valid_flags'. Advances s. Returns
9604 * TRUE if the input should be treated as a valid flag, so the next char
9605 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9606 * first call on the current regex. This routine will set it to any
9607 * charset modifier found. The caller shouldn't change it. This way,
9608 * another charset modifier encountered in the parse can be detected as an
9609 * error, as we have decided to allow only one */
9612 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9614 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9615 if (isWORDCHAR_lazy_if(*s, UTF)) {
9616 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9617 UTF ? SVf_UTF8 : 0);
9619 /* Pretend that it worked, so will continue processing before
9628 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9629 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9630 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9631 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9632 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9633 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9634 case LOCALE_PAT_MOD:
9636 goto multiple_charsets;
9638 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9641 case UNICODE_PAT_MOD:
9643 goto multiple_charsets;
9645 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9648 case ASCII_RESTRICT_PAT_MOD:
9650 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9654 /* Error if previous modifier wasn't an 'a', but if it was, see
9655 * if, and accept, a second occurrence (only) */
9657 || get_regex_charset(*pmfl)
9658 != REGEX_ASCII_RESTRICTED_CHARSET)
9660 goto multiple_charsets;
9662 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9666 case DEPENDS_PAT_MOD:
9668 goto multiple_charsets;
9670 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9679 if (*charset != c) {
9680 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9682 else if (c == 'a') {
9683 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9684 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9687 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9690 /* Pretend that it worked, so will continue processing before dieing */
9696 S_scan_pat(pTHX_ char *start, I32 type)
9701 const char * const valid_flags =
9702 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9703 char charset = '\0'; /* character set modifier */
9708 PERL_ARGS_ASSERT_SCAN_PAT;
9710 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9711 TRUE /* look for escaped bracketed metas */, NULL);
9714 const char * const delimiter = skipspace(start);
9718 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9719 : "Search pattern not terminated" ));
9722 pm = (PMOP*)newPMOP(type, 0);
9723 if (PL_multi_open == '?') {
9724 /* This is the only point in the code that sets PMf_ONCE: */
9725 pm->op_pmflags |= PMf_ONCE;
9727 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9728 allows us to restrict the list needed by reset to just the ??
9730 assert(type != OP_TRANS);
9732 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9735 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9738 elements = mg->mg_len / sizeof(PMOP**);
9739 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9740 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9741 mg->mg_len = elements * sizeof(PMOP**);
9742 PmopSTASH_set(pm,PL_curstash);
9749 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9750 * anon CV. False positives like qr/[(?{]/ are harmless */
9752 if (type == OP_QR) {
9754 char *e, *p = SvPV(PL_lex_stuff, len);
9756 for (; p < e; p++) {
9757 if (p[0] == '(' && p[1] == '?'
9758 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9760 pm->op_pmflags |= PMf_HAS_CV;
9764 pm->op_pmflags |= PMf_IS_QR;
9767 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9769 if (PL_madskills && modstart != s) {
9770 SV* tmptoken = newSVpvn(modstart, s - modstart);
9771 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9774 /* issue a warning if /c is specified,but /g is not */
9775 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9777 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9778 "Use of /c modifier is meaningless without /g" );
9781 PL_lex_op = (OP*)pm;
9782 pl_yylval.ival = OP_MATCH;
9787 S_scan_subst(pTHX_ char *start)
9795 char charset = '\0'; /* character set modifier */
9801 PERL_ARGS_ASSERT_SCAN_SUBST;
9803 pl_yylval.ival = OP_NULL;
9805 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9806 TRUE /* look for escaped bracketed metas */, &t);
9809 Perl_croak(aTHX_ "Substitution pattern not terminated");
9814 CURMAD('q', PL_thisopen);
9815 CURMAD('_', PL_thiswhite);
9816 CURMAD('E', PL_thisstuff);
9817 CURMAD('Q', PL_thisclose);
9818 PL_realtokenstart = s - SvPVX(PL_linestr);
9822 first_start = PL_multi_start;
9823 first_line = CopLINE(PL_curcop);
9824 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9827 SvREFCNT_dec(PL_lex_stuff);
9828 PL_lex_stuff = NULL;
9830 Perl_croak(aTHX_ "Substitution replacement not terminated");
9832 PL_multi_start = first_start; /* so whole substitution is taken together */
9834 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9838 CURMAD('z', PL_thisopen);
9839 CURMAD('R', PL_thisstuff);
9840 CURMAD('Z', PL_thisclose);
9846 if (*s == EXEC_PAT_MOD) {
9850 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9859 curmad('m', newSVpvn(modstart, s - modstart));
9860 append_madprops(PL_thismad, (OP*)pm, 0);
9864 if ((pm->op_pmflags & PMf_CONTINUE)) {
9865 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9869 SV * const repl = newSVpvs("");
9872 pm->op_pmflags |= PMf_EVAL;
9875 sv_catpvs(repl, "eval ");
9877 sv_catpvs(repl, "do ");
9879 sv_catpvs(repl, "{");
9880 sv_catsv(repl, PL_sublex_info.repl);
9881 sv_catpvs(repl, "}");
9883 SvREFCNT_dec(PL_sublex_info.repl);
9884 PL_sublex_info.repl = repl;
9886 if (CopLINE(PL_curcop) != first_line) {
9887 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9888 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9889 CopLINE(PL_curcop) - first_line;
9890 CopLINE_set(PL_curcop, first_line);
9893 PL_lex_op = (OP*)pm;
9894 pl_yylval.ival = OP_SUBST;
9899 S_scan_trans(pTHX_ char *start)
9907 bool nondestruct = 0;
9913 PERL_ARGS_ASSERT_SCAN_TRANS;
9915 pl_yylval.ival = OP_NULL;
9917 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9919 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9924 CURMAD('q', PL_thisopen);
9925 CURMAD('_', PL_thiswhite);
9926 CURMAD('E', PL_thisstuff);
9927 CURMAD('Q', PL_thisclose);
9928 PL_realtokenstart = s - SvPVX(PL_linestr);
9932 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9935 SvREFCNT_dec(PL_lex_stuff);
9936 PL_lex_stuff = NULL;
9938 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9941 CURMAD('z', PL_thisopen);
9942 CURMAD('R', PL_thisstuff);
9943 CURMAD('Z', PL_thisclose);
9946 complement = del = squash = 0;
9953 complement = OPpTRANS_COMPLEMENT;
9956 del = OPpTRANS_DELETE;
9959 squash = OPpTRANS_SQUASH;
9971 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9972 o->op_private &= ~OPpTRANS_ALL;
9973 o->op_private |= del|squash|complement|
9974 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9975 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9978 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9983 curmad('m', newSVpvn(modstart, s - modstart));
9984 append_madprops(PL_thismad, o, 0);
9993 Takes a pointer to the first < in <<FOO.
9994 Returns a pointer to the byte following <<FOO.
9996 This function scans a heredoc, which involves different methods
9997 depending on whether we are in a string eval, quoted construct, etc.
9998 This is because PL_linestr could containing a single line of input, or
9999 a whole string being evalled, or the contents of the current quote-
10002 The two basic methods are:
10003 - Steal lines from the input stream
10004 - Scan the heredoc in PL_linestr and remove it therefrom
10006 In a file scope or filtered eval, the first method is used; in a
10007 string eval, the second.
10009 In a quote-like operator, we have to choose between the two,
10010 depending on where we can find a newline. We peek into outer lex-
10011 ing scopes until we find one with a newline in it. If we reach the
10012 outermost lexing scope and it is a file, we use the stream method.
10013 Otherwise it is treated as an eval.
10017 S_scan_heredoc(pTHX_ char *s)
10020 I32 op_type = OP_SCALAR;
10027 const bool infile = PL_rsfp || PL_parser->filtered;
10028 const line_t origline = CopLINE(PL_curcop);
10029 LEXSHARED *shared = PL_parser->lex_shared;
10031 I32 stuffstart = s - SvPVX(PL_linestr);
10034 PL_realtokenstart = -1;
10037 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10040 d = PL_tokenbuf + 1;
10041 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10042 *PL_tokenbuf = '\n';
10044 while (SPACE_OR_TAB(*peek))
10046 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10049 s = delimcpy(d, e, s, PL_bufend, term, &len);
10050 if (s == PL_bufend)
10051 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10057 /* <<\FOO is equivalent to <<'FOO' */
10061 if (!isWORDCHAR_lazy_if(s,UTF))
10062 deprecate("bare << to mean <<\"\"");
10063 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10068 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10069 Perl_croak(aTHX_ "Delimiter for here document is too long");
10072 len = d - PL_tokenbuf;
10075 if (PL_madskills) {
10076 tstart = PL_tokenbuf + 1;
10077 PL_thisclose = newSVpvn(tstart, len - 1);
10078 tstart = SvPVX(PL_linestr) + stuffstart;
10079 PL_thisopen = newSVpvn(tstart, s - tstart);
10080 stuffstart = s - SvPVX(PL_linestr);
10083 #ifndef PERL_STRICT_CR
10084 d = strchr(s, '\r');
10086 char * const olds = s;
10088 while (s < PL_bufend) {
10094 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10103 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10108 if (PL_madskills) {
10109 tstart = SvPVX(PL_linestr) + stuffstart;
10111 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10113 PL_thisstuff = newSVpvn(tstart, s - tstart);
10116 stuffstart = s - SvPVX(PL_linestr);
10119 tmpstr = newSV_type(SVt_PVIV);
10120 SvGROW(tmpstr, 80);
10121 if (term == '\'') {
10122 op_type = OP_CONST;
10123 SvIV_set(tmpstr, -1);
10125 else if (term == '`') {
10126 op_type = OP_BACKTICK;
10127 SvIV_set(tmpstr, '\\');
10130 PL_multi_start = origline + 1 + PL_parser->herelines;
10131 PL_multi_open = PL_multi_close = '<';
10132 /* inside a string eval or quote-like operator */
10133 if (!infile || PL_lex_inwhat) {
10136 char * const olds = s;
10137 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10138 /* These two fields are not set until an inner lexing scope is
10139 entered. But we need them set here. */
10140 shared->ls_bufptr = s;
10141 shared->ls_linestr = PL_linestr;
10143 /* Look for a newline. If the current buffer does not have one,
10144 peek into the line buffer of the parent lexing scope, going
10145 up as many levels as necessary to find one with a newline
10148 while (!(s = (char *)memchr(
10149 (void *)shared->ls_bufptr, '\n',
10150 SvEND(shared->ls_linestr)-shared->ls_bufptr
10152 shared = shared->ls_prev;
10153 /* shared is only null if we have gone beyond the outermost
10154 lexing scope. In a file, we will have broken out of the
10155 loop in the previous iteration. In an eval, the string buf-
10156 fer ends with "\n;", so the while condition above will have
10157 evaluated to false. So shared can never be null. */
10159 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10160 most lexing scope. In a file, shared->ls_linestr at that
10161 level is just one line, so there is no body to steal. */
10162 if (infile && !shared->ls_prev) {
10168 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10171 linestr = shared->ls_linestr;
10172 bufend = SvEND(linestr);
10174 while (s < bufend - len + 1 &&
10175 memNE(s,PL_tokenbuf,len) ) {
10177 ++PL_parser->herelines;
10179 if (s >= bufend - len + 1) {
10182 sv_setpvn(tmpstr,d+1,s-d);
10184 if (PL_madskills) {
10186 sv_catpvn(PL_thisstuff, d + 1, s - d);
10188 PL_thisstuff = newSVpvn(d + 1, s - d);
10189 stuffstart = s - SvPVX(PL_linestr);
10193 /* the preceding stmt passes a newline */
10194 PL_parser->herelines++;
10196 /* s now points to the newline after the heredoc terminator.
10197 d points to the newline before the body of the heredoc.
10200 /* We are going to modify linestr in place here, so set
10201 aside copies of the string if necessary for re-evals or
10203 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10204 check shared->re_eval_str. */
10205 if (shared->re_eval_start || shared->re_eval_str) {
10206 /* Set aside the rest of the regexp */
10207 if (!shared->re_eval_str)
10208 shared->re_eval_str =
10209 newSVpvn(shared->re_eval_start,
10210 bufend - shared->re_eval_start);
10211 shared->re_eval_start -= s-d;
10213 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10214 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10215 cx->blk_eval.cur_text == linestr)
10217 cx->blk_eval.cur_text = newSVsv(linestr);
10218 SvSCREAM_on(cx->blk_eval.cur_text);
10220 /* Copy everything from s onwards back to d. */
10221 Move(s,d,bufend-s + 1,char);
10222 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10223 /* Setting PL_bufend only applies when we have not dug deeper
10224 into other scopes, because sublex_done sets PL_bufend to
10225 SvEND(PL_linestr). */
10226 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10233 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10234 term = PL_tokenbuf[1];
10236 linestr_save = PL_linestr; /* must restore this afterwards */
10237 d = s; /* and this */
10238 PL_linestr = newSVpvs("");
10239 PL_bufend = SvPVX(PL_linestr);
10242 if (PL_madskills) {
10243 tstart = SvPVX(PL_linestr) + stuffstart;
10245 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10247 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10250 PL_bufptr = PL_bufend;
10251 CopLINE_set(PL_curcop,
10252 origline + 1 + PL_parser->herelines);
10253 if (!lex_next_chunk(LEX_NO_TERM)
10254 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10255 SvREFCNT_dec(linestr_save);
10258 CopLINE_set(PL_curcop, origline);
10259 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10260 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10261 /* ^That should be enough to avoid this needing to grow: */
10262 sv_catpvs(PL_linestr, "\n\0");
10263 assert(s == SvPVX(PL_linestr));
10264 PL_bufend = SvEND(PL_linestr);
10268 stuffstart = s - SvPVX(PL_linestr);
10270 PL_parser->herelines++;
10271 PL_last_lop = PL_last_uni = NULL;
10272 #ifndef PERL_STRICT_CR
10273 if (PL_bufend - PL_linestart >= 2) {
10274 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10275 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10277 PL_bufend[-2] = '\n';
10279 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10281 else if (PL_bufend[-1] == '\r')
10282 PL_bufend[-1] = '\n';
10284 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10285 PL_bufend[-1] = '\n';
10287 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10288 SvREFCNT_dec(PL_linestr);
10289 PL_linestr = linestr_save;
10290 PL_linestart = SvPVX(linestr_save);
10291 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10296 sv_catsv(tmpstr,PL_linestr);
10300 PL_multi_end = origline + PL_parser->herelines;
10301 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10302 SvPV_shrink_to_cur(tmpstr);
10305 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10307 else if (PL_encoding)
10308 sv_recode_to_utf8(tmpstr, PL_encoding);
10310 PL_lex_stuff = tmpstr;
10311 pl_yylval.ival = op_type;
10315 SvREFCNT_dec(tmpstr);
10316 CopLINE_set(PL_curcop, origline);
10317 missingterm(PL_tokenbuf + 1);
10320 /* scan_inputsymbol
10321 takes: current position in input buffer
10322 returns: new position in input buffer
10323 side-effects: pl_yylval and lex_op are set.
10328 <FH> read from filehandle
10329 <pkg::FH> read from package qualified filehandle
10330 <pkg'FH> read from package qualified filehandle
10331 <$fh> read from filehandle in $fh
10332 <*.h> filename glob
10337 S_scan_inputsymbol(pTHX_ char *start)
10340 char *s = start; /* current position in buffer */
10343 char *d = PL_tokenbuf; /* start of temp holding space */
10344 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10346 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10348 end = strchr(s, '\n');
10351 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10353 /* die if we didn't have space for the contents of the <>,
10354 or if it didn't end, or if we see a newline
10357 if (len >= (I32)sizeof PL_tokenbuf)
10358 Perl_croak(aTHX_ "Excessively long <> operator");
10360 Perl_croak(aTHX_ "Unterminated <> operator");
10365 Remember, only scalar variables are interpreted as filehandles by
10366 this code. Anything more complex (e.g., <$fh{$num}>) will be
10367 treated as a glob() call.
10368 This code makes use of the fact that except for the $ at the front,
10369 a scalar variable and a filehandle look the same.
10371 if (*d == '$' && d[1]) d++;
10373 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10374 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10375 d += UTF ? UTF8SKIP(d) : 1;
10377 /* If we've tried to read what we allow filehandles to look like, and
10378 there's still text left, then it must be a glob() and not a getline.
10379 Use scan_str to pull out the stuff between the <> and treat it
10380 as nothing more than a string.
10383 if (d - PL_tokenbuf != len) {
10384 pl_yylval.ival = OP_GLOB;
10385 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10387 Perl_croak(aTHX_ "Glob not terminated");
10391 bool readline_overriden = FALSE;
10393 /* we're in a filehandle read situation */
10396 /* turn <> into <ARGV> */
10398 Copy("ARGV",d,5,char);
10400 /* Check whether readline() is overriden */
10401 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10402 if ((gv_readline = gv_override("readline",8)))
10403 readline_overriden = TRUE;
10405 /* if <$fh>, create the ops to turn the variable into a
10409 /* try to find it in the pad for this block, otherwise find
10410 add symbol table ops
10412 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10413 if (tmp != NOT_IN_PAD) {
10414 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10415 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10416 HEK * const stashname = HvNAME_HEK(stash);
10417 SV * const sym = sv_2mortal(newSVhek(stashname));
10418 sv_catpvs(sym, "::");
10419 sv_catpv(sym, d+1);
10424 OP * const o = newOP(OP_PADSV, 0);
10426 PL_lex_op = readline_overriden
10427 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10428 op_append_elem(OP_LIST, o,
10429 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10430 : (OP*)newUNOP(OP_READLINE, 0, o);
10439 ? (GV_ADDMULTI | GV_ADDINEVAL)
10440 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10442 PL_lex_op = readline_overriden
10443 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10444 op_append_elem(OP_LIST,
10445 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10446 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10447 : (OP*)newUNOP(OP_READLINE, 0,
10448 newUNOP(OP_RV2SV, 0,
10449 newGVOP(OP_GV, 0, gv)));
10451 if (!readline_overriden)
10452 PL_lex_op->op_flags |= OPf_SPECIAL;
10453 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10454 pl_yylval.ival = OP_NULL;
10457 /* If it's none of the above, it must be a literal filehandle
10458 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10460 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10461 PL_lex_op = readline_overriden
10462 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10463 op_append_elem(OP_LIST,
10464 newGVOP(OP_GV, 0, gv),
10465 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10466 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10467 pl_yylval.ival = OP_NULL;
10477 start position in buffer
10478 keep_quoted preserve \ on the embedded delimiter(s)
10479 keep_delims preserve the delimiters around the string
10480 re_reparse compiling a run-time /(?{})/:
10481 collapse // to /, and skip encoding src
10482 deprecate_escaped_meta issue a deprecation warning for cer-
10483 tain paired metacharacters that appear
10485 delimp if non-null, this is set to the position of
10486 the closing delimiter, or just after it if
10487 the closing and opening delimiters differ
10488 (i.e., the opening delimiter of a substitu-
10490 returns: position to continue reading from buffer
10491 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10492 updates the read buffer.
10494 This subroutine pulls a string out of the input. It is called for:
10495 q single quotes q(literal text)
10496 ' single quotes 'literal text'
10497 qq double quotes qq(interpolate $here please)
10498 " double quotes "interpolate $here please"
10499 qx backticks qx(/bin/ls -l)
10500 ` backticks `/bin/ls -l`
10501 qw quote words @EXPORT_OK = qw( func() $spam )
10502 m// regexp match m/this/
10503 s/// regexp substitute s/this/that/
10504 tr/// string transliterate tr/this/that/
10505 y/// string transliterate y/this/that/
10506 ($*@) sub prototypes sub foo ($)
10507 (stuff) sub attr parameters sub foo : attr(stuff)
10508 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10510 In most of these cases (all but <>, patterns and transliterate)
10511 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10512 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10513 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10516 It skips whitespace before the string starts, and treats the first
10517 character as the delimiter. If the delimiter is one of ([{< then
10518 the corresponding "close" character )]}> is used as the closing
10519 delimiter. It allows quoting of delimiters, and if the string has
10520 balanced delimiters ([{<>}]) it allows nesting.
10522 On success, the SV with the resulting string is put into lex_stuff or,
10523 if that is already non-NULL, into lex_repl. The second case occurs only
10524 when parsing the RHS of the special constructs s/// and tr/// (y///).
10525 For convenience, the terminating delimiter character is stuffed into
10530 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10531 bool deprecate_escaped_meta, char **delimp
10535 SV *sv; /* scalar value: string */
10536 const char *tmps; /* temp string, used for delimiter matching */
10537 char *s = start; /* current position in the buffer */
10538 char term; /* terminating character */
10539 char *to; /* current position in the sv's data */
10540 I32 brackets = 1; /* bracket nesting level */
10541 bool has_utf8 = FALSE; /* is there any utf8 content? */
10542 I32 termcode; /* terminating char. code */
10543 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10544 STRLEN termlen; /* length of terminating string */
10545 int last_off = 0; /* last position for nesting bracket */
10546 char *escaped_open = NULL;
10553 PERL_ARGS_ASSERT_SCAN_STR;
10555 /* skip space before the delimiter */
10561 if (PL_realtokenstart >= 0) {
10562 stuffstart = PL_realtokenstart;
10563 PL_realtokenstart = -1;
10566 stuffstart = start - SvPVX(PL_linestr);
10568 /* mark where we are, in case we need to report errors */
10571 /* after skipping whitespace, the next character is the terminator */
10574 termcode = termstr[0] = term;
10578 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10579 Copy(s, termstr, termlen, U8);
10580 if (!UTF8_IS_INVARIANT(term))
10584 /* mark where we are */
10585 PL_multi_start = CopLINE(PL_curcop);
10586 PL_multi_open = term;
10587 herelines = PL_parser->herelines;
10589 /* find corresponding closing delimiter */
10590 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10591 termcode = termstr[0] = term = tmps[5];
10593 PL_multi_close = term;
10595 /* A warning is raised if the input parameter requires it for escaped (by a
10596 * backslash) paired metacharacters {} [] and () when the delimiters are
10597 * those same characters, and the backslash is ineffective. This doesn't
10598 * happen for <>, as they aren't metas. */
10599 if (deprecate_escaped_meta
10600 && (PL_multi_open == PL_multi_close
10601 || PL_multi_open == '<'
10602 || ! ckWARN_d(WARN_DEPRECATED)))
10604 deprecate_escaped_meta = FALSE;
10607 /* create a new SV to hold the contents. 79 is the SV's initial length.
10608 What a random number. */
10609 sv = newSV_type(SVt_PVIV);
10611 SvIV_set(sv, termcode);
10612 (void)SvPOK_only(sv); /* validate pointer */
10614 /* move past delimiter and try to read a complete string */
10616 sv_catpvn(sv, s, termlen);
10619 tstart = SvPVX(PL_linestr) + stuffstart;
10620 if (PL_madskills && !PL_thisopen && !keep_delims) {
10621 PL_thisopen = newSVpvn(tstart, s - tstart);
10622 stuffstart = s - SvPVX(PL_linestr);
10626 if (PL_encoding && !UTF && !re_reparse) {
10630 int offset = s - SvPVX_const(PL_linestr);
10631 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10632 &offset, (char*)termstr, termlen);
10636 if (SvIsCOW(PL_linestr)) {
10637 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10638 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10639 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10640 char *buf = SvPVX(PL_linestr);
10641 bufend_pos = PL_parser->bufend - buf;
10642 bufptr_pos = PL_parser->bufptr - buf;
10643 oldbufptr_pos = PL_parser->oldbufptr - buf;
10644 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10645 linestart_pos = PL_parser->linestart - buf;
10646 last_uni_pos = PL_parser->last_uni
10647 ? PL_parser->last_uni - buf
10649 last_lop_pos = PL_parser->last_lop
10650 ? PL_parser->last_lop - buf
10652 re_eval_start_pos =
10653 PL_parser->lex_shared->re_eval_start ?
10654 PL_parser->lex_shared->re_eval_start - buf : 0;
10657 sv_force_normal(PL_linestr);
10659 buf = SvPVX(PL_linestr);
10660 PL_parser->bufend = buf + bufend_pos;
10661 PL_parser->bufptr = buf + bufptr_pos;
10662 PL_parser->oldbufptr = buf + oldbufptr_pos;
10663 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10664 PL_parser->linestart = buf + linestart_pos;
10665 if (PL_parser->last_uni)
10666 PL_parser->last_uni = buf + last_uni_pos;
10667 if (PL_parser->last_lop)
10668 PL_parser->last_lop = buf + last_lop_pos;
10669 if (PL_parser->lex_shared->re_eval_start)
10670 PL_parser->lex_shared->re_eval_start =
10671 buf + re_eval_start_pos;
10674 ns = SvPVX_const(PL_linestr) + offset;
10675 svlast = SvEND(sv) - 1;
10677 for (; s < ns; s++) {
10678 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10679 COPLINE_INC_WITH_HERELINES;
10682 goto read_more_line;
10684 /* handle quoted delimiters */
10685 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10687 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10689 if ((svlast-1 - t) % 2) {
10690 if (!keep_quoted) {
10691 *(svlast-1) = term;
10693 SvCUR_set(sv, SvCUR(sv) - 1);
10698 if (PL_multi_open == PL_multi_close) {
10704 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10705 /* At here, all closes are "was quoted" one,
10706 so we don't check PL_multi_close. */
10708 if (!keep_quoted && *(t+1) == PL_multi_open)
10713 else if (*t == PL_multi_open)
10721 SvCUR_set(sv, w - SvPVX_const(sv));
10723 last_off = w - SvPVX(sv);
10724 if (--brackets <= 0)
10729 if (!keep_delims) {
10730 SvCUR_set(sv, SvCUR(sv) - 1);
10736 /* extend sv if need be */
10737 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10738 /* set 'to' to the next character in the sv's string */
10739 to = SvPVX(sv)+SvCUR(sv);
10741 /* if open delimiter is the close delimiter read unbridle */
10742 if (PL_multi_open == PL_multi_close) {
10743 for (; s < PL_bufend; s++,to++) {
10744 /* embedded newlines increment the current line number */
10745 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10746 COPLINE_INC_WITH_HERELINES;
10747 /* handle quoted delimiters */
10748 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10751 || (re_reparse && s[1] == '\\'))
10754 /* any other quotes are simply copied straight through */
10758 /* terminate when run out of buffer (the for() condition), or
10759 have found the terminator */
10760 else if (*s == term) {
10763 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10766 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10772 /* if the terminator isn't the same as the start character (e.g.,
10773 matched brackets), we have to allow more in the quoting, and
10774 be prepared for nested brackets.
10777 /* read until we run out of string, or we find the terminator */
10778 for (; s < PL_bufend; s++,to++) {
10779 /* embedded newlines increment the line count */
10780 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10781 COPLINE_INC_WITH_HERELINES;
10782 /* backslashes can escape the open or closing characters */
10783 if (*s == '\\' && s+1 < PL_bufend) {
10784 if (!keep_quoted &&
10785 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10789 /* Here, 'deprecate_escaped_meta' is true iff the
10790 * delimiters are paired metacharacters, and 's' points
10791 * to an occurrence of one of them within the string,
10792 * which was preceded by a backslash. If this is a
10793 * context where the delimiter is also a metacharacter,
10794 * the backslash is useless, and deprecated. () and []
10795 * are meta in any context. {} are meta only when
10796 * appearing in a quantifier or in things like '\p{'
10797 * (but '\\p{' isn't meta). They also aren't meta
10798 * unless there is a matching closed, escaped char
10799 * later on within the string. If 's' points to an
10800 * open, set a flag; if to a close, test that flag, and
10801 * raise a warning if it was set */
10803 if (deprecate_escaped_meta) {
10804 if (*s == PL_multi_open) {
10808 /* Look for a closing '\}' */
10809 else if (regcurly(s, TRUE)) {
10812 /* Look for e.g. '\x{' */
10813 else if (s - start > 2
10814 && _generic_isCC(*(s-2),
10815 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10816 { /* Exclude '\\x', '\\\\x', etc. */
10817 char *lookbehind = s - 4;
10818 bool is_meta = TRUE;
10819 while (lookbehind >= start
10820 && *lookbehind == '\\')
10822 is_meta = ! is_meta;
10830 else if (escaped_open) {
10831 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10832 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10833 escaped_open = NULL;
10840 /* allow nested opens and closes */
10841 else if (*s == PL_multi_close && --brackets <= 0)
10843 else if (*s == PL_multi_open)
10845 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10850 /* terminate the copied string and update the sv's end-of-string */
10852 SvCUR_set(sv, to - SvPVX_const(sv));
10855 * this next chunk reads more into the buffer if we're not done yet
10859 break; /* handle case where we are done yet :-) */
10861 #ifndef PERL_STRICT_CR
10862 if (to - SvPVX_const(sv) >= 2) {
10863 if ((to[-2] == '\r' && to[-1] == '\n') ||
10864 (to[-2] == '\n' && to[-1] == '\r'))
10868 SvCUR_set(sv, to - SvPVX_const(sv));
10870 else if (to[-1] == '\r')
10873 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10878 /* if we're out of file, or a read fails, bail and reset the current
10879 line marker so we can report where the unterminated string began
10882 if (PL_madskills) {
10883 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10885 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10887 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10890 COPLINE_INC_WITH_HERELINES;
10891 PL_bufptr = PL_bufend;
10892 if (!lex_next_chunk(0)) {
10894 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10903 /* at this point, we have successfully read the delimited string */
10905 if (!PL_encoding || UTF || re_reparse) {
10907 if (PL_madskills) {
10908 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10909 const int len = s - tstart;
10911 sv_catpvn(PL_thisstuff, tstart, len);
10913 PL_thisstuff = newSVpvn(tstart, len);
10914 if (!PL_thisclose && !keep_delims)
10915 PL_thisclose = newSVpvn(s,termlen);
10920 sv_catpvn(sv, s, termlen);
10925 if (PL_madskills) {
10926 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10927 const int len = s - tstart - termlen;
10929 sv_catpvn(PL_thisstuff, tstart, len);
10931 PL_thisstuff = newSVpvn(tstart, len);
10932 if (!PL_thisclose && !keep_delims)
10933 PL_thisclose = newSVpvn(s - termlen,termlen);
10937 if (has_utf8 || (PL_encoding && !re_reparse))
10940 PL_multi_end = CopLINE(PL_curcop);
10941 CopLINE_set(PL_curcop, PL_multi_start);
10942 PL_parser->herelines = herelines;
10944 /* if we allocated too much space, give some back */
10945 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10946 SvLEN_set(sv, SvCUR(sv) + 1);
10947 SvPV_renew(sv, SvLEN(sv));
10950 /* decide whether this is the first or second quoted string we've read
10955 PL_sublex_info.repl = sv;
10958 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10964 takes: pointer to position in buffer
10965 returns: pointer to new position in buffer
10966 side-effects: builds ops for the constant in pl_yylval.op
10968 Read a number in any of the formats that Perl accepts:
10970 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10971 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10974 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10976 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10979 If it reads a number without a decimal point or an exponent, it will
10980 try converting the number to an integer and see if it can do so
10981 without loss of precision.
10985 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10988 const char *s = start; /* current position in buffer */
10989 char *d; /* destination in temp buffer */
10990 char *e; /* end of temp buffer */
10991 NV nv; /* number read, as a double */
10992 SV *sv = NULL; /* place to put the converted number */
10993 bool floatit; /* boolean: int or float? */
10994 const char *lastub = NULL; /* position of last underbar */
10995 static const char* const number_too_long = "Number too long";
10997 PERL_ARGS_ASSERT_SCAN_NUM;
10999 /* We use the first character to decide what type of number this is */
11003 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11005 /* if it starts with a 0, it could be an octal number, a decimal in
11006 0.13 disguise, or a hexadecimal number, or a binary number. */
11010 u holds the "number so far"
11011 shift the power of 2 of the base
11012 (hex == 4, octal == 3, binary == 1)
11013 overflowed was the number more than we can hold?
11015 Shift is used when we add a digit. It also serves as an "are
11016 we in octal/hex/binary?" indicator to disallow hex characters
11017 when in octal mode.
11022 bool overflowed = FALSE;
11023 bool just_zero = TRUE; /* just plain 0 or binary number? */
11024 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11025 static const char* const bases[5] =
11026 { "", "binary", "", "octal", "hexadecimal" };
11027 static const char* const Bases[5] =
11028 { "", "Binary", "", "Octal", "Hexadecimal" };
11029 static const char* const maxima[5] =
11031 "0b11111111111111111111111111111111",
11035 const char *base, *Base, *max;
11037 /* check for hex */
11038 if (s[1] == 'x' || s[1] == 'X') {
11042 } else if (s[1] == 'b' || s[1] == 'B') {
11047 /* check for a decimal in disguise */
11048 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11050 /* so it must be octal */
11057 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11058 "Misplaced _ in number");
11062 base = bases[shift];
11063 Base = Bases[shift];
11064 max = maxima[shift];
11066 /* read the rest of the number */
11068 /* x is used in the overflow test,
11069 b is the digit we're adding on. */
11074 /* if we don't mention it, we're done */
11078 /* _ are ignored -- but warned about if consecutive */
11080 if (lastub && s == lastub + 1)
11081 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11082 "Misplaced _ in number");
11086 /* 8 and 9 are not octal */
11087 case '8': case '9':
11089 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11093 case '2': case '3': case '4':
11094 case '5': case '6': case '7':
11096 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11099 case '0': case '1':
11100 b = *s++ & 15; /* ASCII digit -> value of digit */
11104 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11105 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11106 /* make sure they said 0x */
11109 b = (*s++ & 7) + 9;
11111 /* Prepare to put the digit we have onto the end
11112 of the number so far. We check for overflows.
11118 x = u << shift; /* make room for the digit */
11120 if ((x >> shift) != u
11121 && !(PL_hints & HINT_NEW_BINARY)) {
11124 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11125 "Integer overflow in %s number",
11128 u = x | b; /* add the digit to the end */
11131 n *= nvshift[shift];
11132 /* If an NV has not enough bits in its
11133 * mantissa to represent an UV this summing of
11134 * small low-order numbers is a waste of time
11135 * (because the NV cannot preserve the
11136 * low-order bits anyway): we could just
11137 * remember when did we overflow and in the
11138 * end just multiply n by the right
11146 /* if we get here, we had success: make a scalar value from
11151 /* final misplaced underbar check */
11152 if (s[-1] == '_') {
11153 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11157 if (n > 4294967295.0)
11158 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11159 "%s number > %s non-portable",
11165 if (u > 0xffffffff)
11166 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11167 "%s number > %s non-portable",
11172 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11173 sv = new_constant(start, s - start, "integer",
11174 sv, NULL, NULL, 0);
11175 else if (PL_hints & HINT_NEW_BINARY)
11176 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11181 handle decimal numbers.
11182 we're also sent here when we read a 0 as the first digit
11184 case '1': case '2': case '3': case '4': case '5':
11185 case '6': case '7': case '8': case '9': case '.':
11188 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11191 /* read next group of digits and _ and copy into d */
11192 while (isDIGIT(*s) || *s == '_') {
11193 /* skip underscores, checking for misplaced ones
11197 if (lastub && s == lastub + 1)
11198 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11199 "Misplaced _ in number");
11203 /* check for end of fixed-length buffer */
11205 Perl_croak(aTHX_ "%s", number_too_long);
11206 /* if we're ok, copy the character */
11211 /* final misplaced underbar check */
11212 if (lastub && s == lastub + 1) {
11213 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11216 /* read a decimal portion if there is one. avoid
11217 3..5 being interpreted as the number 3. followed
11220 if (*s == '.' && s[1] != '.') {
11225 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11226 "Misplaced _ in number");
11230 /* copy, ignoring underbars, until we run out of digits.
11232 for (; isDIGIT(*s) || *s == '_'; s++) {
11233 /* fixed length buffer check */
11235 Perl_croak(aTHX_ "%s", number_too_long);
11237 if (lastub && s == lastub + 1)
11238 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11239 "Misplaced _ in number");
11245 /* fractional part ending in underbar? */
11246 if (s[-1] == '_') {
11247 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11248 "Misplaced _ in number");
11250 if (*s == '.' && isDIGIT(s[1])) {
11251 /* oops, it's really a v-string, but without the "v" */
11257 /* read exponent part, if present */
11258 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11262 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11263 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11265 /* stray preinitial _ */
11267 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11268 "Misplaced _ in number");
11272 /* allow positive or negative exponent */
11273 if (*s == '+' || *s == '-')
11276 /* stray initial _ */
11278 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11279 "Misplaced _ in number");
11283 /* read digits of exponent */
11284 while (isDIGIT(*s) || *s == '_') {
11287 Perl_croak(aTHX_ "%s", number_too_long);
11291 if (((lastub && s == lastub + 1) ||
11292 (!isDIGIT(s[1]) && s[1] != '_')))
11293 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11294 "Misplaced _ in number");
11302 We try to do an integer conversion first if no characters
11303 indicating "float" have been found.
11308 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11310 if (flags == IS_NUMBER_IN_UV) {
11312 sv = newSViv(uv); /* Prefer IVs over UVs. */
11315 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11316 if (uv <= (UV) IV_MIN)
11317 sv = newSViv(-(IV)uv);
11324 STORE_NUMERIC_LOCAL_SET_STANDARD();
11325 /* terminate the string */
11327 nv = Atof(PL_tokenbuf);
11328 RESTORE_NUMERIC_LOCAL();
11333 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11334 const char *const key = floatit ? "float" : "integer";
11335 const STRLEN keylen = floatit ? 5 : 7;
11336 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11337 key, keylen, sv, NULL, NULL, 0);
11341 /* if it starts with a v, it could be a v-string */
11344 sv = newSV(5); /* preallocate storage space */
11345 ENTER_with_name("scan_vstring");
11347 s = scan_vstring(s, PL_bufend, sv);
11348 SvREFCNT_inc_simple_void_NN(sv);
11349 LEAVE_with_name("scan_vstring");
11353 /* make the op for the constant and return */
11356 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11358 lvalp->opval = NULL;
11364 S_scan_formline(pTHX_ char *s)
11369 SV * const stuff = newSVpvs("");
11370 bool needargs = FALSE;
11371 bool eofmt = FALSE;
11373 char *tokenstart = s;
11374 SV* savewhite = NULL;
11376 if (PL_madskills) {
11377 savewhite = PL_thiswhite;
11382 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11384 while (!needargs) {
11387 #ifdef PERL_STRICT_CR
11388 while (SPACE_OR_TAB(*t))
11391 while (SPACE_OR_TAB(*t) || *t == '\r')
11394 if (*t == '\n' || t == PL_bufend) {
11399 eol = (char *) memchr(s,'\n',PL_bufend-s);
11403 for (t = s; t < eol; t++) {
11404 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11406 goto enough; /* ~~ must be first line in formline */
11408 if (*t == '@' || *t == '^')
11412 sv_catpvn(stuff, s, eol-s);
11413 #ifndef PERL_STRICT_CR
11414 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11415 char *end = SvPVX(stuff) + SvCUR(stuff);
11418 SvCUR_set(stuff, SvCUR(stuff) - 1);
11426 if ((PL_rsfp || PL_parser->filtered)
11427 && PL_parser->form_lex_state == LEX_NORMAL) {
11430 if (PL_madskills) {
11432 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11434 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11437 PL_bufptr = PL_bufend;
11438 COPLINE_INC_WITH_HERELINES;
11439 got_some = lex_next_chunk(0);
11440 CopLINE_dec(PL_curcop);
11443 tokenstart = PL_bufptr;
11451 if (!SvCUR(stuff) || needargs)
11452 PL_lex_state = PL_parser->form_lex_state;
11453 if (SvCUR(stuff)) {
11454 PL_expect = XSTATE;
11456 const char *s2 = s;
11457 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
11461 start_force(PL_curforce);
11462 PL_expect = XTERMBLOCK;
11463 NEXTVAL_NEXTTOKE.ival = 0;
11466 start_force(PL_curforce);
11467 NEXTVAL_NEXTTOKE.ival = 0;
11468 force_next(FORMLBRACK);
11471 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11473 else if (PL_encoding)
11474 sv_recode_to_utf8(stuff, PL_encoding);
11476 start_force(PL_curforce);
11477 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11481 SvREFCNT_dec(stuff);
11483 PL_lex_formbrack = 0;
11486 if (PL_madskills) {
11488 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11490 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11491 PL_thiswhite = savewhite;
11498 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11501 const I32 oldsavestack_ix = PL_savestack_ix;
11502 CV* const outsidecv = PL_compcv;
11504 SAVEI32(PL_subline);
11505 save_item(PL_subname);
11506 SAVESPTR(PL_compcv);
11508 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11509 CvFLAGS(PL_compcv) |= flags;
11511 PL_subline = CopLINE(PL_curcop);
11512 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11513 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11514 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11515 if (outsidecv && CvPADLIST(outsidecv))
11516 CvPADLIST(PL_compcv)->xpadl_outid =
11517 PadlistNAMES(CvPADLIST(outsidecv));
11519 return oldsavestack_ix;
11523 S_yywarn(pTHX_ const char *const s, U32 flags)
11527 PERL_ARGS_ASSERT_YYWARN;
11529 PL_in_eval |= EVAL_WARNONLY;
11530 yyerror_pv(s, flags);
11531 PL_in_eval &= ~EVAL_WARNONLY;
11536 Perl_yyerror(pTHX_ const char *const s)
11538 PERL_ARGS_ASSERT_YYERROR;
11539 return yyerror_pvn(s, strlen(s), 0);
11543 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11545 PERL_ARGS_ASSERT_YYERROR_PV;
11546 return yyerror_pvn(s, strlen(s), flags);
11550 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11553 const char *context = NULL;
11556 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11557 int yychar = PL_parser->yychar;
11559 PERL_ARGS_ASSERT_YYERROR_PVN;
11561 if (!yychar || (yychar == ';' && !PL_rsfp))
11562 sv_catpvs(where_sv, "at EOF");
11563 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11564 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11565 PL_oldbufptr != PL_bufptr) {
11568 The code below is removed for NetWare because it abends/crashes on NetWare
11569 when the script has error such as not having the closing quotes like:
11570 if ($var eq "value)
11571 Checking of white spaces is anyway done in NetWare code.
11574 while (isSPACE(*PL_oldoldbufptr))
11577 context = PL_oldoldbufptr;
11578 contlen = PL_bufptr - PL_oldoldbufptr;
11580 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11581 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11584 The code below is removed for NetWare because it abends/crashes on NetWare
11585 when the script has error such as not having the closing quotes like:
11586 if ($var eq "value)
11587 Checking of white spaces is anyway done in NetWare code.
11590 while (isSPACE(*PL_oldbufptr))
11593 context = PL_oldbufptr;
11594 contlen = PL_bufptr - PL_oldbufptr;
11596 else if (yychar > 255)
11597 sv_catpvs(where_sv, "next token ???");
11598 else if (yychar == -2) { /* YYEMPTY */
11599 if (PL_lex_state == LEX_NORMAL ||
11600 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11601 sv_catpvs(where_sv, "at end of line");
11602 else if (PL_lex_inpat)
11603 sv_catpvs(where_sv, "within pattern");
11605 sv_catpvs(where_sv, "within string");
11608 sv_catpvs(where_sv, "next char ");
11610 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11611 else if (isPRINT_LC(yychar)) {
11612 const char string = yychar;
11613 sv_catpvn(where_sv, &string, 1);
11616 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11618 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11619 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11620 OutCopFILE(PL_curcop),
11621 (IV)(PL_parser->preambling == NOLINE
11622 ? CopLINE(PL_curcop)
11623 : PL_parser->preambling));
11625 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11626 UTF8fARG(UTF, contlen, context));
11628 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11629 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11630 Perl_sv_catpvf(aTHX_ msg,
11631 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11632 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11635 if (PL_in_eval & EVAL_WARNONLY) {
11636 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11640 if (PL_error_count >= 10) {
11642 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11643 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11644 SVfARG(errsv), OutCopFILE(PL_curcop));
11646 Perl_croak(aTHX_ "%s has too many errors.\n",
11647 OutCopFILE(PL_curcop));
11650 PL_in_my_stash = NULL;
11655 S_swallow_bom(pTHX_ U8 *s)
11658 const STRLEN slen = SvCUR(PL_linestr);
11660 PERL_ARGS_ASSERT_SWALLOW_BOM;
11664 if (s[1] == 0xFE) {
11665 /* UTF-16 little-endian? (or UTF-32LE?) */
11666 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11667 /* diag_listed_as: Unsupported script encoding %s */
11668 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11669 #ifndef PERL_NO_UTF16_FILTER
11670 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11672 if (PL_bufend > (char*)s) {
11673 s = add_utf16_textfilter(s, TRUE);
11676 /* diag_listed_as: Unsupported script encoding %s */
11677 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11682 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11683 #ifndef PERL_NO_UTF16_FILTER
11684 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11686 if (PL_bufend > (char *)s) {
11687 s = add_utf16_textfilter(s, FALSE);
11690 /* diag_listed_as: Unsupported script encoding %s */
11691 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11695 case BOM_UTF8_FIRST_BYTE: {
11696 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11697 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11698 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11699 s += len + 1; /* UTF-8 */
11706 if (s[2] == 0xFE && s[3] == 0xFF) {
11707 /* UTF-32 big-endian */
11708 /* diag_listed_as: Unsupported script encoding %s */
11709 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11712 else if (s[2] == 0 && s[3] != 0) {
11715 * are a good indicator of UTF-16BE. */
11716 #ifndef PERL_NO_UTF16_FILTER
11717 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11718 s = add_utf16_textfilter(s, FALSE);
11720 /* diag_listed_as: Unsupported script encoding %s */
11721 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11727 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11730 * are a good indicator of UTF-16LE. */
11731 #ifndef PERL_NO_UTF16_FILTER
11732 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11733 s = add_utf16_textfilter(s, TRUE);
11735 /* diag_listed_as: Unsupported script encoding %s */
11736 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11744 #ifndef PERL_NO_UTF16_FILTER
11746 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11749 SV *const filter = FILTER_DATA(idx);
11750 /* We re-use this each time round, throwing the contents away before we
11752 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11753 SV *const utf8_buffer = filter;
11754 IV status = IoPAGE(filter);
11755 const bool reverse = cBOOL(IoLINES(filter));
11758 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11760 /* As we're automatically added, at the lowest level, and hence only called
11761 from this file, we can be sure that we're not called in block mode. Hence
11762 don't bother writing code to deal with block mode. */
11764 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11767 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11769 DEBUG_P(PerlIO_printf(Perl_debug_log,
11770 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11771 FPTR2DPTR(void *, S_utf16_textfilter),
11772 reverse ? 'l' : 'b', idx, maxlen, status,
11773 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11780 /* First, look in our buffer of existing UTF-8 data: */
11781 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11785 } else if (status == 0) {
11787 IoPAGE(filter) = 0;
11788 nl = SvEND(utf8_buffer);
11791 STRLEN got = nl - SvPVX(utf8_buffer);
11792 /* Did we have anything to append? */
11794 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11795 /* Everything else in this code works just fine if SVp_POK isn't
11796 set. This, however, needs it, and we need it to work, else
11797 we loop infinitely because the buffer is never consumed. */
11798 sv_chop(utf8_buffer, nl);
11802 /* OK, not a complete line there, so need to read some more UTF-16.
11803 Read an extra octect if the buffer currently has an odd number. */
11807 if (SvCUR(utf16_buffer) >= 2) {
11808 /* Location of the high octet of the last complete code point.
11809 Gosh, UTF-16 is a pain. All the benefits of variable length,
11810 *coupled* with all the benefits of partial reads and
11812 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11813 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11815 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11819 /* We have the first half of a surrogate. Read more. */
11820 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11823 status = FILTER_READ(idx + 1, utf16_buffer,
11824 160 + (SvCUR(utf16_buffer) & 1));
11825 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11826 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11829 IoPAGE(filter) = status;
11834 chars = SvCUR(utf16_buffer) >> 1;
11835 have = SvCUR(utf8_buffer);
11836 SvGROW(utf8_buffer, have + chars * 3 + 1);
11839 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11840 (U8*)SvPVX_const(utf8_buffer) + have,
11841 chars * 2, &newlen);
11843 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11844 (U8*)SvPVX_const(utf8_buffer) + have,
11845 chars * 2, &newlen);
11847 SvCUR_set(utf8_buffer, have + newlen);
11850 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11851 it's private to us, and utf16_to_utf8{,reversed} take a
11852 (pointer,length) pair, rather than a NUL-terminated string. */
11853 if(SvCUR(utf16_buffer) & 1) {
11854 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11855 SvCUR_set(utf16_buffer, 1);
11857 SvCUR_set(utf16_buffer, 0);
11860 DEBUG_P(PerlIO_printf(Perl_debug_log,
11861 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11863 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11864 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11869 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11871 SV *filter = filter_add(S_utf16_textfilter, NULL);
11873 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11875 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11876 sv_setpvs(filter, "");
11877 IoLINES(filter) = reversed;
11878 IoPAGE(filter) = 1; /* Not EOF */
11880 /* Sadly, we have to return a valid pointer, come what may, so we have to
11881 ignore any error return from this. */
11882 SvCUR_set(PL_linestr, 0);
11883 if (FILTER_READ(0, PL_linestr, 0)) {
11884 SvUTF8_on(PL_linestr);
11886 SvUTF8_on(PL_linestr);
11888 PL_bufend = SvEND(PL_linestr);
11889 return (U8*)SvPVX(PL_linestr);
11894 Returns a pointer to the next character after the parsed
11895 vstring, as well as updating the passed in sv.
11897 Function must be called like
11899 sv = sv_2mortal(newSV(5));
11900 s = scan_vstring(s,e,sv);
11902 where s and e are the start and end of the string.
11903 The sv should already be large enough to store the vstring
11904 passed in, for performance reasons.
11906 This function may croak if fatal warnings are enabled in the
11907 calling scope, hence the sv_2mortal in the example (to prevent
11908 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11914 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11917 const char *pos = s;
11918 const char *start = s;
11920 PERL_ARGS_ASSERT_SCAN_VSTRING;
11922 if (*pos == 'v') pos++; /* get past 'v' */
11923 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11925 if ( *pos != '.') {
11926 /* this may not be a v-string if followed by => */
11927 const char *next = pos;
11928 while (next < e && isSPACE(*next))
11930 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11931 /* return string not v-string */
11932 sv_setpvn(sv,(char *)s,pos-s);
11933 return (char *)pos;
11937 if (!isALPHA(*pos)) {
11938 U8 tmpbuf[UTF8_MAXBYTES+1];
11941 s++; /* get past 'v' */
11946 /* this is atoi() that tolerates underscores */
11949 const char *end = pos;
11951 while (--end >= s) {
11953 const UV orev = rev;
11954 rev += (*end - '0') * mult;
11957 /* diag_listed_as: Integer overflow in %s number */
11958 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11959 "Integer overflow in decimal number");
11963 if (rev > 0x7FFFFFFF)
11964 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11966 /* Append native character for the rev point */
11967 tmpend = uvchr_to_utf8(tmpbuf, rev);
11968 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11969 if (!UVCHR_IS_INVARIANT(rev))
11971 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11977 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11981 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11988 Perl_keyword_plugin_standard(pTHX_
11989 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11991 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11992 PERL_UNUSED_CONTEXT;
11993 PERL_UNUSED_ARG(keyword_ptr);
11994 PERL_UNUSED_ARG(keyword_len);
11995 PERL_UNUSED_ARG(op_ptr);
11996 return KEYWORD_PLUGIN_DECLINE;
11999 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12001 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12003 SAVEI32(PL_lex_brackets);
12004 if (PL_lex_brackets > 100)
12005 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12006 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12007 SAVEI32(PL_lex_allbrackets);
12008 PL_lex_allbrackets = 0;
12009 SAVEI8(PL_lex_fakeeof);
12010 PL_lex_fakeeof = (U8)fakeeof;
12011 if(yyparse(gramtype) && !PL_parser->error_count)
12012 qerror(Perl_mess(aTHX_ "Parse error"));
12015 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12017 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12021 SAVEVPTR(PL_eval_root);
12022 PL_eval_root = NULL;
12023 parse_recdescent(gramtype, fakeeof);
12029 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12031 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12034 if (flags & ~PARSE_OPTIONAL)
12035 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12036 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12037 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12038 if (!PL_parser->error_count)
12039 qerror(Perl_mess(aTHX_ "Parse error"));
12040 exprop = newOP(OP_NULL, 0);
12046 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12048 Parse a Perl arithmetic expression. This may contain operators of precedence
12049 down to the bit shift operators. The expression must be followed (and thus
12050 terminated) either by a comparison or lower-precedence operator or by
12051 something that would normally terminate an expression such as semicolon.
12052 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12053 otherwise it is mandatory. It is up to the caller to ensure that the
12054 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12055 the source of the code to be parsed and the lexical context for the
12058 The op tree representing the expression is returned. If an optional
12059 expression is absent, a null pointer is returned, otherwise the pointer
12062 If an error occurs in parsing or compilation, in most cases a valid op
12063 tree is returned anyway. The error is reflected in the parser state,
12064 normally resulting in a single exception at the top level of parsing
12065 which covers all the compilation errors that occurred. Some compilation
12066 errors, however, will throw an exception immediately.
12072 Perl_parse_arithexpr(pTHX_ U32 flags)
12074 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12078 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12080 Parse a Perl term expression. This may contain operators of precedence
12081 down to the assignment operators. The expression must be followed (and thus
12082 terminated) either by a comma or lower-precedence operator or by
12083 something that would normally terminate an expression such as semicolon.
12084 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12085 otherwise it is mandatory. It is up to the caller to ensure that the
12086 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12087 the source of the code to be parsed and the lexical context for the
12090 The op tree representing the expression is returned. If an optional
12091 expression is absent, a null pointer is returned, otherwise the pointer
12094 If an error occurs in parsing or compilation, in most cases a valid op
12095 tree is returned anyway. The error is reflected in the parser state,
12096 normally resulting in a single exception at the top level of parsing
12097 which covers all the compilation errors that occurred. Some compilation
12098 errors, however, will throw an exception immediately.
12104 Perl_parse_termexpr(pTHX_ U32 flags)
12106 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12110 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12112 Parse a Perl list expression. This may contain operators of precedence
12113 down to the comma operator. The expression must be followed (and thus
12114 terminated) either by a low-precedence logic operator such as C<or> or by
12115 something that would normally terminate an expression such as semicolon.
12116 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12117 otherwise it is mandatory. It is up to the caller to ensure that the
12118 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12119 the source of the code to be parsed and the lexical context for the
12122 The op tree representing the expression is returned. If an optional
12123 expression is absent, a null pointer is returned, otherwise the pointer
12126 If an error occurs in parsing or compilation, in most cases a valid op
12127 tree is returned anyway. The error is reflected in the parser state,
12128 normally resulting in a single exception at the top level of parsing
12129 which covers all the compilation errors that occurred. Some compilation
12130 errors, however, will throw an exception immediately.
12136 Perl_parse_listexpr(pTHX_ U32 flags)
12138 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12142 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12144 Parse a single complete Perl expression. This allows the full
12145 expression grammar, including the lowest-precedence operators such
12146 as C<or>. The expression must be followed (and thus terminated) by a
12147 token that an expression would normally be terminated by: end-of-file,
12148 closing bracketing punctuation, semicolon, or one of the keywords that
12149 signals a postfix expression-statement modifier. If I<flags> includes
12150 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12151 mandatory. It is up to the caller to ensure that the dynamic parser
12152 state (L</PL_parser> et al) is correctly set to reflect the source of
12153 the code to be parsed and the lexical context for the expression.
12155 The op tree representing the expression is returned. If an optional
12156 expression is absent, a null pointer is returned, otherwise the pointer
12159 If an error occurs in parsing or compilation, in most cases a valid op
12160 tree is returned anyway. The error is reflected in the parser state,
12161 normally resulting in a single exception at the top level of parsing
12162 which covers all the compilation errors that occurred. Some compilation
12163 errors, however, will throw an exception immediately.
12169 Perl_parse_fullexpr(pTHX_ U32 flags)
12171 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12175 =for apidoc Amx|OP *|parse_block|U32 flags
12177 Parse a single complete Perl code block. This consists of an opening
12178 brace, a sequence of statements, and a closing brace. The block
12179 constitutes a lexical scope, so C<my> variables and various compile-time
12180 effects can be contained within it. It is up to the caller to ensure
12181 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12182 reflect the source of the code to be parsed and the lexical context for
12185 The op tree representing the code block is returned. This is always a
12186 real op, never a null pointer. It will normally be a C<lineseq> list,
12187 including C<nextstate> or equivalent ops. No ops to construct any kind
12188 of runtime scope are included by virtue of it being a block.
12190 If an error occurs in parsing or compilation, in most cases a valid op
12191 tree (most likely null) is returned anyway. The error is reflected in
12192 the parser state, normally resulting in a single exception at the top
12193 level of parsing which covers all the compilation errors that occurred.
12194 Some compilation errors, however, will throw an exception immediately.
12196 The I<flags> parameter is reserved for future use, and must always
12203 Perl_parse_block(pTHX_ U32 flags)
12206 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12207 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12211 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12213 Parse a single unadorned Perl statement. This may be a normal imperative
12214 statement or a declaration that has compile-time effect. It does not
12215 include any label or other affixture. It is up to the caller to ensure
12216 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12217 reflect the source of the code to be parsed and the lexical context for
12220 The op tree representing the statement is returned. This may be a
12221 null pointer if the statement is null, for example if it was actually
12222 a subroutine definition (which has compile-time side effects). If not
12223 null, it will be ops directly implementing the statement, suitable to
12224 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12225 equivalent op (except for those embedded in a scope contained entirely
12226 within the statement).
12228 If an error occurs in parsing or compilation, in most cases a valid op
12229 tree (most likely null) is returned anyway. The error is reflected in
12230 the parser state, normally resulting in a single exception at the top
12231 level of parsing which covers all the compilation errors that occurred.
12232 Some compilation errors, however, will throw an exception immediately.
12234 The I<flags> parameter is reserved for future use, and must always
12241 Perl_parse_barestmt(pTHX_ U32 flags)
12244 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12245 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12249 =for apidoc Amx|SV *|parse_label|U32 flags
12251 Parse a single label, possibly optional, of the type that may prefix a
12252 Perl statement. It is up to the caller to ensure that the dynamic parser
12253 state (L</PL_parser> et al) is correctly set to reflect the source of
12254 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12255 label is optional, otherwise it is mandatory.
12257 The name of the label is returned in the form of a fresh scalar. If an
12258 optional label is absent, a null pointer is returned.
12260 If an error occurs in parsing, which can only occur if the label is
12261 mandatory, a valid label is returned anyway. The error is reflected in
12262 the parser state, normally resulting in a single exception at the top
12263 level of parsing which covers all the compilation errors that occurred.
12269 Perl_parse_label(pTHX_ U32 flags)
12271 if (flags & ~PARSE_OPTIONAL)
12272 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12273 if (PL_lex_state == LEX_KNOWNEXT) {
12274 PL_parser->yychar = yylex();
12275 if (PL_parser->yychar == LABEL) {
12276 char * const lpv = pl_yylval.pval;
12277 STRLEN llen = strlen(lpv);
12278 PL_parser->yychar = YYEMPTY;
12279 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12286 STRLEN wlen, bufptr_pos;
12289 if (!isIDFIRST_lazy_if(s, UTF))
12291 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12292 if (word_takes_any_delimeter(s, wlen))
12294 bufptr_pos = s - SvPVX(PL_linestr);
12296 lex_read_space(LEX_KEEP_PREVIOUS);
12298 s = SvPVX(PL_linestr) + bufptr_pos;
12299 if (t[0] == ':' && t[1] != ':') {
12300 PL_oldoldbufptr = PL_oldbufptr;
12303 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12307 if (flags & PARSE_OPTIONAL) {
12310 qerror(Perl_mess(aTHX_ "Parse error"));
12311 return newSVpvs("x");
12318 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12320 Parse a single complete Perl statement. This may be a normal imperative
12321 statement or a declaration that has compile-time effect, and may include
12322 optional labels. It is up to the caller to ensure that the dynamic
12323 parser state (L</PL_parser> et al) is correctly set to reflect the source
12324 of the code to be parsed and the lexical context for the statement.
12326 The op tree representing the statement is returned. This may be a
12327 null pointer if the statement is null, for example if it was actually
12328 a subroutine definition (which has compile-time side effects). If not
12329 null, it will be the result of a L</newSTATEOP> call, normally including
12330 a C<nextstate> or equivalent op.
12332 If an error occurs in parsing or compilation, in most cases a valid op
12333 tree (most likely null) is returned anyway. The error is reflected in
12334 the parser state, normally resulting in a single exception at the top
12335 level of parsing which covers all the compilation errors that occurred.
12336 Some compilation errors, however, will throw an exception immediately.
12338 The I<flags> parameter is reserved for future use, and must always
12345 Perl_parse_fullstmt(pTHX_ U32 flags)
12348 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12349 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12353 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12355 Parse a sequence of zero or more Perl statements. These may be normal
12356 imperative statements, including optional labels, or declarations
12357 that have compile-time effect, or any mixture thereof. The statement
12358 sequence ends when a closing brace or end-of-file is encountered in a
12359 place where a new statement could have validly started. It is up to
12360 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12361 is correctly set to reflect the source of the code to be parsed and the
12362 lexical context for the statements.
12364 The op tree representing the statement sequence is returned. This may
12365 be a null pointer if the statements were all null, for example if there
12366 were no statements or if there were only subroutine definitions (which
12367 have compile-time side effects). If not null, it will be a C<lineseq>
12368 list, normally including C<nextstate> or equivalent ops.
12370 If an error occurs in parsing or compilation, in most cases a valid op
12371 tree is returned anyway. The error is reflected in the parser state,
12372 normally resulting in a single exception at the top level of parsing
12373 which covers all the compilation errors that occurred. Some compilation
12374 errors, however, will throw an exception immediately.
12376 The I<flags> parameter is reserved for future use, and must always
12383 Perl_parse_stmtseq(pTHX_ U32 flags)
12388 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12389 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12390 c = lex_peek_unichar(0);
12391 if (c != -1 && c != /*{*/'}')
12392 qerror(Perl_mess(aTHX_ "Parse error"));
12396 #define lex_token_boundary() S_lex_token_boundary(aTHX)
12398 S_lex_token_boundary(pTHX)
12400 PL_oldoldbufptr = PL_oldbufptr;
12401 PL_oldbufptr = PL_bufptr;
12404 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
12406 S_parse_opt_lexvar(pTHX)
12411 lex_token_boundary();
12412 sigil = lex_read_unichar(0);
12413 if (lex_peek_unichar(0) == '#') {
12414 qerror(Perl_mess(aTHX_ "Parse error"));
12418 c = lex_peek_unichar(0);
12419 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
12422 d = PL_tokenbuf + 1;
12423 PL_tokenbuf[0] = (char)sigil;
12424 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
12426 if (d == PL_tokenbuf+1)
12429 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
12430 OPf_MOD | (OPpLVAL_INTRO<<8));
12431 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
12436 Perl_parse_subsignature(pTHX)
12439 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
12440 OP *initops = NULL;
12442 c = lex_peek_unichar(0);
12443 while (c != /*(*/')') {
12447 if (prev_type == 2)
12448 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12449 var = parse_opt_lexvar();
12451 newBINOP(OP_AELEM, 0,
12452 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
12454 newSVOP(OP_CONST, 0, newSViv(pos))) :
12457 c = lex_peek_unichar(0);
12459 lex_token_boundary();
12460 lex_read_unichar(0);
12462 c = lex_peek_unichar(0);
12463 if (c == ',' || c == /*(*/')') {
12465 qerror(Perl_mess(aTHX_ "Optional parameter "
12466 "lacks default expression"));
12468 OP *defexpr = parse_termexpr(0);
12469 if (defexpr->op_type == OP_UNDEF &&
12470 !(defexpr->op_flags & OPf_KIDS)) {
12475 scalar(newUNOP(OP_RV2AV, 0,
12476 newGVOP(OP_GV, 0, PL_defgv))),
12477 newSVOP(OP_CONST, 0, newSViv(pos+1)));
12479 newCONDOP(0, ifop, expr, defexpr) :
12480 newLOGOP(OP_OR, 0, ifop, defexpr);
12485 if (prev_type == 1)
12486 qerror(Perl_mess(aTHX_ "Mandatory parameter "
12487 "follows optional parameter"));
12489 min_arity = pos + 1;
12491 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
12493 initops = op_append_list(OP_LINESEQ, initops,
12494 newSTATEOP(0, NULL, expr));
12500 if (prev_type == 2)
12501 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12502 var = parse_opt_lexvar();
12504 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
12505 newBINOP(OP_BIT_AND, 0,
12506 scalar(newUNOP(OP_RV2AV, 0,
12507 newGVOP(OP_GV, 0, PL_defgv))),
12508 newSVOP(OP_CONST, 0, newSViv(1))),
12509 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12510 newSVOP(OP_CONST, 0,
12511 newSVpvs("Odd name/value argument "
12512 "for subroutine"))));
12513 if (pos != min_arity)
12514 chkop = newLOGOP(OP_AND, 0,
12516 scalar(newUNOP(OP_RV2AV, 0,
12517 newGVOP(OP_GV, 0, PL_defgv))),
12518 newSVOP(OP_CONST, 0, newSViv(pos))),
12520 initops = op_append_list(OP_LINESEQ,
12521 newSTATEOP(0, NULL, chkop),
12526 op_prepend_elem(OP_ASLICE,
12527 newOP(OP_PUSHMARK, 0),
12528 newLISTOP(OP_ASLICE, 0,
12530 newSVOP(OP_CONST, 0, newSViv(pos)),
12531 newUNOP(OP_AV2ARYLEN, 0,
12532 ref(newUNOP(OP_RV2AV, 0,
12533 newGVOP(OP_GV, 0, PL_defgv)),
12535 ref(newUNOP(OP_RV2AV, 0,
12536 newGVOP(OP_GV, 0, PL_defgv)),
12538 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12539 initops = op_append_list(OP_LINESEQ, initops,
12540 newSTATEOP(0, NULL,
12541 newASSIGNOP(OPf_STACKED, var, 0, slice)));
12548 qerror(Perl_mess(aTHX_ "Parse error"));
12552 c = lex_peek_unichar(0);
12554 case /*(*/')': break;
12557 lex_token_boundary();
12558 lex_read_unichar(0);
12560 c = lex_peek_unichar(0);
12561 } while (c == ',');
12567 if (min_arity != 0) {
12568 initops = op_append_list(OP_LINESEQ,
12569 newSTATEOP(0, NULL,
12572 scalar(newUNOP(OP_RV2AV, 0,
12573 newGVOP(OP_GV, 0, PL_defgv))),
12574 newSVOP(OP_CONST, 0, newSViv(min_arity))),
12575 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12576 newSVOP(OP_CONST, 0,
12577 newSVpvs("Too few arguments for subroutine"))))),
12580 if (max_arity != -1) {
12581 initops = op_append_list(OP_LINESEQ,
12582 newSTATEOP(0, NULL,
12585 scalar(newUNOP(OP_RV2AV, 0,
12586 newGVOP(OP_GV, 0, PL_defgv))),
12587 newSVOP(OP_CONST, 0, newSViv(max_arity))),
12588 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12589 newSVOP(OP_CONST, 0,
12590 newSVpvs("Too many arguments for subroutine"))))),
12598 * c-indentation-style: bsd
12599 * c-basic-offset: 4
12600 * indent-tabs-mode: nil
12603 * ex: set ts=8 sts=4 sw=4 et: