3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
185 #include "keywords.h"
187 /* CLINE is a macro that ensures PL_copline has a sane value */
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
192 # define SKIPSPACE0(s) skipspace0(s)
193 # define SKIPSPACE1(s) skipspace1(s)
194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195 # define PEEKSPACE(s) skipspace2(s,0)
197 # define SKIPSPACE0(s) skipspace(s)
198 # define SKIPSPACE1(s) skipspace(s)
199 # define SKIPSPACE2(s,tsv) skipspace(s)
200 # define PEEKSPACE(s) skipspace(s)
204 * Convenience functions to return different tokens and prime the
205 * lexer for the next token. They all take an argument.
207 * TOKEN : generic token (used for '(', DOLSHARP, etc)
208 * OPERATOR : generic operator
209 * AOPERATOR : assignment operator
210 * PREBLOCK : beginning the block after an if, while, foreach, ...
211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212 * PREREF : *EXPR where EXPR is not a simple identifier
213 * TERM : expression term
214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
215 * LOOPX : loop exiting command (goto, last, dump, etc)
216 * FTST : file test operator
217 * FUN0 : zero-argument function
218 * FUN0OP : zero-argument function, with its op created in this file
219 * FUN1 : not used, except for not, which isn't a UNIOP
220 * BOop : bitwise or or xor
222 * SHop : shift operator
223 * PWop : power operator
224 * PMop : pattern-matching operator
225 * Aop : addition-level operator
226 * Mop : multiplication-level operator
227 * Eop : equality-testing operator
228 * Rop : relational operator <= != gt
230 * Also see LOP and lop() below.
233 #ifdef DEBUGGING /* Serve -DT. */
234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
236 # define REPORT(retval) (retval)
239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
262 /* This bit of chicanery makes a unary function followed by
263 * a parenthesis into a function with one argument, highest precedence.
264 * The UNIDOR macro is for unary functions that can be followed by the //
265 * operator (such as C<shift // 0>).
267 #define UNI3(f,x,have_x) { \
268 pl_yylval.ival = f; \
269 if (have_x) PL_expect = x; \
271 PL_last_uni = PL_oldbufptr; \
272 PL_last_lop_op = f; \
274 return REPORT( (int)FUNC1 ); \
276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 #define UNI(f) UNI3(f,XTERM,1)
279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
280 #define UNIPROTO(f,optional) { \
281 if (optional) PL_last_uni = PL_oldbufptr; \
285 #define UNIBRACK(f) UNI3(f,0,0)
287 /* grandfather return to old style */
290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
292 pl_yylval.ival = (f); \
298 #define COPLINE_INC_WITH_HERELINES \
300 CopLINE_inc(PL_curcop); \
301 if (PL_parser->herelines) \
302 CopLINE(PL_curcop) += PL_parser->herelines, \
303 PL_parser->herelines = 0; \
305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
306 * is no sublex_push to follow. */
307 #define COPLINE_SET_FROM_MULTI_END \
309 CopLINE_set(PL_curcop, PL_multi_end); \
310 if (PL_multi_end != PL_multi_start) \
311 PL_parser->herelines = 0; \
317 /* how to interpret the pl_yylval associated with the token */
321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 static struct debug_tokens {
328 enum token_type type;
330 } const debug_tokens[] =
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
355 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
363 { IF, TOKENTYPE_IVAL, "IF" },
364 { LABEL, TOKENTYPE_PVAL, "LABEL" },
365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
370 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
371 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
372 { MY, TOKENTYPE_IVAL, "MY" },
373 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
374 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
375 { OROP, TOKENTYPE_IVAL, "OROP" },
376 { OROR, TOKENTYPE_NONE, "OROR" },
377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
378 { PEG, TOKENTYPE_NONE, "PEG" },
379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
384 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
385 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
386 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
387 { PREINC, TOKENTYPE_NONE, "PREINC" },
388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
390 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
391 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
394 { SUB, TOKENTYPE_NONE, "SUB" },
395 { THING, TOKENTYPE_OPVAL, "THING" },
396 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
401 { USE, TOKENTYPE_IVAL, "USE" },
402 { WHEN, TOKENTYPE_IVAL, "WHEN" },
403 { WHILE, TOKENTYPE_IVAL, "WHILE" },
404 { WORD, TOKENTYPE_OPVAL, "WORD" },
405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
406 { 0, TOKENTYPE_NONE, NULL }
409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
416 PERL_ARGS_ASSERT_TOKEREPORT;
419 const char *name = NULL;
420 enum token_type type = TOKENTYPE_NONE;
421 const struct debug_tokens *p;
422 SV* const report = newSVpvs("<== ");
424 for (p = debug_tokens; p->token; p++) {
425 if (p->token == (int)rv) {
432 Perl_sv_catpv(aTHX_ report, name);
433 else if ((char)rv > ' ' && (char)rv <= '~')
435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
437 sv_catpvs(report, " (pending identifier)");
440 sv_catpvs(report, "EOF");
442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
449 case TOKENTYPE_OPNUM:
450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
451 PL_op_name[lvalp->ival]);
454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
456 case TOKENTYPE_OPVAL:
458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
459 PL_op_name[lvalp->opval->op_type]);
460 if (lvalp->opval->op_type == OP_CONST) {
461 Perl_sv_catpvf(aTHX_ report, " %s",
462 SvPEEK(cSVOPx_sv(lvalp->opval)));
467 sv_catpvs(report, "(opval=null)");
470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
476 /* print the buffer with suitable escapes */
479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
481 SV* const tmp = newSVpvs("");
483 PERL_ARGS_ASSERT_PRINTBUF;
485 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
486 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494 S_deprecate_commaless_var_list(pTHX) {
496 deprecate("comma-less variable list");
497 return REPORT(','); /* grandfather non-comma-format format */
503 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
504 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
508 S_ao(pTHX_ int toketype)
511 if (*PL_bufptr == '=') {
513 if (toketype == ANDAND)
514 pl_yylval.ival = OP_ANDASSIGN;
515 else if (toketype == OROR)
516 pl_yylval.ival = OP_ORASSIGN;
517 else if (toketype == DORDOR)
518 pl_yylval.ival = OP_DORASSIGN;
526 * When Perl expects an operator and finds something else, no_op
527 * prints the warning. It always prints "<something> found where
528 * operator expected. It prints "Missing semicolon on previous line?"
529 * if the surprise occurs at the start of the line. "do you need to
530 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
531 * where the compiler doesn't know if foo is a method call or a function.
532 * It prints "Missing operator before end of line" if there's nothing
533 * after the missing operator, or "... before <...>" if there is something
534 * after the missing operator.
538 S_no_op(pTHX_ const char *const what, char *s)
541 char * const oldbp = PL_bufptr;
542 const bool is_first = (PL_oldbufptr == PL_linestart);
544 PERL_ARGS_ASSERT_NO_OP;
550 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
551 if (ckWARN_d(WARN_SYNTAX)) {
553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
554 "\t(Missing semicolon on previous line?)\n");
555 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
557 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
558 t += UTF ? UTF8SKIP(t) : 1)
560 if (t < PL_bufptr && isSPACE(*t))
561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
562 "\t(Do you need to predeclare %"UTF8f"?)\n",
563 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
568 "\t(Missing operator before %"UTF8f"?)\n",
569 UTF8fARG(UTF, s - oldbp, oldbp));
577 * Complain about missing quote/regexp/heredoc terminator.
578 * If it's called with NULL then it cauterizes the line buffer.
579 * If we're in a delimited string and the delimiter is a control
580 * character, it's reformatted into a two-char sequence like ^C.
585 S_missingterm(pTHX_ char *s)
591 char * const nl = strrchr(s,'\n');
595 else if ((U8) PL_multi_close < 32) {
597 tmpbuf[1] = (char)toCTRL(PL_multi_close);
602 *tmpbuf = (char)PL_multi_close;
606 q = strchr(s,'"') ? '\'' : '"';
607 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
613 * Check whether the named feature is enabled.
616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
619 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
621 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
623 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
625 if (namelen > MAX_FEATURE_LEN)
627 memcpy(&he_name[8], name, namelen);
629 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
630 REFCOUNTED_HE_EXISTS));
634 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
635 * utf16-to-utf8-reversed.
638 #ifdef PERL_CR_FILTER
642 const char *s = SvPVX_const(sv);
643 const char * const e = s + SvCUR(sv);
645 PERL_ARGS_ASSERT_STRIP_RETURN;
647 /* outer loop optimized to do nothing if there are no CR-LFs */
649 if (*s++ == '\r' && *s == '\n') {
650 /* hit a CR-LF, need to copy the rest */
654 if (*s == '\r' && s[1] == '\n')
665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
667 const I32 count = FILTER_READ(idx+1, sv, maxlen);
668 if (count > 0 && !maxlen)
675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
677 Creates and initialises a new lexer/parser state object, supplying
678 a context in which to lex and parse from a new source of Perl code.
679 A pointer to the new state object is placed in L</PL_parser>. An entry
680 is made on the save stack so that upon unwinding the new state object
681 will be destroyed and the former value of L</PL_parser> will be restored.
682 Nothing else need be done to clean up the parsing context.
684 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
685 non-null, provides a string (in SV form) containing code to be parsed.
686 A copy of the string is made, so subsequent modification of I<line>
687 does not affect parsing. I<rsfp>, if non-null, provides an input stream
688 from which code will be read to be parsed. If both are non-null, the
689 code in I<line> comes first and must consist of complete lines of input,
690 and I<rsfp> supplies the remainder of the source.
692 The I<flags> parameter is reserved for future use. Currently it is only
693 used by perl internally, so extensions should always pass zero.
698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
699 can share filters with the current parser.
700 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
701 caller, hence isn't owned by the parser, so shouldn't be closed on parser
702 destruction. This is used to handle the case of defaulting to reading the
703 script from the standard input because no filename was given on the command
704 line (without getting confused by situation where STDIN has been closed, so
705 the script handle is opened on fd 0) */
708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
711 const char *s = NULL;
712 yy_parser *parser, *oparser;
713 if (flags && flags & ~LEX_START_FLAGS)
714 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
716 /* create and initialise a parser */
718 Newxz(parser, 1, yy_parser);
719 parser->old_parser = oparser = PL_parser;
722 parser->stack = NULL;
724 parser->stack_size = 0;
726 /* on scope exit, free this parser and restore any outer one */
728 parser->saved_curcop = PL_curcop;
730 /* initialise lexer state */
733 parser->curforce = -1;
735 parser->nexttoke = 0;
737 parser->error_count = oparser ? oparser->error_count : 0;
738 parser->copline = parser->preambling = NOLINE;
739 parser->lex_state = LEX_NORMAL;
740 parser->expect = XSTATE;
742 parser->rsfp_filters =
743 !(flags & LEX_START_SAME_FILTER) || !oparser
745 : MUTABLE_AV(SvREFCNT_inc(
746 oparser->rsfp_filters
747 ? oparser->rsfp_filters
748 : (oparser->rsfp_filters = newAV())
751 Newx(parser->lex_brackstack, 120, char);
752 Newx(parser->lex_casestack, 12, char);
753 *parser->lex_casestack = '\0';
754 Newxz(parser->lex_shared, 1, LEXSHARED);
758 s = SvPV_const(line, len);
759 parser->linestr = flags & LEX_START_COPIED
760 ? SvREFCNT_inc_simple_NN(line)
761 : newSVpvn_flags(s, len, SvUTF8(line));
762 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
764 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
766 parser->oldoldbufptr =
769 parser->linestart = SvPVX(parser->linestr);
770 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
771 parser->last_lop = parser->last_uni = NULL;
773 assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
774 |LEX_DONT_CLOSE_RSFP));
775 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
776 |LEX_DONT_CLOSE_RSFP));
778 parser->in_pod = parser->filtered = 0;
782 /* delete a parser object */
785 Perl_parser_free(pTHX_ const yy_parser *parser)
787 PERL_ARGS_ASSERT_PARSER_FREE;
789 PL_curcop = parser->saved_curcop;
790 SvREFCNT_dec(parser->linestr);
792 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
793 PerlIO_clearerr(parser->rsfp);
794 else if (parser->rsfp && (!parser->old_parser ||
795 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
796 PerlIO_close(parser->rsfp);
797 SvREFCNT_dec(parser->rsfp_filters);
798 SvREFCNT_dec(parser->lex_stuff);
799 SvREFCNT_dec(parser->sublex_info.repl);
801 Safefree(parser->lex_brackstack);
802 Safefree(parser->lex_casestack);
803 Safefree(parser->lex_shared);
804 PL_parser = parser->old_parser;
809 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
812 I32 nexttoke = parser->lasttoke;
814 I32 nexttoke = parser->nexttoke;
816 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
819 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
821 && parser->nexttoke[nexttoke].next_val.opval
822 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
823 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
824 op_free(parser->nexttoke[nexttoke].next_val.opval);
825 parser->nexttoke[nexttoke].next_val.opval = NULL;
828 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
829 && parser->nextval[nexttoke].opval
830 && parser->nextval[nexttoke].opval->op_slabbed
831 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
832 op_free(parser->nextval[nexttoke].opval);
833 parser->nextval[nexttoke].opval = NULL;
841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
843 Buffer scalar containing the chunk currently under consideration of the
844 text currently being lexed. This is always a plain string scalar (for
845 which C<SvPOK> is true). It is not intended to be used as a scalar by
846 normal scalar means; instead refer to the buffer directly by the pointer
847 variables described below.
849 The lexer maintains various C<char*> pointers to things in the
850 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
851 reallocated, all of these pointers must be updated. Don't attempt to
852 do this manually, but rather use L</lex_grow_linestr> if you need to
853 reallocate the buffer.
855 The content of the text chunk in the buffer is commonly exactly one
856 complete line of input, up to and including a newline terminator,
857 but there are situations where it is otherwise. The octets of the
858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
859 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
860 flag on this scalar, which may disagree with it.
862 For direct examination of the buffer, the variable
863 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
865 of these pointers is usually preferable to examination of the scalar
866 through normal scalar means.
868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
870 Direct pointer to the end of the chunk of text currently being lexed, the
871 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
872 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
873 always located at the end of the buffer, and does not count as part of
874 the buffer's contents.
876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
878 Points to the current position of lexing inside the lexer buffer.
879 Characters around this point may be freely examined, within
880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
881 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
884 Lexing code (whether in the Perl core or not) moves this pointer past
885 the characters that it consumes. It is also expected to perform some
886 bookkeeping whenever a newline character is consumed. This movement
887 can be more conveniently performed by the function L</lex_read_to>,
888 which handles newlines appropriately.
890 Interpretation of the buffer's octets can be abstracted out by
891 using the slightly higher-level functions L</lex_peek_unichar> and
892 L</lex_read_unichar>.
894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
896 Points to the start of the current line inside the lexer buffer.
897 This is useful for indicating at which column an error occurred, and
898 not much else. This must be updated by any lexing code that consumes
899 a newline; the function L</lex_read_to> handles this detail.
905 =for apidoc Amx|bool|lex_bufutf8
907 Indicates whether the octets in the lexer buffer
908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
909 of Unicode characters. If not, they should be interpreted as Latin-1
910 characters. This is analogous to the C<SvUTF8> flag for scalars.
912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
913 contains valid UTF-8. Lexing code must be robust in the face of invalid
916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
917 is significant, but not the whole story regarding the input character
918 encoding. Normally, when a file is being read, the scalar contains octets
919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
920 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
923 is in effect. This logic may change in the future; use this function
924 instead of implementing the logic yourself.
930 Perl_lex_bufutf8(pTHX)
936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
939 at least I<len> octets (including terminating NUL). Returns a
940 pointer to the reallocated buffer. This is necessary before making
941 any direct modification of the buffer that would increase its length.
942 L</lex_stuff_pvn> provides a more convenient way to insert text into
945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
946 this function updates all of the lexer's variables that point directly
953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
957 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
958 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
959 linestr = PL_parser->linestr;
960 buf = SvPVX(linestr);
961 if (len <= SvLEN(linestr))
963 bufend_pos = PL_parser->bufend - buf;
964 bufptr_pos = PL_parser->bufptr - buf;
965 oldbufptr_pos = PL_parser->oldbufptr - buf;
966 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
967 linestart_pos = PL_parser->linestart - buf;
968 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
969 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
970 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
971 PL_parser->lex_shared->re_eval_start - buf : 0;
973 buf = sv_grow(linestr, len);
975 PL_parser->bufend = buf + bufend_pos;
976 PL_parser->bufptr = buf + bufptr_pos;
977 PL_parser->oldbufptr = buf + oldbufptr_pos;
978 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
979 PL_parser->linestart = buf + linestart_pos;
980 if (PL_parser->last_uni)
981 PL_parser->last_uni = buf + last_uni_pos;
982 if (PL_parser->last_lop)
983 PL_parser->last_lop = buf + last_lop_pos;
984 if (PL_parser->lex_shared->re_eval_start)
985 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
994 reallocating the buffer if necessary. This means that lexing code that
995 runs later will see the characters as if they had appeared in the input.
996 It is not recommended to do this as part of normal parsing, and most
997 uses of this facility run the risk of the inserted characters being
998 interpreted in an unintended manner.
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1016 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017 if (flags & ~(LEX_STUFF_UTF8))
1018 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1020 if (flags & LEX_STUFF_UTF8) {
1023 STRLEN highhalf = 0; /* Count of variants */
1024 const char *p, *e = pv+len;
1025 for (p = pv; p != e; p++) {
1026 if (! UTF8_IS_INVARIANT(*p)) {
1032 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033 bufptr = PL_parser->bufptr;
1034 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035 SvCUR_set(PL_parser->linestr,
1036 SvCUR(PL_parser->linestr) + len+highhalf);
1037 PL_parser->bufend += len+highhalf;
1038 for (p = pv; p != e; p++) {
1040 if (! UTF8_IS_INVARIANT(c)) {
1041 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1044 *bufptr++ = (char)c;
1049 if (flags & LEX_STUFF_UTF8) {
1050 STRLEN highhalf = 0;
1051 const char *p, *e = pv+len;
1052 for (p = pv; p != e; p++) {
1054 if (UTF8_IS_ABOVE_LATIN1(c)) {
1055 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056 "non-Latin-1 character into Latin-1 input");
1057 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1060 } else if (! UTF8_IS_INVARIANT(c)) {
1061 /* malformed UTF-8 */
1063 SAVESPTR(PL_warnhook);
1064 PL_warnhook = PERL_WARNHOOK_FATAL;
1065 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1071 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072 bufptr = PL_parser->bufptr;
1073 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074 SvCUR_set(PL_parser->linestr,
1075 SvCUR(PL_parser->linestr) + len-highhalf);
1076 PL_parser->bufend += len-highhalf;
1079 if (UTF8_IS_INVARIANT(*p)) {
1085 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1091 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092 bufptr = PL_parser->bufptr;
1093 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095 PL_parser->bufend += len;
1096 Copy(pv, bufptr, len, char);
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary. This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul. These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>. The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1126 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127 lex_stuff_pvn(pv, strlen(pv), flags);
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary. This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1141 The string to be inserted is the string value of I<sv>. The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1155 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1157 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1159 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1170 This is not the normal way to consume lexed text. For that, use
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1181 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182 buf = PL_parser->bufptr;
1184 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187 bufend = PL_parser->bufend;
1189 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190 unstuff_len = ptr - buf;
1191 Move(ptr, buf, bufend+1-ptr, char);
1192 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193 PL_parser->bufend = bufend - unstuff_len;
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1212 Perl_lex_read_to(pTHX_ char *ptr)
1215 PERL_ARGS_ASSERT_LEX_READ_TO;
1216 s = PL_parser->bufptr;
1217 if (ptr < s || ptr > PL_parser->bufend)
1218 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219 for (; s != ptr; s++)
1221 COPLINE_INC_WITH_HERELINES;
1222 PL_parser->linestart = s+1;
1224 PL_parser->bufptr = ptr;
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>. The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately. I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it. However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1252 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253 buf = SvPVX(PL_parser->linestr);
1255 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1258 if (ptr > PL_parser->bufptr)
1259 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260 discard_len = ptr - buf;
1261 if (PL_parser->oldbufptr < ptr)
1262 PL_parser->oldbufptr = ptr;
1263 if (PL_parser->oldoldbufptr < ptr)
1264 PL_parser->oldoldbufptr = ptr;
1265 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266 PL_parser->last_uni = NULL;
1267 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268 PL_parser->last_lop = NULL;
1269 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271 PL_parser->bufend -= discard_len;
1272 PL_parser->bufptr -= discard_len;
1273 PL_parser->oldbufptr -= discard_len;
1274 PL_parser->oldoldbufptr -= discard_len;
1275 if (PL_parser->last_uni)
1276 PL_parser->last_uni -= discard_len;
1277 if (PL_parser->last_lop)
1278 PL_parser->last_lop -= discard_len;
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more. It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded. If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM 0x40000000
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1311 STRLEN old_bufend_pos, new_bufend_pos;
1312 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314 bool got_some_for_debugger = 0;
1316 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318 linestr = PL_parser->linestr;
1319 buf = SvPVX(linestr);
1320 if (!(flags & LEX_KEEP_PREVIOUS) &&
1321 PL_parser->bufptr == PL_parser->bufend) {
1322 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1324 if (PL_parser->last_uni != PL_parser->bufend)
1325 PL_parser->last_uni = NULL;
1326 if (PL_parser->last_lop != PL_parser->bufend)
1327 PL_parser->last_lop = NULL;
1328 last_uni_pos = last_lop_pos = 0;
1332 old_bufend_pos = PL_parser->bufend - buf;
1333 bufptr_pos = PL_parser->bufptr - buf;
1334 oldbufptr_pos = PL_parser->oldbufptr - buf;
1335 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1336 linestart_pos = PL_parser->linestart - buf;
1337 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1338 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1340 if (flags & LEX_FAKE_EOF) {
1342 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1344 } else if (filter_gets(linestr, old_bufend_pos)) {
1346 got_some_for_debugger = 1;
1347 } else if (flags & LEX_NO_TERM) {
1350 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1351 sv_setpvs(linestr, "");
1353 /* End of real input. Close filehandle (unless it was STDIN),
1354 * then add implicit termination.
1356 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1357 PerlIO_clearerr(PL_parser->rsfp);
1358 else if (PL_parser->rsfp)
1359 (void)PerlIO_close(PL_parser->rsfp);
1360 PL_parser->rsfp = NULL;
1361 PL_parser->in_pod = PL_parser->filtered = 0;
1363 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1366 if (!PL_in_eval && PL_minus_p) {
1368 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1369 PL_minus_n = PL_minus_p = 0;
1370 } else if (!PL_in_eval && PL_minus_n) {
1371 sv_catpvs(linestr, /*{*/";}");
1374 sv_catpvs(linestr, ";");
1377 buf = SvPVX(linestr);
1378 new_bufend_pos = SvCUR(linestr);
1379 PL_parser->bufend = buf + new_bufend_pos;
1380 PL_parser->bufptr = buf + bufptr_pos;
1381 PL_parser->oldbufptr = buf + oldbufptr_pos;
1382 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1383 PL_parser->linestart = buf + linestart_pos;
1384 if (PL_parser->last_uni)
1385 PL_parser->last_uni = buf + last_uni_pos;
1386 if (PL_parser->last_lop)
1387 PL_parser->last_lop = buf + last_lop_pos;
1388 if (PL_parser->preambling != NOLINE) {
1389 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1390 PL_parser->preambling = NOLINE;
1392 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1393 PL_curstash != PL_debstash) {
1394 /* debugger active and we're not compiling the debugger code,
1395 * so store the line into the debugger's array of lines
1397 update_debugger_info(NULL, buf+old_bufend_pos,
1398 new_bufend_pos-old_bufend_pos);
1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1406 Looks ahead one (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the next character,
1408 or -1 if lexing has reached the end of the input text. To consume the
1409 peeked character, use L</lex_read_unichar>.
1411 If the next character is in (or extends into) the next chunk of input
1412 text, the next chunk will be read in. Normally the current chunk will be
1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1414 then the current chunk will not be discarded.
1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1417 is encountered, an exception is generated.
1423 Perl_lex_peek_unichar(pTHX_ U32 flags)
1427 if (flags & ~(LEX_KEEP_PREVIOUS))
1428 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1429 s = PL_parser->bufptr;
1430 bufend = PL_parser->bufend;
1436 if (!lex_next_chunk(flags))
1438 s = PL_parser->bufptr;
1439 bufend = PL_parser->bufend;
1442 if (UTF8_IS_INVARIANT(head))
1444 if (UTF8_IS_START(head)) {
1445 len = UTF8SKIP(&head);
1446 while ((STRLEN)(bufend-s) < len) {
1447 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1453 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1454 if (retlen == (STRLEN)-1) {
1455 /* malformed UTF-8 */
1457 SAVESPTR(PL_warnhook);
1458 PL_warnhook = PERL_WARNHOOK_FATAL;
1459 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1465 if (!lex_next_chunk(flags))
1467 s = PL_parser->bufptr;
1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1476 Reads the next (Unicode) character in the text currently being lexed.
1477 Returns the codepoint (unsigned integer value) of the character read,
1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1479 if lexing has reached the end of the input text. To non-destructively
1480 examine the next character, use L</lex_peek_unichar> instead.
1482 If the next character is in (or extends into) the next chunk of input
1483 text, the next chunk will be read in. Normally the current chunk will be
1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1485 then the current chunk will not be discarded.
1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1488 is encountered, an exception is generated.
1494 Perl_lex_read_unichar(pTHX_ U32 flags)
1497 if (flags & ~(LEX_KEEP_PREVIOUS))
1498 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1499 c = lex_peek_unichar(flags);
1502 COPLINE_INC_WITH_HERELINES;
1504 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1506 ++(PL_parser->bufptr);
1512 =for apidoc Amx|void|lex_read_space|U32 flags
1514 Reads optional spaces, in Perl style, in the text currently being
1515 lexed. The spaces may include ordinary whitespace characters and
1516 Perl-style comments. C<#line> directives are processed if encountered.
1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1518 at a non-space character (or the end of the input text).
1520 If spaces extend into the next chunk of input text, the next chunk will
1521 be read in. Normally the current chunk will be discarded at the same
1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1523 chunk will not be discarded.
1528 #define LEX_NO_INCLINE 0x40000000
1529 #define LEX_NO_NEXT_CHUNK 0x80000000
1532 Perl_lex_read_space(pTHX_ U32 flags)
1535 const bool can_incline = !(flags & LEX_NO_INCLINE);
1536 bool need_incline = 0;
1537 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1538 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1541 sv_free(PL_skipwhite);
1542 PL_skipwhite = NULL;
1545 PL_skipwhite = newSVpvs("");
1546 #endif /* PERL_MAD */
1547 s = PL_parser->bufptr;
1548 bufend = PL_parser->bufend;
1554 } while (!(c == '\n' || (c == 0 && s == bufend)));
1555 } else if (c == '\n') {
1558 PL_parser->linestart = s;
1564 } else if (isSPACE(c)) {
1566 } else if (c == 0 && s == bufend) {
1571 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1572 #endif /* PERL_MAD */
1573 if (flags & LEX_NO_NEXT_CHUNK)
1575 PL_parser->bufptr = s;
1576 l = CopLINE(PL_curcop);
1577 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1578 got_more = lex_next_chunk(flags);
1579 CopLINE_set(PL_curcop, l);
1580 s = PL_parser->bufptr;
1581 bufend = PL_parser->bufend;
1584 if (can_incline && need_incline && PL_parser->rsfp) {
1594 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1595 #endif /* PERL_MAD */
1596 PL_parser->bufptr = s;
1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1603 This function performs syntax checking on a prototype, C<proto>.
1604 If C<warn> is true, any illegal characters or mismatched brackets
1605 will trigger illegalproto warnings, declaring that they were
1606 detected in the prototype for C<name>.
1608 The return value is C<true> if this is a valid prototype, and
1609 C<false> if it is not, regardless of whether C<warn> was C<true> or
1612 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1621 STRLEN len, origlen;
1622 char *p = proto ? SvPV(proto, len) : NULL;
1623 bool bad_proto = FALSE;
1624 bool in_brackets = FALSE;
1625 bool after_slash = FALSE;
1626 char greedy_proto = ' ';
1627 bool proto_after_greedy_proto = FALSE;
1628 bool must_be_last = FALSE;
1629 bool underscore = FALSE;
1630 bool bad_proto_after_underscore = FALSE;
1632 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1638 for (; len--; p++) {
1641 proto_after_greedy_proto = TRUE;
1643 if (!strchr(";@%", *p))
1644 bad_proto_after_underscore = TRUE;
1647 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1654 in_brackets = FALSE;
1655 else if ((*p == '@' || *p == '%') &&
1658 must_be_last = TRUE;
1667 after_slash = FALSE;
1672 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1675 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1676 origlen, UNI_DISPLAY_ISPRINT)
1677 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1679 if (proto_after_greedy_proto)
1680 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1681 "Prototype after '%c' for %"SVf" : %s",
1682 greedy_proto, SVfARG(name), p);
1684 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1685 "Missing ']' in prototype for %"SVf" : %s",
1688 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1689 "Illegal character in prototype for %"SVf" : %s",
1691 if (bad_proto_after_underscore)
1692 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1693 "Illegal character after '_' in prototype for %"SVf" : %s",
1697 return (! (proto_after_greedy_proto || bad_proto) );
1702 * This subroutine has nothing to do with tilting, whether at windmills
1703 * or pinball tables. Its name is short for "increment line". It
1704 * increments the current line number in CopLINE(PL_curcop) and checks
1705 * to see whether the line starts with a comment of the form
1706 * # line 500 "foo.pm"
1707 * If so, it sets the current line number and file to the values in the comment.
1711 S_incline(pTHX_ const char *s)
1719 PERL_ARGS_ASSERT_INCLINE;
1721 COPLINE_INC_WITH_HERELINES;
1722 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1723 && s+1 == PL_bufend && *s == ';') {
1724 /* fake newline in string eval */
1725 CopLINE_dec(PL_curcop);
1730 while (SPACE_OR_TAB(*s))
1732 if (strnEQ(s, "line", 4))
1736 if (SPACE_OR_TAB(*s))
1740 while (SPACE_OR_TAB(*s))
1748 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1750 while (SPACE_OR_TAB(*s))
1752 if (*s == '"' && (t = strchr(s+1, '"'))) {
1758 while (!isSPACE(*t))
1762 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1764 if (*e != '\n' && *e != '\0')
1765 return; /* false alarm */
1767 line_num = atoi(n)-1;
1770 const STRLEN len = t - s;
1772 if (!PL_rsfp && !PL_parser->filtered) {
1773 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1774 * to *{"::_<newfilename"} */
1775 /* However, the long form of evals is only turned on by the
1776 debugger - usually they're "(eval %lu)" */
1777 GV * const cfgv = CopFILEGV(PL_curcop);
1780 STRLEN tmplen2 = len;
1784 if (tmplen2 + 2 <= sizeof smallbuf)
1787 Newx(tmpbuf2, tmplen2 + 2, char);
1792 memcpy(tmpbuf2 + 2, s, tmplen2);
1795 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1797 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1798 /* adjust ${"::_<newfilename"} to store the new file name */
1799 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1800 /* The line number may differ. If that is the case,
1801 alias the saved lines that are in the array.
1802 Otherwise alias the whole array. */
1803 if (CopLINE(PL_curcop) == line_num) {
1804 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1805 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1807 else if (GvAV(cfgv)) {
1808 AV * const av = GvAV(cfgv);
1809 const I32 start = CopLINE(PL_curcop)+1;
1810 I32 items = AvFILLp(av) - start;
1812 AV * const av2 = GvAVn(gv2);
1813 SV **svp = AvARRAY(av) + start;
1814 I32 l = (I32)line_num+1;
1816 av_store(av2, l++, SvREFCNT_inc(*svp++));
1821 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1824 CopFILE_free(PL_curcop);
1825 CopFILE_setn(PL_curcop, s, len);
1827 CopLINE_set(PL_curcop, line_num);
1830 #define skipspace(s) skipspace_flags(s, 0)
1833 /* skip space before PL_thistoken */
1836 S_skipspace0(pTHX_ char *s)
1838 PERL_ARGS_ASSERT_SKIPSPACE0;
1845 PL_thiswhite = newSVpvs("");
1846 sv_catsv(PL_thiswhite, PL_skipwhite);
1847 sv_free(PL_skipwhite);
1850 PL_realtokenstart = s - SvPVX(PL_linestr);
1854 /* skip space after PL_thistoken */
1857 S_skipspace1(pTHX_ char *s)
1859 const char *start = s;
1860 I32 startoff = start - SvPVX(PL_linestr);
1862 PERL_ARGS_ASSERT_SKIPSPACE1;
1867 start = SvPVX(PL_linestr) + startoff;
1868 if (!PL_thistoken && PL_realtokenstart >= 0) {
1869 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1870 PL_thistoken = newSVpvn(tstart, start - tstart);
1872 PL_realtokenstart = -1;
1875 PL_nextwhite = newSVpvs("");
1876 sv_catsv(PL_nextwhite, PL_skipwhite);
1877 sv_free(PL_skipwhite);
1884 S_skipspace2(pTHX_ char *s, SV **svp)
1887 const I32 startoff = s - SvPVX(PL_linestr);
1889 PERL_ARGS_ASSERT_SKIPSPACE2;
1892 if (!PL_madskills || !svp)
1894 start = SvPVX(PL_linestr) + startoff;
1895 if (!PL_thistoken && PL_realtokenstart >= 0) {
1896 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1897 PL_thistoken = newSVpvn(tstart, start - tstart);
1898 PL_realtokenstart = -1;
1902 *svp = newSVpvs("");
1903 sv_setsv(*svp, PL_skipwhite);
1904 sv_free(PL_skipwhite);
1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1915 AV *av = CopFILEAVx(PL_curcop);
1918 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1920 sv = *av_fetch(av, 0, 1);
1921 SvUPGRADE(sv, SVt_PVMG);
1923 if (!SvPOK(sv)) sv_setpvs(sv,"");
1925 sv_catsv(sv, orig_sv);
1927 sv_catpvn(sv, buf, len);
1932 if (PL_parser->preambling == NOLINE)
1933 av_store(av, CopLINE(PL_curcop), sv);
1939 * Called to gobble the appropriate amount and type of whitespace.
1940 * Skips comments as well.
1944 S_skipspace_flags(pTHX_ char *s, U32 flags)
1948 #endif /* PERL_MAD */
1949 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1952 sv_free(PL_skipwhite);
1953 PL_skipwhite = NULL;
1955 #endif /* PERL_MAD */
1956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1957 while (s < PL_bufend && SPACE_OR_TAB(*s))
1960 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1962 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1963 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1964 LEX_NO_NEXT_CHUNK : 0));
1966 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1967 if (PL_linestart > PL_bufptr)
1968 PL_bufptr = PL_linestart;
1973 PL_skipwhite = newSVpvn(start, s-start);
1974 #endif /* PERL_MAD */
1980 * Check the unary operators to ensure there's no ambiguity in how they're
1981 * used. An ambiguous piece of code would be:
1983 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1984 * the +5 is its argument.
1994 if (PL_oldoldbufptr != PL_last_uni)
1996 while (isSPACE(*PL_last_uni))
1999 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2001 if ((t = strchr(s, '(')) && t < PL_bufptr)
2004 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2005 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2006 (int)(s - PL_last_uni), PL_last_uni);
2010 * LOP : macro to build a list operator. Its behaviour has been replaced
2011 * with a subroutine, S_lop() for which LOP is just another name.
2014 #define LOP(f,x) return lop(f,x,s)
2018 * Build a list operator (or something that might be one). The rules:
2019 * - if we have a next token, then it's a list operator [why?]
2020 * - if the next thing is an opening paren, then it's a function
2021 * - else it's a list operator
2025 S_lop(pTHX_ I32 f, int x, char *s)
2029 PERL_ARGS_ASSERT_LOP;
2035 PL_last_lop = PL_oldbufptr;
2036 PL_last_lop_op = (OPCODE)f;
2045 return REPORT(FUNC);
2048 return REPORT(FUNC);
2051 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2052 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2053 return REPORT(LSTOP);
2060 * Sets up for an eventual force_next(). start_force(0) basically does
2061 * an unshift, while start_force(-1) does a push. yylex removes items
2066 S_start_force(pTHX_ int where)
2070 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2071 where = PL_lasttoke;
2072 assert(PL_curforce < 0 || PL_curforce == where);
2073 if (PL_curforce != where) {
2074 for (i = PL_lasttoke; i > where; --i) {
2075 PL_nexttoke[i] = PL_nexttoke[i-1];
2079 if (PL_curforce < 0) /* in case of duplicate start_force() */
2080 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2081 PL_curforce = where;
2084 curmad('^', newSVpvs(""));
2085 CURMAD('_', PL_nextwhite);
2090 S_curmad(pTHX_ char slot, SV *sv)
2096 if (PL_curforce < 0)
2097 where = &PL_thismad;
2099 where = &PL_nexttoke[PL_curforce].next_mad;
2105 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2107 else if (PL_encoding) {
2108 sv_recode_to_utf8(sv, PL_encoding);
2113 /* keep a slot open for the head of the list? */
2114 if (slot != '_' && *where && (*where)->mad_key == '^') {
2115 (*where)->mad_key = slot;
2116 sv_free(MUTABLE_SV(((*where)->mad_val)));
2117 (*where)->mad_val = (void*)sv;
2120 addmad(newMADsv(slot, sv), where, 0);
2123 # define start_force(where) NOOP
2124 # define curmad(slot, sv) NOOP
2129 * When the lexer realizes it knows the next token (for instance,
2130 * it is reordering tokens for the parser) then it can call S_force_next
2131 * to know what token to return the next time the lexer is called. Caller
2132 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2133 * and possibly PL_expect to ensure the lexer handles the token correctly.
2137 S_force_next(pTHX_ I32 type)
2142 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2143 tokereport(type, &NEXTVAL_NEXTTOKE);
2147 if (PL_curforce < 0)
2148 start_force(PL_lasttoke);
2149 PL_nexttoke[PL_curforce].next_type = type;
2150 if (PL_lex_state != LEX_KNOWNEXT)
2151 PL_lex_defer = PL_lex_state;
2152 PL_lex_state = LEX_KNOWNEXT;
2153 PL_lex_expect = PL_expect;
2156 PL_nexttype[PL_nexttoke] = type;
2158 if (PL_lex_state != LEX_KNOWNEXT) {
2159 PL_lex_defer = PL_lex_state;
2160 PL_lex_expect = PL_expect;
2161 PL_lex_state = LEX_KNOWNEXT;
2169 * This subroutine handles postfix deref syntax after the arrow has already
2170 * been emitted. @* $* etc. are emitted as two separate token right here.
2171 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2172 * only the first, leaving yylex to find the next.
2176 S_postderef(pTHX_ int const funny, char const next)
2179 assert(funny == DOLSHARP || strchr("$@%&*", funny));
2180 assert(strchr("*[{", next));
2182 PL_expect = XOPERATOR;
2183 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2184 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2185 PL_lex_state = LEX_INTERPEND;
2186 start_force(PL_curforce);
2187 force_next(POSTJOIN);
2189 start_force(PL_curforce);
2194 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2195 && !PL_lex_brackets)
2197 PL_expect = XOPERATOR;
2206 int yyc = PL_parser->yychar;
2207 if (yyc != YYEMPTY) {
2210 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2212 PL_lex_allbrackets--;
2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215 } else if (yyc == '('/*)*/) {
2216 PL_lex_allbrackets--;
2221 PL_parser->yychar = YYEMPTY;
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2229 SV * const sv = newSVpvn_utf8(start, len,
2232 && !is_ascii_string((const U8*)start, len)
2233 && is_utf8_string((const U8*)start, len));
2239 * When the lexer knows the next thing is a word (for instance, it has
2240 * just seen -> and it knows that the next char is a word char, then
2241 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2245 * char *start : buffer position (must be within PL_linestr)
2246 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2247 * int check_keyword : if true, Perl checks to make sure the word isn't
2248 * a keyword (do this if the word is a label, e.g. goto FOO)
2249 * int allow_pack : if true, : characters will also be allowed (require,
2250 * use, etc. do this)
2251 * int allow_initial_tick : used by the "sub" lexer only.
2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2261 PERL_ARGS_ASSERT_FORCE_WORD;
2263 start = SKIPSPACE1(start);
2265 if (isIDFIRST_lazy_if(s,UTF) ||
2266 (allow_pack && *s == ':') )
2268 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2269 if (check_keyword) {
2270 char *s2 = PL_tokenbuf;
2271 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2273 if (keyword(s2, len, 0))
2276 start_force(PL_curforce);
2278 curmad('X', newSVpvn(start,s-start));
2279 if (token == METHOD) {
2284 PL_expect = XOPERATOR;
2288 curmad('g', newSVpvs( "forced" ));
2289 NEXTVAL_NEXTTOKE.opval
2290 = (OP*)newSVOP(OP_CONST,0,
2291 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2292 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2300 * Called when the lexer wants $foo *foo &foo etc, but the program
2301 * text only contains the "foo" portion. The first argument is a pointer
2302 * to the "foo", and the second argument is the type symbol to prefix.
2303 * Forces the next token to be a "WORD".
2304 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2308 S_force_ident(pTHX_ const char *s, int kind)
2312 PERL_ARGS_ASSERT_FORCE_IDENT;
2315 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2316 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2317 UTF ? SVf_UTF8 : 0));
2318 start_force(PL_curforce);
2319 NEXTVAL_NEXTTOKE.opval = o;
2322 o->op_private = OPpCONST_ENTERED;
2323 /* XXX see note in pp_entereval() for why we forgo typo
2324 warnings if the symbol must be introduced in an eval.
2326 gv_fetchpvn_flags(s, len,
2327 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2328 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2329 kind == '$' ? SVt_PV :
2330 kind == '@' ? SVt_PVAV :
2331 kind == '%' ? SVt_PVHV :
2339 S_force_ident_maybe_lex(pTHX_ char pit)
2341 start_force(PL_curforce);
2342 NEXTVAL_NEXTTOKE.ival = pit;
2347 Perl_str_to_version(pTHX_ SV *sv)
2352 const char *start = SvPV_const(sv,len);
2353 const char * const end = start + len;
2354 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2356 PERL_ARGS_ASSERT_STR_TO_VERSION;
2358 while (start < end) {
2362 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2367 retval += ((NV)n)/nshift;
2376 * Forces the next token to be a version number.
2377 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2378 * and if "guessing" is TRUE, then no new token is created (and the caller
2379 * must use an alternative parsing method).
2383 S_force_version(pTHX_ char *s, int guessing)
2389 I32 startoff = s - SvPVX(PL_linestr);
2392 PERL_ARGS_ASSERT_FORCE_VERSION;
2400 while (isDIGIT(*d) || *d == '_' || *d == '.')
2404 start_force(PL_curforce);
2405 curmad('X', newSVpvn(s,d-s));
2408 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2410 #ifdef USE_LOCALE_NUMERIC
2411 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2412 setlocale(LC_NUMERIC, "C");
2414 s = scan_num(s, &pl_yylval);
2415 #ifdef USE_LOCALE_NUMERIC
2416 setlocale(LC_NUMERIC, loc);
2419 version = pl_yylval.opval;
2420 ver = cSVOPx(version)->op_sv;
2421 if (SvPOK(ver) && !SvNIOK(ver)) {
2422 SvUPGRADE(ver, SVt_PVNV);
2423 SvNV_set(ver, str_to_version(ver));
2424 SvNOK_on(ver); /* hint that it is a version */
2427 else if (guessing) {
2430 sv_free(PL_nextwhite); /* let next token collect whitespace */
2432 s = SvPVX(PL_linestr) + startoff;
2440 if (PL_madskills && !version) {
2441 sv_free(PL_nextwhite); /* let next token collect whitespace */
2443 s = SvPVX(PL_linestr) + startoff;
2446 /* NOTE: The parser sees the package name and the VERSION swapped */
2447 start_force(PL_curforce);
2448 NEXTVAL_NEXTTOKE.opval = version;
2455 * S_force_strict_version
2456 * Forces the next token to be a version number using strict syntax rules.
2460 S_force_strict_version(pTHX_ char *s)
2465 I32 startoff = s - SvPVX(PL_linestr);
2467 const char *errstr = NULL;
2469 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2471 while (isSPACE(*s)) /* leading whitespace */
2474 if (is_STRICT_VERSION(s,&errstr)) {
2476 s = (char *)scan_version(s, ver, 0);
2477 version = newSVOP(OP_CONST, 0, ver);
2479 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2480 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2484 yyerror(errstr); /* version required */
2489 if (PL_madskills && !version) {
2490 sv_free(PL_nextwhite); /* let next token collect whitespace */
2492 s = SvPVX(PL_linestr) + startoff;
2495 /* NOTE: The parser sees the package name and the VERSION swapped */
2496 start_force(PL_curforce);
2497 NEXTVAL_NEXTTOKE.opval = version;
2505 * Tokenize a quoted string passed in as an SV. It finds the next
2506 * chunk, up to end of string or a backslash. It may make a new
2507 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2512 S_tokeq(pTHX_ SV *sv)
2520 PERL_ARGS_ASSERT_TOKEQ;
2524 assert (!SvIsCOW(sv));
2525 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2529 /* This is relying on the SV being "well formed" with a trailing '\0' */
2530 while (s < send && !(*s == '\\' && s[1] == '\\'))
2535 if ( PL_hints & HINT_NEW_STRING ) {
2536 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2537 SVs_TEMP | SvUTF8(sv));
2541 if (s + 1 < send && (s[1] == '\\'))
2542 s++; /* all that, just for this */
2547 SvCUR_set(sv, d - SvPVX_const(sv));
2549 if ( PL_hints & HINT_NEW_STRING )
2550 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2555 * Now come three functions related to double-quote context,
2556 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2557 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2558 * interact with PL_lex_state, and create fake ( ... ) argument lists
2559 * to handle functions and concatenation.
2563 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2568 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2570 * Pattern matching will set PL_lex_op to the pattern-matching op to
2571 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2573 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2575 * Everything else becomes a FUNC.
2577 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2578 * had an OP_CONST or OP_READLINE). This just sets us up for a
2579 * call to S_sublex_push().
2583 S_sublex_start(pTHX)
2586 const I32 op_type = pl_yylval.ival;
2588 if (op_type == OP_NULL) {
2589 pl_yylval.opval = PL_lex_op;
2593 if (op_type == OP_CONST) {
2594 SV *sv = tokeq(PL_lex_stuff);
2596 if (SvTYPE(sv) == SVt_PVIV) {
2597 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2599 const char * const p = SvPV_const(sv, len);
2600 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2604 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2605 PL_lex_stuff = NULL;
2609 PL_sublex_info.super_state = PL_lex_state;
2610 PL_sublex_info.sub_inwhat = (U16)op_type;
2611 PL_sublex_info.sub_op = PL_lex_op;
2612 PL_lex_state = LEX_INTERPPUSH;
2616 pl_yylval.opval = PL_lex_op;
2626 * Create a new scope to save the lexing state. The scope will be
2627 * ended in S_sublex_done. Returns a '(', starting the function arguments
2628 * to the uc, lc, etc. found before.
2629 * Sets PL_lex_state to LEX_INTERPCONCAT.
2637 const bool is_heredoc = PL_multi_close == '<';
2640 PL_lex_state = PL_sublex_info.super_state;
2641 SAVEI8(PL_lex_dojoin);
2642 SAVEI32(PL_lex_brackets);
2643 SAVEI32(PL_lex_allbrackets);
2644 SAVEI32(PL_lex_formbrack);
2645 SAVEI8(PL_lex_fakeeof);
2646 SAVEI32(PL_lex_casemods);
2647 SAVEI32(PL_lex_starts);
2648 SAVEI8(PL_lex_state);
2649 SAVESPTR(PL_lex_repl);
2650 SAVEVPTR(PL_lex_inpat);
2651 SAVEI16(PL_lex_inwhat);
2654 SAVECOPLINE(PL_curcop);
2655 SAVEI32(PL_multi_end);
2656 SAVEI32(PL_parser->herelines);
2657 PL_parser->herelines = 0;
2659 SAVEI8(PL_multi_close);
2660 SAVEPPTR(PL_bufptr);
2661 SAVEPPTR(PL_bufend);
2662 SAVEPPTR(PL_oldbufptr);
2663 SAVEPPTR(PL_oldoldbufptr);
2664 SAVEPPTR(PL_last_lop);
2665 SAVEPPTR(PL_last_uni);
2666 SAVEPPTR(PL_linestart);
2667 SAVESPTR(PL_linestr);
2668 SAVEGENERICPV(PL_lex_brackstack);
2669 SAVEGENERICPV(PL_lex_casestack);
2670 SAVEGENERICPV(PL_parser->lex_shared);
2671 SAVEBOOL(PL_parser->lex_re_reparsing);
2672 SAVEI32(PL_copline);
2674 /* The here-doc parser needs to be able to peek into outer lexing
2675 scopes to find the body of the here-doc. So we put PL_linestr and
2676 PL_bufptr into lex_shared, to ‘share’ those values.
2678 PL_parser->lex_shared->ls_linestr = PL_linestr;
2679 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2681 PL_linestr = PL_lex_stuff;
2682 PL_lex_repl = PL_sublex_info.repl;
2683 PL_lex_stuff = NULL;
2684 PL_sublex_info.repl = NULL;
2686 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2687 = SvPVX(PL_linestr);
2688 PL_bufend += SvCUR(PL_linestr);
2689 PL_last_lop = PL_last_uni = NULL;
2690 SAVEFREESV(PL_linestr);
2691 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2693 PL_lex_dojoin = FALSE;
2694 PL_lex_brackets = PL_lex_formbrack = 0;
2695 PL_lex_allbrackets = 0;
2696 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2697 Newx(PL_lex_brackstack, 120, char);
2698 Newx(PL_lex_casestack, 12, char);
2699 PL_lex_casemods = 0;
2700 *PL_lex_casestack = '\0';
2702 PL_lex_state = LEX_INTERPCONCAT;
2704 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2705 PL_copline = NOLINE;
2707 Newxz(shared, 1, LEXSHARED);
2708 shared->ls_prev = PL_parser->lex_shared;
2709 PL_parser->lex_shared = shared;
2711 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2712 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2713 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2714 PL_lex_inpat = PL_sublex_info.sub_op;
2716 PL_lex_inpat = NULL;
2718 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2719 PL_in_eval &= ~EVAL_RE_REPARSING;
2726 * Restores lexer state after a S_sublex_push.
2733 if (!PL_lex_starts++) {
2734 SV * const sv = newSVpvs("");
2735 if (SvUTF8(PL_linestr))
2737 PL_expect = XOPERATOR;
2738 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2742 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2743 PL_lex_state = LEX_INTERPCASEMOD;
2747 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2748 assert(PL_lex_inwhat != OP_TRANSR);
2750 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2751 PL_linestr = PL_lex_repl;
2753 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2754 PL_bufend += SvCUR(PL_linestr);
2755 PL_last_lop = PL_last_uni = NULL;
2756 PL_lex_dojoin = FALSE;
2757 PL_lex_brackets = 0;
2758 PL_lex_allbrackets = 0;
2759 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2760 PL_lex_casemods = 0;
2761 *PL_lex_casestack = '\0';
2763 if (SvEVALED(PL_lex_repl)) {
2764 PL_lex_state = LEX_INTERPNORMAL;
2766 /* we don't clear PL_lex_repl here, so that we can check later
2767 whether this is an evalled subst; that means we rely on the
2768 logic to ensure sublex_done() is called again only via the
2769 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2772 PL_lex_state = LEX_INTERPCONCAT;
2775 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2776 CopLINE(PL_curcop) +=
2777 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2778 + PL_parser->herelines;
2779 PL_parser->herelines = 0;
2784 const line_t l = CopLINE(PL_curcop);
2789 PL_endwhite = newSVpvs("");
2790 sv_catsv(PL_endwhite, PL_thiswhite);
2794 sv_setpvs(PL_thistoken,"");
2796 PL_realtokenstart = -1;
2800 if (PL_multi_close == '<')
2801 PL_parser->herelines += l - PL_multi_end;
2802 PL_bufend = SvPVX(PL_linestr);
2803 PL_bufend += SvCUR(PL_linestr);
2804 PL_expect = XOPERATOR;
2805 PL_sublex_info.sub_inwhat = 0;
2810 PERL_STATIC_INLINE SV*
2811 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2813 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2814 * interior, hence to the "}". Finds what the name resolves to, returning
2815 * an SV* containing it; NULL if no valid one found */
2817 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2824 const U8* first_bad_char_loc;
2825 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2827 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2829 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2831 &first_bad_char_loc))
2833 /* If warnings are on, this will print a more detailed analysis of what
2834 * is wrong than the error message below */
2835 utf8n_to_uvchr(first_bad_char_loc,
2836 e - ((char *) first_bad_char_loc),
2839 /* We deliberately don't try to print the malformed character, which
2840 * might not print very well; it also may be just the first of many
2841 * malformations, so don't print what comes after it */
2842 yyerror(Perl_form(aTHX_
2843 "Malformed UTF-8 character immediately after '%.*s'",
2844 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2848 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2849 /* include the <}> */
2850 e - backslash_ptr + 1);
2852 SvREFCNT_dec_NN(res);
2856 /* See if the charnames handler is the Perl core's, and if so, we can skip
2857 * the validation needed for a user-supplied one, as Perl's does its own
2859 table = GvHV(PL_hintgv); /* ^H */
2860 cvp = hv_fetchs(table, "charnames", FALSE);
2861 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2862 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2864 const char * const name = HvNAME(stash);
2865 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2866 && strEQ(name, "_charnames")) {
2871 /* Here, it isn't Perl's charname handler. We can't rely on a
2872 * user-supplied handler to validate the input name. For non-ut8 input,
2873 * look to see that the first character is legal. Then loop through the
2874 * rest checking that each is a continuation */
2876 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2880 if (! isALPHAU(*s)) {
2885 if (! isCHARNAME_CONT(*s)) {
2888 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2889 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2890 "A sequence of multiple spaces in a charnames "
2891 "alias definition is deprecated");
2895 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2896 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2897 "Trailing white-space in a charnames alias "
2898 "definition is deprecated");
2902 /* Similarly for utf8. For invariants can check directly; for other
2903 * Latin1, can calculate their code point and check; otherwise use a
2905 if (UTF8_IS_INVARIANT(*s)) {
2906 if (! isALPHAU(*s)) {
2910 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2911 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2917 if (! PL_utf8_charname_begin) {
2918 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2919 PL_utf8_charname_begin = _core_swash_init("utf8",
2920 "_Perl_Charname_Begin",
2922 1, 0, NULL, &flags);
2924 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2931 if (UTF8_IS_INVARIANT(*s)) {
2932 if (! isCHARNAME_CONT(*s)) {
2935 if (*s == ' ' && *(s-1) == ' '
2936 && ckWARN_d(WARN_DEPRECATED)) {
2937 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2938 "A sequence of multiple spaces in a charnam"
2939 "es alias definition is deprecated");
2943 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2944 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2951 if (! PL_utf8_charname_continue) {
2952 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2953 PL_utf8_charname_continue = _core_swash_init("utf8",
2954 "_Perl_Charname_Continue",
2956 1, 0, NULL, &flags);
2958 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2964 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2965 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2966 "Trailing white-space in a charnames alias "
2967 "definition is deprecated");
2971 if (SvUTF8(res)) { /* Don't accept malformed input */
2972 const U8* first_bad_char_loc;
2974 const char* const str = SvPV_const(res, len);
2975 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2976 /* If warnings are on, this will print a more detailed analysis of
2977 * what is wrong than the error message below */
2978 utf8n_to_uvchr(first_bad_char_loc,
2979 (char *) first_bad_char_loc - str,
2982 /* We deliberately don't try to print the malformed character,
2983 * which might not print very well; it also may be just the first
2984 * of many malformations, so don't print what comes after it */
2987 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2988 (int) (e - backslash_ptr + 1), backslash_ptr,
2989 (int) ((char *) first_bad_char_loc - str), str
2999 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
3001 /* The final %.*s makes sure that should the trailing NUL be missing
3002 * that this print won't run off the end of the string */
3005 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3006 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
3007 (int)(e - s + bad_char_size), s + bad_char_size
3009 UTF ? SVf_UTF8 : 0);
3017 Extracts the next constant part of a pattern, double-quoted string,
3018 or transliteration. This is terrifying code.
3020 For example, in parsing the double-quoted string "ab\x63$d", it would
3021 stop at the '$' and return an OP_CONST containing 'abc'.
3023 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3024 processing a pattern (PL_lex_inpat is true), a transliteration
3025 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3027 Returns a pointer to the character scanned up to. If this is
3028 advanced from the start pointer supplied (i.e. if anything was
3029 successfully parsed), will leave an OP_CONST for the substring scanned
3030 in pl_yylval. Caller must intuit reason for not parsing further
3031 by looking at the next characters herself.
3035 \N{FOO} => \N{U+hex_for_character_FOO}
3036 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3039 all other \-char, including \N and \N{ apart from \N{ABC}
3042 @ and $ where it appears to be a var, but not for $ as tail anchor
3047 In transliterations:
3048 characters are VERY literal, except for - not at the start or end
3049 of the string, which indicates a range. If the range is in bytes,
3050 scan_const expands the range to the full set of intermediate
3051 characters. If the range is in utf8, the hyphen is replaced with
3052 a certain range mark which will be handled by pmtrans() in op.c.
3054 In double-quoted strings:
3056 double-quoted style: \r and \n
3057 constants: \x31, etc.
3058 deprecated backrefs: \1 (in substitution replacements)
3059 case and quoting: \U \Q \E
3062 scan_const does *not* construct ops to handle interpolated strings.
3063 It stops processing as soon as it finds an embedded $ or @ variable
3064 and leaves it to the caller to work out what's going on.
3066 embedded arrays (whether in pattern or not) could be:
3067 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3069 $ in double-quoted strings must be the symbol of an embedded scalar.
3071 $ in pattern could be $foo or could be tail anchor. Assumption:
3072 it's a tail anchor if $ is the last thing in the string, or if it's
3073 followed by one of "()| \r\n\t"
3075 \1 (backreferences) are turned into $1 in substitutions
3077 The structure of the code is
3078 while (there's a character to process) {
3079 handle transliteration ranges
3080 skip regexp comments /(?#comment)/ and codes /(?{code})/
3081 skip #-initiated comments in //x patterns
3082 check for embedded arrays
3083 check for embedded scalars
3085 deprecate \1 in substitution replacements
3086 handle string-changing backslashes \l \U \Q \E, etc.
3087 switch (what was escaped) {
3088 handle \- in a transliteration (becomes a literal -)
3089 if a pattern and not \N{, go treat as regular character
3090 handle \132 (octal characters)
3091 handle \x15 and \x{1234} (hex characters)
3092 handle \N{name} (named characters, also \N{3,5} in a pattern)
3093 handle \cV (control characters)
3094 handle printf-style backslashes (\f, \r, \n, etc)
3097 } (end if backslash)
3098 handle regular character
3099 } (end while character to read)
3104 S_scan_const(pTHX_ char *start)
3107 char *send = PL_bufend; /* end of the constant */
3108 SV *sv = newSV(send - start); /* sv for the constant. See
3109 note below on sizing. */
3110 char *s = start; /* start of the constant */
3111 char *d = SvPVX(sv); /* destination for copies */
3112 bool dorange = FALSE; /* are we in a translit range? */
3113 bool didrange = FALSE; /* did we just finish a range? */
3114 bool in_charclass = FALSE; /* within /[...]/ */
3115 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3116 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3117 to be UTF8? But, this can
3118 show as true when the source
3119 isn't utf8, as for example
3120 when it is entirely composed
3122 SV *res; /* result from charnames */
3124 /* Note on sizing: The scanned constant is placed into sv, which is
3125 * initialized by newSV() assuming one byte of output for every byte of
3126 * input. This routine expects newSV() to allocate an extra byte for a
3127 * trailing NUL, which this routine will append if it gets to the end of
3128 * the input. There may be more bytes of input than output (eg., \N{LATIN
3129 * CAPITAL LETTER A}), or more output than input if the constant ends up
3130 * recoded to utf8, but each time a construct is found that might increase
3131 * the needed size, SvGROW() is called. Its size parameter each time is
3132 * based on the best guess estimate at the time, namely the length used so
3133 * far, plus the length the current construct will occupy, plus room for
3134 * the trailing NUL, plus one byte for every input byte still unscanned */
3136 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3139 UV literal_endpoint = 0;
3140 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3143 PERL_ARGS_ASSERT_SCAN_CONST;
3145 assert(PL_lex_inwhat != OP_TRANSR);
3146 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3147 /* If we are doing a trans and we know we want UTF8 set expectation */
3148 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3149 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3152 /* Protect sv from errors and fatal warnings. */
3153 ENTER_with_name("scan_const");
3156 while (s < send || dorange) {
3158 /* get transliterations out of the way (they're most literal) */
3159 if (PL_lex_inwhat == OP_TRANS) {
3160 /* expand a range A-Z to the full set of characters. AIE! */
3162 I32 i; /* current expanded character */
3163 I32 min; /* first character in range */
3164 I32 max; /* last character in range */
3175 char * const c = (char*)utf8_hop((U8*)d, -1);
3179 *c = (char) ILLEGAL_UTF8_BYTE;
3180 /* mark the range as done, and continue */
3186 i = d - SvPVX_const(sv); /* remember current offset */
3189 SvLEN(sv) + (has_utf8 ?
3190 (512 - UTF_CONTINUATION_MARK +
3193 /* How many two-byte within 0..255: 128 in UTF-8,
3194 * 96 in UTF-8-mod. */
3196 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3198 d = SvPVX(sv) + i; /* refresh d after realloc */
3202 for (j = 0; j <= 1; j++) {
3203 char * const c = (char*)utf8_hop((U8*)d, -1);
3204 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3210 max = (U8)0xff; /* only to \xff */
3211 uvmax = uv; /* \x{100} to uvmax */
3213 d = c; /* eat endpoint chars */
3218 d -= 2; /* eat the first char and the - */
3219 min = (U8)*d; /* first char in range */
3220 max = (U8)d[1]; /* last char in range */
3227 "Invalid range \"%c-%c\" in transliteration operator",
3228 (char)min, (char)max);
3232 if (literal_endpoint == 2 &&
3233 ((isLOWER_A(min) && isLOWER_A(max)) ||
3234 (isUPPER_A(min) && isUPPER_A(max))))
3236 for (i = min; i <= max; i++) {
3243 for (i = min; i <= max; i++)
3246 append_utf8_from_native_byte(i, &d);
3254 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3256 *d++ = (char) ILLEGAL_UTF8_BYTE;
3258 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3262 /* mark the range as done, and continue */
3266 literal_endpoint = 0;
3271 /* range begins (ignore - as first or last char) */
3272 else if (*s == '-' && s+1 < send && s != start) {
3274 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3281 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3291 literal_endpoint = 0;
3292 native_range = TRUE;
3297 /* if we get here, we're not doing a transliteration */
3299 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3302 while (s1 >= start && *s1-- == '\\')
3305 in_charclass = TRUE;
3308 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3311 while (s1 >= start && *s1-- == '\\')
3314 in_charclass = FALSE;
3317 /* skip for regexp comments /(?#comment)/, except for the last
3318 * char, which will be done separately.
3319 * Stop on (?{..}) and friends */
3321 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3323 while (s+1 < send && *s != ')')
3326 else if (!PL_lex_casemods &&
3327 ( s[2] == '{' /* This should match regcomp.c */
3328 || (s[2] == '?' && s[3] == '{')))
3334 /* likewise skip #-initiated comments in //x patterns */
3335 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3336 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3337 while (s+1 < send && *s != '\n')
3341 /* no further processing of single-quoted regex */
3342 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3343 goto default_action;
3345 /* check for embedded arrays
3346 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3348 else if (*s == '@' && s[1]) {
3349 if (isWORDCHAR_lazy_if(s+1,UTF))
3351 if (strchr(":'{$", s[1]))
3353 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3354 break; /* in regexp, neither @+ nor @- are interpolated */
3357 /* check for embedded scalars. only stop if we're sure it's a
3360 else if (*s == '$') {
3361 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3363 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3365 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3366 "Possible unintended interpolation of $\\ in regex");
3368 break; /* in regexp, $ might be tail anchor */
3372 /* End of else if chain - OP_TRANS rejoin rest */
3375 if (*s == '\\' && s+1 < send) {
3376 char* e; /* Can be used for ending '}', etc. */
3380 /* warn on \1 - \9 in substitution replacements, but note that \11
3381 * is an octal; and \19 is \1 followed by '9' */
3382 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3383 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3385 /* diag_listed_as: \%d better written as $%d */
3386 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3391 /* string-change backslash escapes */
3392 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3396 /* In a pattern, process \N, but skip any other backslash escapes.
3397 * This is because we don't want to translate an escape sequence
3398 * into a meta symbol and have the regex compiler use the meta
3399 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3400 * in spite of this, we do have to process \N here while the proper
3401 * charnames handler is in scope. See bugs #56444 and #62056.
3402 * There is a complication because \N in a pattern may also stand
3403 * for 'match a non-nl', and not mean a charname, in which case its
3404 * processing should be deferred to the regex compiler. To be a
3405 * charname it must be followed immediately by a '{', and not look
3406 * like \N followed by a curly quantifier, i.e., not something like
3407 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3409 else if (PL_lex_inpat
3412 || regcurly(s + 1, FALSE)))
3415 goto default_action;
3420 /* quoted - in transliterations */
3422 if (PL_lex_inwhat == OP_TRANS) {
3429 if ((isALPHANUMERIC(*s)))
3430 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3431 "Unrecognized escape \\%c passed through",
3433 /* default action is to copy the quoted character */
3434 goto default_action;
3437 /* eg. \132 indicates the octal constant 0132 */
3438 case '0': case '1': case '2': case '3':
3439 case '4': case '5': case '6': case '7':
3441 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3443 uv = grok_oct(s, &len, &flags, NULL);
3445 if (len < 3 && s < send && isDIGIT(*s)
3446 && ckWARN(WARN_MISC))
3448 Perl_warner(aTHX_ packWARN(WARN_MISC),
3449 "%s", form_short_octal_warning(s, len));
3452 goto NUM_ESCAPE_INSERT;
3454 /* eg. \o{24} indicates the octal constant \024 */
3459 bool valid = grok_bslash_o(&s, &uv, &error,
3460 TRUE, /* Output warning */
3461 FALSE, /* Not strict */
3462 TRUE, /* Output warnings for
3469 goto NUM_ESCAPE_INSERT;
3472 /* eg. \x24 indicates the hex constant 0x24 */
3477 bool valid = grok_bslash_x(&s, &uv, &error,
3478 TRUE, /* Output warning */
3479 FALSE, /* Not strict */
3480 TRUE, /* Output warnings for
3490 /* Insert oct or hex escaped character. There will always be
3491 * enough room in sv since such escapes will be longer than any
3492 * UTF-8 sequence they can end up as, except if they force us
3493 * to recode the rest of the string into utf8 */
3495 /* Here uv is the ordinal of the next character being added */
3496 if (!UVCHR_IS_INVARIANT(uv)) {
3497 if (!has_utf8 && uv > 255) {
3498 /* Might need to recode whatever we have accumulated so
3499 * far if it contains any chars variant in utf8 or
3502 SvCUR_set(sv, d - SvPVX_const(sv));
3505 /* See Note on sizing above. */
3506 sv_utf8_upgrade_flags_grow(sv,
3507 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3508 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3509 d = SvPVX(sv) + SvCUR(sv);
3514 d = (char*)uvchr_to_utf8((U8*)d, uv);
3515 if (PL_lex_inwhat == OP_TRANS &&
3516 PL_sublex_info.sub_op) {
3517 PL_sublex_info.sub_op->op_private |=
3518 (PL_lex_repl ? OPpTRANS_FROM_UTF
3522 if (uv > 255 && !dorange)
3523 native_range = FALSE;
3536 /* In a non-pattern \N must be a named character, like \N{LATIN
3537 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3538 * mean to match a non-newline. For non-patterns, named
3539 * characters are converted to their string equivalents. In
3540 * patterns, named characters are not converted to their
3541 * ultimate forms for the same reasons that other escapes
3542 * aren't. Instead, they are converted to the \N{U+...} form
3543 * to get the value from the charnames that is in effect right
3544 * now, while preserving the fact that it was a named character
3545 * so that the regex compiler knows this */
3547 /* The structure of this section of code (besides checking for
3548 * errors and upgrading to utf8) is:
3549 * Further disambiguate between the two meanings of \N, and if
3550 * not a charname, go process it elsewhere
3551 * If of form \N{U+...}, pass it through if a pattern;
3552 * otherwise convert to utf8
3553 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3554 * pattern; otherwise convert to utf8 */
3556 /* Here, s points to the 'N'; the test below is guaranteed to
3557 * succeed if we are being called on a pattern as we already
3558 * know from a test above that the next character is a '{'.
3559 * On a non-pattern \N must mean 'named sequence, which
3560 * requires braces */
3563 yyerror("Missing braces on \\N{}");
3568 /* If there is no matching '}', it is an error. */
3569 if (! (e = strchr(s, '}'))) {
3570 if (! PL_lex_inpat) {
3571 yyerror("Missing right brace on \\N{}");
3573 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3578 /* Here it looks like a named character */
3580 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3581 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3582 | PERL_SCAN_DISALLOW_PREFIX;
3585 /* For \N{U+...}, the '...' is a unicode value even on
3586 * EBCDIC machines */
3587 s += 2; /* Skip to next char after the 'U+' */
3589 uv = grok_hex(s, &len, &flags, NULL);
3590 if (len == 0 || len != (STRLEN)(e - s)) {
3591 yyerror("Invalid hexadecimal number in \\N{U+...}");
3598 /* On non-EBCDIC platforms, pass through to the regex
3599 * compiler unchanged. The reason we evaluated the
3600 * number above is to make sure there wasn't a syntax
3601 * error. But on EBCDIC we convert to native so
3602 * downstream code can continue to assume it's native
3604 s -= 5; /* Include the '\N{U+' */
3606 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3609 (unsigned int) UNI_TO_NATIVE(uv));
3611 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3615 else { /* Not a pattern: convert the hex to string */
3617 /* If destination is not in utf8, unconditionally
3618 * recode it to be so. This is because \N{} implies
3619 * Unicode semantics, and scalars have to be in utf8
3620 * to guarantee those semantics */
3622 SvCUR_set(sv, d - SvPVX_const(sv));
3625 /* See Note on sizing above. */
3626 sv_utf8_upgrade_flags_grow(
3628 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3629 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3630 d = SvPVX(sv) + SvCUR(sv);
3634 /* Add the (Unicode) code point to the output. */
3635 if (UNI_IS_INVARIANT(uv)) {
3636 *d++ = (char) LATIN1_TO_NATIVE(uv);
3639 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3643 else /* Here is \N{NAME} but not \N{U+...}. */
3644 if ((res = get_and_check_backslash_N_name(s, e)))
3647 const char *str = SvPV_const(res, len);
3650 if (! len) { /* The name resolved to an empty string */
3651 Copy("\\N{}", d, 4, char);
3655 /* In order to not lose information for the regex
3656 * compiler, pass the result in the specially made
3657 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3658 * the code points in hex of each character
3659 * returned by charnames */
3661 const char *str_end = str + len;
3662 const STRLEN off = d - SvPVX_const(sv);
3664 if (! SvUTF8(res)) {
3665 /* For the non-UTF-8 case, we can determine the
3666 * exact length needed without having to parse
3667 * through the string. Each character takes up
3668 * 2 hex digits plus either a trailing dot or
3670 d = off + SvGROW(sv, off
3672 + 6 /* For the "\N{U+", and
3674 + (STRLEN)(send - e));
3675 Copy("\\N{U+", d, 5, char);
3677 while (str < str_end) {
3679 my_snprintf(hex_string, sizeof(hex_string),
3680 "%02X.", (U8) *str);
3681 Copy(hex_string, d, 3, char);
3685 d--; /* We will overwrite below the final
3686 dot with a right brace */
3689 STRLEN char_length; /* cur char's byte length */
3691 /* and the number of bytes after this is
3692 * translated into hex digits */
3693 STRLEN output_length;
3695 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3696 * for max('U+', '.'); and 1 for NUL */
3697 char hex_string[2 * UTF8_MAXBYTES + 5];
3699 /* Get the first character of the result. */
3700 U32 uv = utf8n_to_uvchr((U8 *) str,
3704 /* Convert first code point to hex, including
3705 * the boiler plate before it. */
3707 my_snprintf(hex_string, sizeof(hex_string),
3711 /* Make sure there is enough space to hold it */
3712 d = off + SvGROW(sv, off
3714 + (STRLEN)(send - e)
3715 + 2); /* '}' + NUL */
3717 Copy(hex_string, d, output_length, char);
3720 /* For each subsequent character, append dot and
3721 * its ordinal in hex */
3722 while ((str += char_length) < str_end) {
3723 const STRLEN off = d - SvPVX_const(sv);
3724 U32 uv = utf8n_to_uvchr((U8 *) str,
3729 my_snprintf(hex_string,
3734 d = off + SvGROW(sv, off
3736 + (STRLEN)(send - e)
3737 + 2); /* '}' + NUL */
3738 Copy(hex_string, d, output_length, char);
3743 *d++ = '}'; /* Done. Add the trailing brace */
3746 else { /* Here, not in a pattern. Convert the name to a
3749 /* If destination is not in utf8, unconditionally
3750 * recode it to be so. This is because \N{} implies
3751 * Unicode semantics, and scalars have to be in utf8
3752 * to guarantee those semantics */
3754 SvCUR_set(sv, d - SvPVX_const(sv));
3757 /* See Note on sizing above. */
3758 sv_utf8_upgrade_flags_grow(sv,
3759 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3760 len + (STRLEN)(send - s) + 1);
3761 d = SvPVX(sv) + SvCUR(sv);
3763 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3765 /* See Note on sizing above. (NOTE: SvCUR() is not
3766 * set correctly here). */
3767 const STRLEN off = d - SvPVX_const(sv);
3768 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3770 Copy(str, d, len, char);
3776 } /* End \N{NAME} */
3779 native_range = FALSE; /* \N{} is defined to be Unicode */
3781 s = e + 1; /* Point to just after the '}' */
3784 /* \c is a control character */
3788 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3791 yyerror("Missing control char name in \\c");
3795 /* printf-style backslashes, formfeeds, newlines, etc */
3812 *d++ = ASCII_TO_NATIVE('\033');
3821 } /* end if (backslash) */
3828 /* If we started with encoded form, or already know we want it,
3829 then encode the next character */
3830 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3834 /* One might think that it is wasted effort in the case of the
3835 * source being utf8 (this_utf8 == TRUE) to take the next character
3836 * in the source, convert it to an unsigned value, and then convert
3837 * it back again. But the source has not been validated here. The
3838 * routine that does the conversion checks for errors like
3841 const UV nextuv = (this_utf8)
3842 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3844 const STRLEN need = UNISKIP(nextuv);
3846 SvCUR_set(sv, d - SvPVX_const(sv));
3849 /* See Note on sizing above. */
3850 sv_utf8_upgrade_flags_grow(sv,
3851 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3852 need + (STRLEN)(send - s) + 1);
3853 d = SvPVX(sv) + SvCUR(sv);
3855 } else if (need > len) {
3856 /* encoded value larger than old, may need extra space (NOTE:
3857 * SvCUR() is not set correctly here). See Note on sizing
3859 const STRLEN off = d - SvPVX_const(sv);
3860 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3864 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3866 if (uv > 255 && !dorange)
3867 native_range = FALSE;
3873 } /* while loop to process each character */
3875 /* terminate the string and set up the sv */
3877 SvCUR_set(sv, d - SvPVX_const(sv));
3878 if (SvCUR(sv) >= SvLEN(sv))
3879 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3880 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3883 if (PL_encoding && !has_utf8) {
3884 sv_recode_to_utf8(sv, PL_encoding);
3890 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3891 PL_sublex_info.sub_op->op_private |=
3892 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3896 /* shrink the sv if we allocated more than we used */
3897 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3898 SvPV_shrink_to_cur(sv);
3901 /* return the substring (via pl_yylval) only if we parsed anything */
3904 for (; s2 < s; s2++) {
3906 COPLINE_INC_WITH_HERELINES;
3908 SvREFCNT_inc_simple_void_NN(sv);
3909 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3910 && ! PL_parser->lex_re_reparsing)
3912 const char *const key = PL_lex_inpat ? "qr" : "q";
3913 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3917 if (PL_lex_inwhat == OP_TRANS) {
3920 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3923 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3931 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3934 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3936 LEAVE_with_name("scan_const");
3941 * Returns TRUE if there's more to the expression (e.g., a subscript),
3944 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3946 * ->[ and ->{ return TRUE
3947 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3948 * { and [ outside a pattern are always subscripts, so return TRUE
3949 * if we're outside a pattern and it's not { or [, then return FALSE
3950 * if we're in a pattern and the first char is a {
3951 * {4,5} (any digits around the comma) returns FALSE
3952 * if we're in a pattern and the first char is a [
3954 * [SOMETHING] has a funky algorithm to decide whether it's a
3955 * character class or not. It has to deal with things like
3956 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3957 * anything else returns TRUE
3960 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3963 S_intuit_more(pTHX_ char *s)
3967 PERL_ARGS_ASSERT_INTUIT_MORE;
3969 if (PL_lex_brackets)
3971 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3973 if (*s == '-' && s[1] == '>'
3974 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3975 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3976 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3978 if (*s != '{' && *s != '[')
3983 /* In a pattern, so maybe we have {n,m}. */
3985 if (regcurly(s, FALSE)) {
3991 /* On the other hand, maybe we have a character class */
3994 if (*s == ']' || *s == '^')
3997 /* this is terrifying, and it works */
4000 const char * const send = strchr(s,']');
4001 unsigned char un_char, last_un_char;
4002 char tmpbuf[sizeof PL_tokenbuf * 4];
4004 if (!send) /* has to be an expression */
4006 weight = 2; /* let's weigh the evidence */
4010 else if (isDIGIT(*s)) {
4012 if (isDIGIT(s[1]) && s[2] == ']')
4018 Zero(seen,256,char);
4020 for (; s < send; s++) {
4021 last_un_char = un_char;
4022 un_char = (unsigned char)*s;
4027 weight -= seen[un_char] * 10;
4028 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4030 char *tmp = PL_bufend;
4031 PL_bufend = (char*)send;
4032 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4034 len = (int)strlen(tmpbuf);
4035 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4036 UTF ? SVf_UTF8 : 0, SVt_PV))
4041 else if (*s == '$' && s[1] &&
4042 strchr("[#!%*<>()-=",s[1])) {
4043 if (/*{*/ strchr("])} =",s[2]))
4052 if (strchr("wds]",s[1]))
4054 else if (seen[(U8)'\''] || seen[(U8)'"'])
4056 else if (strchr("rnftbxcav",s[1]))
4058 else if (isDIGIT(s[1])) {
4060 while (s[1] && isDIGIT(s[1]))
4070 if (strchr("aA01! ",last_un_char))
4072 if (strchr("zZ79~",s[1]))
4074 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4075 weight -= 5; /* cope with negative subscript */
4078 if (!isWORDCHAR(last_un_char)
4079 && !(last_un_char == '$' || last_un_char == '@'
4080 || last_un_char == '&')
4081 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4086 if (keyword(tmpbuf, d - tmpbuf, 0))
4089 if (un_char == last_un_char + 1)
4091 weight -= seen[un_char];
4096 if (weight >= 0) /* probably a character class */
4106 * Does all the checking to disambiguate
4108 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4109 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4111 * First argument is the stuff after the first token, e.g. "bar".
4113 * Not a method if foo is a filehandle.
4114 * Not a method if foo is a subroutine prototyped to take a filehandle.
4115 * Not a method if it's really "Foo $bar"
4116 * Method if it's "foo $bar"
4117 * Not a method if it's really "print foo $bar"
4118 * Method if it's really "foo package::" (interpreted as package->foo)
4119 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4120 * Not a method if bar is a filehandle or package, but is quoted with
4125 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4128 char *s = start + (*start == '$');
4129 char tmpbuf[sizeof PL_tokenbuf];
4136 PERL_ARGS_ASSERT_INTUIT_METHOD;
4138 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4140 if (cv && SvPOK(cv)) {
4141 const char *proto = CvPROTO(cv);
4143 while (*proto && (isSPACE(*proto) || *proto == ';'))
4150 if (*start == '$') {
4151 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4152 isUPPER(*PL_tokenbuf))
4155 len = start - SvPVX(PL_linestr);
4159 start = SvPVX(PL_linestr) + len;
4163 return *s == '(' ? FUNCMETH : METHOD;
4166 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4167 /* start is the beginning of the possible filehandle/object,
4168 * and s is the end of it
4169 * tmpbuf is a copy of it (but with single quotes as double colons)
4172 if (!keyword(tmpbuf, len, 0)) {
4173 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4177 soff = s - SvPVX(PL_linestr);
4181 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4182 if (indirgv && GvCVu(indirgv))
4184 /* filehandle or package name makes it a method */
4185 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4187 soff = s - SvPVX(PL_linestr);
4190 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4191 return 0; /* no assumptions -- "=>" quotes bareword */
4193 start_force(PL_curforce);
4194 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4195 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4196 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4198 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4199 ( UTF ? SVf_UTF8 : 0 )));
4204 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4206 return *s == '(' ? FUNCMETH : METHOD;
4212 /* Encoded script support. filter_add() effectively inserts a
4213 * 'pre-processing' function into the current source input stream.
4214 * Note that the filter function only applies to the current source file
4215 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4217 * The datasv parameter (which may be NULL) can be used to pass
4218 * private data to this instance of the filter. The filter function
4219 * can recover the SV using the FILTER_DATA macro and use it to
4220 * store private buffers and state information.
4222 * The supplied datasv parameter is upgraded to a PVIO type
4223 * and the IoDIRP/IoANY field is used to store the function pointer,
4224 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4225 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4226 * private use must be set using malloc'd pointers.
4230 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4239 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4240 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4242 if (!PL_rsfp_filters)
4243 PL_rsfp_filters = newAV();
4246 SvUPGRADE(datasv, SVt_PVIO);
4247 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4248 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4249 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4250 FPTR2DPTR(void *, IoANY(datasv)),
4251 SvPV_nolen(datasv)));
4252 av_unshift(PL_rsfp_filters, 1);
4253 av_store(PL_rsfp_filters, 0, datasv) ;
4255 !PL_parser->filtered
4256 && PL_parser->lex_flags & LEX_EVALBYTES
4257 && PL_bufptr < PL_bufend
4259 const char *s = PL_bufptr;
4260 while (s < PL_bufend) {
4262 SV *linestr = PL_parser->linestr;
4263 char *buf = SvPVX(linestr);
4264 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4265 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4266 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4267 STRLEN const linestart_pos = PL_parser->linestart - buf;
4268 STRLEN const last_uni_pos =
4269 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4270 STRLEN const last_lop_pos =
4271 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4272 av_push(PL_rsfp_filters, linestr);
4273 PL_parser->linestr =
4274 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4275 buf = SvPVX(PL_parser->linestr);
4276 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4277 PL_parser->bufptr = buf + bufptr_pos;
4278 PL_parser->oldbufptr = buf + oldbufptr_pos;
4279 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4280 PL_parser->linestart = buf + linestart_pos;
4281 if (PL_parser->last_uni)
4282 PL_parser->last_uni = buf + last_uni_pos;
4283 if (PL_parser->last_lop)
4284 PL_parser->last_lop = buf + last_lop_pos;
4285 SvLEN(linestr) = SvCUR(linestr);
4286 SvCUR(linestr) = s-SvPVX(linestr);
4287 PL_parser->filtered = 1;
4297 /* Delete most recently added instance of this filter function. */
4299 Perl_filter_del(pTHX_ filter_t funcp)
4304 PERL_ARGS_ASSERT_FILTER_DEL;
4307 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4308 FPTR2DPTR(void*, funcp)));
4310 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4312 /* if filter is on top of stack (usual case) just pop it off */
4313 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4314 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4315 sv_free(av_pop(PL_rsfp_filters));
4319 /* we need to search for the correct entry and clear it */
4320 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4324 /* Invoke the idxth filter function for the current rsfp. */
4325 /* maxlen 0 = read one text line */
4327 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4332 /* This API is bad. It should have been using unsigned int for maxlen.
4333 Not sure if we want to change the API, but if not we should sanity
4334 check the value here. */
4335 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4337 PERL_ARGS_ASSERT_FILTER_READ;
4339 if (!PL_parser || !PL_rsfp_filters)
4341 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4342 /* Provide a default input filter to make life easy. */
4343 /* Note that we append to the line. This is handy. */
4344 DEBUG_P(PerlIO_printf(Perl_debug_log,
4345 "filter_read %d: from rsfp\n", idx));
4346 if (correct_length) {
4349 const int old_len = SvCUR(buf_sv);
4351 /* ensure buf_sv is large enough */
4352 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4353 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4354 correct_length)) <= 0) {
4355 if (PerlIO_error(PL_rsfp))
4356 return -1; /* error */
4358 return 0 ; /* end of file */
4360 SvCUR_set(buf_sv, old_len + len) ;
4361 SvPVX(buf_sv)[old_len + len] = '\0';
4364 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4365 if (PerlIO_error(PL_rsfp))
4366 return -1; /* error */
4368 return 0 ; /* end of file */
4371 return SvCUR(buf_sv);
4373 /* Skip this filter slot if filter has been deleted */
4374 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4375 DEBUG_P(PerlIO_printf(Perl_debug_log,
4376 "filter_read %d: skipped (filter deleted)\n",
4378 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4380 if (SvTYPE(datasv) != SVt_PVIO) {
4381 if (correct_length) {
4383 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4384 if (!remainder) return 0; /* eof */
4385 if (correct_length > remainder) correct_length = remainder;
4386 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4387 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4390 const char *s = SvEND(datasv);
4391 const char *send = SvPVX(datasv) + SvLEN(datasv);
4399 if (s == send) return 0; /* eof */
4400 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4401 SvCUR_set(datasv, s-SvPVX(datasv));
4403 return SvCUR(buf_sv);
4405 /* Get function pointer hidden within datasv */
4406 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4407 DEBUG_P(PerlIO_printf(Perl_debug_log,
4408 "filter_read %d: via function %p (%s)\n",
4409 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4410 /* Call function. The function is expected to */
4411 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4412 /* Return: <0:error, =0:eof, >0:not eof */
4413 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4417 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4421 PERL_ARGS_ASSERT_FILTER_GETS;
4423 #ifdef PERL_CR_FILTER
4424 if (!PL_rsfp_filters) {
4425 filter_add(S_cr_textfilter,NULL);
4428 if (PL_rsfp_filters) {
4430 SvCUR_set(sv, 0); /* start with empty line */
4431 if (FILTER_READ(0, sv, 0) > 0)
4432 return ( SvPVX(sv) ) ;
4437 return (sv_gets(sv, PL_rsfp, append));
4441 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4446 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4448 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4452 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4453 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4455 return GvHV(gv); /* Foo:: */
4458 /* use constant CLASS => 'MyClass' */
4459 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4460 if (gv && GvCV(gv)) {
4461 SV * const sv = cv_const_sv(GvCV(gv));
4463 pkgname = SvPV_const(sv, len);
4466 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4472 * The intent of this yylex wrapper is to minimize the changes to the
4473 * tokener when we aren't interested in collecting madprops. It remains
4474 * to be seen how successful this strategy will be...
4481 char *s = PL_bufptr;
4483 /* make sure PL_thiswhite is initialized */
4487 /* previous token ate up our whitespace? */
4488 if (!PL_lasttoke && PL_nextwhite) {
4489 PL_thiswhite = PL_nextwhite;
4493 /* isolate the token, and figure out where it is without whitespace */
4494 PL_realtokenstart = -1;
4498 assert(PL_curforce < 0);
4500 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4501 if (!PL_thistoken) {
4502 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4503 PL_thistoken = newSVpvs("");
4505 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4506 PL_thistoken = newSVpvn(tstart, s - tstart);
4509 if (PL_thismad) /* install head */
4510 CURMAD('X', PL_thistoken);
4513 /* last whitespace of a sublex? */
4514 if (optype == ')' && PL_endwhite) {
4515 CURMAD('X', PL_endwhite);
4520 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4521 if (!PL_thiswhite && !PL_endwhite && !optype) {
4522 sv_free(PL_thistoken);
4527 /* put off final whitespace till peg */
4528 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4529 PL_nextwhite = PL_thiswhite;
4532 else if (PL_thisopen) {
4533 CURMAD('q', PL_thisopen);
4535 sv_free(PL_thistoken);
4539 /* Store actual token text as madprop X */
4540 CURMAD('X', PL_thistoken);
4544 /* add preceding whitespace as madprop _ */
4545 CURMAD('_', PL_thiswhite);
4549 /* add quoted material as madprop = */
4550 CURMAD('=', PL_thisstuff);
4554 /* add terminating quote as madprop Q */
4555 CURMAD('Q', PL_thisclose);
4559 /* special processing based on optype */
4563 /* opval doesn't need a TOKEN since it can already store mp */
4573 if (pl_yylval.opval)
4574 append_madprops(PL_thismad, pl_yylval.opval, 0);
4582 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4595 /* remember any fake bracket that lexer is about to discard */
4596 if (PL_lex_brackets == 1 &&
4597 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4600 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4603 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4604 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4607 break; /* don't bother looking for trailing comment */
4616 /* attach a trailing comment to its statement instead of next token */
4620 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4622 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4624 if (*s == '\n' || *s == '#') {
4625 while (s < PL_bufend && *s != '\n')
4629 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4630 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4643 /* Create new token struct. Note: opvals return early above. */
4644 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4651 S_tokenize_use(pTHX_ int is_use, char *s) {
4654 PERL_ARGS_ASSERT_TOKENIZE_USE;
4656 if (PL_expect != XSTATE)
4657 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4658 is_use ? "use" : "no"));
4661 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4662 s = force_version(s, TRUE);
4663 if (*s == ';' || *s == '}'
4664 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4665 start_force(PL_curforce);
4666 NEXTVAL_NEXTTOKE.opval = NULL;
4669 else if (*s == 'v') {
4670 s = force_word(s,WORD,FALSE,TRUE);
4671 s = force_version(s, FALSE);
4675 s = force_word(s,WORD,FALSE,TRUE);
4676 s = force_version(s, FALSE);
4678 pl_yylval.ival = is_use;
4682 static const char* const exp_name[] =
4683 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4684 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4688 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4690 S_word_takes_any_delimeter(char *p, STRLEN len)
4692 return (len == 1 && strchr("msyq", p[0])) ||
4694 (p[0] == 't' && p[1] == 'r') ||
4695 (p[0] == 'q' && strchr("qwxr", p[1]))));
4699 S_check_scalar_slice(pTHX_ char *s)
4702 while (*s == ' ' || *s == '\t') s++;
4703 if (*s == 'q' && s[1] == 'w'
4704 && !isWORDCHAR_lazy_if(s+2,UTF))
4706 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4707 s += UTF ? UTF8SKIP(s) : 1;
4708 if (*s == '}' || *s == ']')
4709 pl_yylval.ival = OPpSLICEWARNING;
4715 Works out what to call the token just pulled out of the input
4716 stream. The yacc parser takes care of taking the ops we return and
4717 stitching them into a tree.
4720 The type of the next token
4723 Switch based on the current state:
4724 - if we already built the token before, use it
4725 - if we have a case modifier in a string, deal with that
4726 - handle other cases of interpolation inside a string
4727 - scan the next line if we are inside a format
4728 In the normal state switch on the next character:
4730 if alphabetic, go to key lookup
4731 unrecoginized character - croak
4732 - 0/4/26: handle end-of-line or EOF
4733 - cases for whitespace
4734 - \n and #: handle comments and line numbers
4735 - various operators, brackets and sigils
4738 - 'v': vstrings (or go to key lookup)
4739 - 'x' repetition operator (or go to key lookup)
4740 - other ASCII alphanumerics (key lookup begins here):
4743 scan built-in keyword (but do nothing with it yet)
4744 check for statement label
4745 check for lexical subs
4746 goto just_a_word if there is one
4747 see whether built-in keyword is overridden
4748 switch on keyword number:
4749 - default: just_a_word:
4750 not a built-in keyword; handle bareword lookup
4751 disambiguate between method and sub call
4752 fall back to bareword
4753 - cases for built-in keywords
4761 char *s = PL_bufptr;
4765 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4769 /* orig_keyword, gvp, and gv are initialized here because
4770 * jump to the label just_a_word_zero can bypass their
4771 * initialization later. */
4772 I32 orig_keyword = 0;
4777 SV* tmp = newSVpvs("");
4778 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4779 (IV)CopLINE(PL_curcop),
4780 lex_state_names[PL_lex_state],
4781 exp_name[PL_expect],
4782 pv_display(tmp, s, strlen(s), 0, 60));
4786 switch (PL_lex_state) {
4788 case LEX_INTERPNORMAL:
4791 /* when we've already built the next token, just pull it out of the queue */
4795 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4797 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4798 PL_nexttoke[PL_lasttoke].next_mad = 0;
4799 if (PL_thismad && PL_thismad->mad_key == '_') {
4800 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4801 PL_thismad->mad_val = 0;
4802 mad_free(PL_thismad);
4807 PL_lex_state = PL_lex_defer;
4808 PL_expect = PL_lex_expect;
4809 PL_lex_defer = LEX_NORMAL;
4810 if (!PL_nexttoke[PL_lasttoke].next_type)
4815 pl_yylval = PL_nextval[PL_nexttoke];
4817 PL_lex_state = PL_lex_defer;
4818 PL_expect = PL_lex_expect;
4819 PL_lex_defer = LEX_NORMAL;
4825 next_type = PL_nexttoke[PL_lasttoke].next_type;
4827 next_type = PL_nexttype[PL_nexttoke];
4829 if (next_type & (7<<24)) {
4830 if (next_type & (1<<24)) {
4831 if (PL_lex_brackets > 100)
4832 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4833 PL_lex_brackstack[PL_lex_brackets++] =
4834 (char) ((next_type >> 16) & 0xff);
4836 if (next_type & (2<<24))
4837 PL_lex_allbrackets++;
4838 if (next_type & (4<<24))
4839 PL_lex_allbrackets--;
4840 next_type &= 0xffff;
4842 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4845 /* interpolated case modifiers like \L \U, including \Q and \E.
4846 when we get here, PL_bufptr is at the \
4848 case LEX_INTERPCASEMOD:
4850 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4852 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4853 PL_bufptr, PL_bufend, *PL_bufptr);
4855 /* handle \E or end of string */
4856 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4858 if (PL_lex_casemods) {
4859 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4860 PL_lex_casestack[PL_lex_casemods] = '\0';
4862 if (PL_bufptr != PL_bufend
4863 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4864 || oldmod == 'F')) {
4866 PL_lex_state = LEX_INTERPCONCAT;
4869 PL_thistoken = newSVpvs("\\E");
4872 PL_lex_allbrackets--;
4875 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4876 /* Got an unpaired \E */
4877 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4878 "Useless use of \\E");
4881 while (PL_bufptr != PL_bufend &&
4882 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4885 PL_thiswhite = newSVpvs("");
4886 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4891 if (PL_bufptr != PL_bufend)
4894 PL_lex_state = LEX_INTERPCONCAT;
4898 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4899 "### Saw case modifier\n"); });
4901 if (s[1] == '\\' && s[2] == 'E') {
4905 PL_thiswhite = newSVpvs("");
4906 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4910 PL_lex_state = LEX_INTERPCONCAT;
4915 if (!PL_madskills) /* when just compiling don't need correct */
4916 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4917 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4918 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4919 (strchr(PL_lex_casestack, 'L')
4920 || strchr(PL_lex_casestack, 'U')
4921 || strchr(PL_lex_casestack, 'F'))) {
4922 PL_lex_casestack[--PL_lex_casemods] = '\0';
4923 PL_lex_allbrackets--;
4926 if (PL_lex_casemods > 10)
4927 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4928 PL_lex_casestack[PL_lex_casemods++] = *s;
4929 PL_lex_casestack[PL_lex_casemods] = '\0';
4930 PL_lex_state = LEX_INTERPCONCAT;
4931 start_force(PL_curforce);
4932 NEXTVAL_NEXTTOKE.ival = 0;
4933 force_next((2<<24)|'(');
4934 start_force(PL_curforce);
4936 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4938 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4940 NEXTVAL_NEXTTOKE.ival = OP_LC;
4942 NEXTVAL_NEXTTOKE.ival = OP_UC;
4944 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4946 NEXTVAL_NEXTTOKE.ival = OP_FC;
4948 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4950 SV* const tmpsv = newSVpvs("\\ ");
4951 /* replace the space with the character we want to escape
4953 SvPVX(tmpsv)[1] = *s;
4959 if (PL_lex_starts) {
4965 sv_free(PL_thistoken);
4966 PL_thistoken = newSVpvs("");
4969 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4970 if (PL_lex_casemods == 1 && PL_lex_inpat)
4979 case LEX_INTERPPUSH:
4980 return REPORT(sublex_push());
4982 case LEX_INTERPSTART:
4983 if (PL_bufptr == PL_bufend)
4984 return REPORT(sublex_done());
4985 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4986 "### Interpolated variable\n"); });
4988 /* for /@a/, we leave the joining for the regex engine to do
4989 * (unless we're within \Q etc) */
4990 PL_lex_dojoin = (*PL_bufptr == '@'
4991 && (!PL_lex_inpat || PL_lex_casemods));
4992 PL_lex_state = LEX_INTERPNORMAL;
4993 if (PL_lex_dojoin) {
4994 start_force(PL_curforce);
4995 NEXTVAL_NEXTTOKE.ival = 0;
4997 start_force(PL_curforce);
4998 force_ident("\"", '$');
4999 start_force(PL_curforce);
5000 NEXTVAL_NEXTTOKE.ival = 0;
5002 start_force(PL_curforce);
5003 NEXTVAL_NEXTTOKE.ival = 0;
5004 force_next((2<<24)|'(');
5005 start_force(PL_curforce);
5006 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5009 /* Convert (?{...}) and friends to 'do {...}' */
5010 if (PL_lex_inpat && *PL_bufptr == '(') {
5011 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5013 if (*PL_bufptr != '{')
5015 start_force(PL_curforce);
5016 /* XXX probably need a CURMAD(something) here */
5017 PL_expect = XTERMBLOCK;
5021 if (PL_lex_starts++) {
5026 sv_free(PL_thistoken);
5027 PL_thistoken = newSVpvs("");
5030 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5031 if (!PL_lex_casemods && PL_lex_inpat)
5038 case LEX_INTERPENDMAYBE:
5039 if (intuit_more(PL_bufptr)) {
5040 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5046 if (PL_lex_dojoin) {
5047 const U8 dojoin_was = PL_lex_dojoin;
5048 PL_lex_dojoin = FALSE;
5049 PL_lex_state = LEX_INTERPCONCAT;
5053 sv_free(PL_thistoken);
5054 PL_thistoken = newSVpvs("");
5057 PL_lex_allbrackets--;
5058 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5060 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5061 && SvEVALED(PL_lex_repl))
5063 if (PL_bufptr != PL_bufend)
5064 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5067 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5068 re_eval_str. If the here-doc body’s length equals the previous
5069 value of re_eval_start, re_eval_start will now be null. So
5070 check re_eval_str as well. */
5071 if (PL_parser->lex_shared->re_eval_start
5072 || PL_parser->lex_shared->re_eval_str) {
5074 if (*PL_bufptr != ')')
5075 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5077 /* having compiled a (?{..}) expression, return the original
5078 * text too, as a const */
5079 if (PL_parser->lex_shared->re_eval_str) {
5080 sv = PL_parser->lex_shared->re_eval_str;
5081 PL_parser->lex_shared->re_eval_str = NULL;
5083 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5084 SvPV_shrink_to_cur(sv);
5086 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5087 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5088 start_force(PL_curforce);
5089 /* XXX probably need a CURMAD(something) here */
5090 NEXTVAL_NEXTTOKE.opval =
5091 (OP*)newSVOP(OP_CONST, 0,
5094 PL_parser->lex_shared->re_eval_start = NULL;
5100 case LEX_INTERPCONCAT:
5102 if (PL_lex_brackets)
5103 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5104 (long) PL_lex_brackets);
5106 if (PL_bufptr == PL_bufend)
5107 return REPORT(sublex_done());
5109 /* m'foo' still needs to be parsed for possible (?{...}) */
5110 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5111 SV *sv = newSVsv(PL_linestr);
5113 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5117 s = scan_const(PL_bufptr);
5119 PL_lex_state = LEX_INTERPCASEMOD;
5121 PL_lex_state = LEX_INTERPSTART;
5124 if (s != PL_bufptr) {
5125 start_force(PL_curforce);
5127 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5129 NEXTVAL_NEXTTOKE = pl_yylval;
5132 if (PL_lex_starts++) {
5136 sv_free(PL_thistoken);
5137 PL_thistoken = newSVpvs("");
5140 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5141 if (!PL_lex_casemods && PL_lex_inpat)
5154 s = scan_formline(PL_bufptr);
5155 if (!PL_lex_formbrack)
5164 /* We really do *not* want PL_linestr ever becoming a COW. */
5165 assert (!SvIsCOW(PL_linestr));
5167 PL_oldoldbufptr = PL_oldbufptr;
5169 PL_parser->saw_infix_sigil = 0;
5174 sv_free(PL_thistoken);
5177 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5181 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5184 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5185 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5187 SVs_TEMP | SVf_UTF8),
5188 10, UNI_DISPLAY_ISPRINT)
5189 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5190 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5191 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5192 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5196 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5197 UTF8fARG(UTF, (s - d), d),
5202 goto fake_eof; /* emulate EOF on ^D or ^Z */
5208 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5211 if (PL_lex_brackets &&
5212 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5213 yyerror((const char *)
5215 ? "Format not terminated"
5216 : "Missing right curly or square bracket"));
5218 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5219 "### Tokener got EOF\n");
5223 if (s++ < PL_bufend)
5224 goto retry; /* ignore stray nulls */
5227 if (!PL_in_eval && !PL_preambled) {
5228 PL_preambled = TRUE;
5234 /* Generate a string of Perl code to load the debugger.
5235 * If PERL5DB is set, it will return the contents of that,
5236 * otherwise a compile-time require of perl5db.pl. */
5238 const char * const pdb = PerlEnv_getenv("PERL5DB");
5241 sv_setpv(PL_linestr, pdb);
5242 sv_catpvs(PL_linestr,";");
5244 SETERRNO(0,SS_NORMAL);
5245 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5247 PL_parser->preambling = CopLINE(PL_curcop);
5249 sv_setpvs(PL_linestr,"");
5250 if (PL_preambleav) {
5251 SV **svp = AvARRAY(PL_preambleav);
5252 SV **const end = svp + AvFILLp(PL_preambleav);
5254 sv_catsv(PL_linestr, *svp);
5256 sv_catpvs(PL_linestr, ";");
5258 sv_free(MUTABLE_SV(PL_preambleav));
5259 PL_preambleav = NULL;
5262 sv_catpvs(PL_linestr,
5263 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5264 if (PL_minus_n || PL_minus_p) {
5265 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5267 sv_catpvs(PL_linestr,"chomp;");
5270 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5271 || *PL_splitstr == '"')
5272 && strchr(PL_splitstr + 1, *PL_splitstr))
5273 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5275 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5276 bytes can be used as quoting characters. :-) */
5277 const char *splits = PL_splitstr;
5278 sv_catpvs(PL_linestr, "our @F=split(q\0");
5281 if (*splits == '\\')
5282 sv_catpvn(PL_linestr, splits, 1);
5283 sv_catpvn(PL_linestr, splits, 1);
5284 } while (*splits++);
5285 /* This loop will embed the trailing NUL of
5286 PL_linestr as the last thing it does before
5288 sv_catpvs(PL_linestr, ");");
5292 sv_catpvs(PL_linestr,"our @F=split(' ');");
5295 sv_catpvs(PL_linestr, "\n");
5296 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5297 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5298 PL_last_lop = PL_last_uni = NULL;
5299 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5300 update_debugger_info(PL_linestr, NULL, 0);
5305 bof = PL_rsfp ? TRUE : FALSE;
5308 fake_eof = LEX_FAKE_EOF;
5310 PL_bufptr = PL_bufend;
5311 COPLINE_INC_WITH_HERELINES;
5312 if (!lex_next_chunk(fake_eof)) {
5313 CopLINE_dec(PL_curcop);
5315 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5317 CopLINE_dec(PL_curcop);
5320 PL_realtokenstart = -1;
5323 /* If it looks like the start of a BOM or raw UTF-16,
5324 * check if it in fact is. */
5325 if (bof && PL_rsfp &&
5327 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5330 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5331 bof = (offset == (Off_t)SvCUR(PL_linestr));
5332 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5333 /* offset may include swallowed CR */
5335 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5338 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5339 s = swallow_bom((U8*)s);
5342 if (PL_parser->in_pod) {
5343 /* Incest with pod. */
5346 sv_catsv(PL_thiswhite, PL_linestr);
5348 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5349 sv_setpvs(PL_linestr, "");
5350 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5351 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5352 PL_last_lop = PL_last_uni = NULL;
5353 PL_parser->in_pod = 0;
5356 if (PL_rsfp || PL_parser->filtered)
5358 } while (PL_parser->in_pod);
5359 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5360 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5361 PL_last_lop = PL_last_uni = NULL;
5362 if (CopLINE(PL_curcop) == 1) {
5363 while (s < PL_bufend && isSPACE(*s))
5365 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5369 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5373 if (*s == '#' && *(s+1) == '!')
5375 #ifdef ALTERNATE_SHEBANG
5377 static char const as[] = ALTERNATE_SHEBANG;
5378 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5379 d = s + (sizeof(as) - 1);
5381 #endif /* ALTERNATE_SHEBANG */
5390 while (*d && !isSPACE(*d))
5394 #ifdef ARG_ZERO_IS_SCRIPT
5395 if (ipathend > ipath) {
5397 * HP-UX (at least) sets argv[0] to the script name,
5398 * which makes $^X incorrect. And Digital UNIX and Linux,
5399 * at least, set argv[0] to the basename of the Perl
5400 * interpreter. So, having found "#!", we'll set it right.
5402 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5404 assert(SvPOK(x) || SvGMAGICAL(x));
5405 if (sv_eq(x, CopFILESV(PL_curcop))) {
5406 sv_setpvn(x, ipath, ipathend - ipath);
5412 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5413 const char * const lstart = SvPV_const(x,llen);
5415 bstart += blen - llen;
5416 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5417 sv_setpvn(x, ipath, ipathend - ipath);
5422 TAINT_NOT; /* $^X is always tainted, but that's OK */
5424 #endif /* ARG_ZERO_IS_SCRIPT */
5429 d = instr(s,"perl -");
5431 d = instr(s,"perl");
5433 /* avoid getting into infinite loops when shebang
5434 * line contains "Perl" rather than "perl" */
5436 for (d = ipathend-4; d >= ipath; --d) {
5437 if ((*d == 'p' || *d == 'P')
5438 && !ibcmp(d, "perl", 4))
5448 #ifdef ALTERNATE_SHEBANG
5450 * If the ALTERNATE_SHEBANG on this system starts with a
5451 * character that can be part of a Perl expression, then if
5452 * we see it but not "perl", we're probably looking at the
5453 * start of Perl code, not a request to hand off to some
5454 * other interpreter. Similarly, if "perl" is there, but
5455 * not in the first 'word' of the line, we assume the line
5456 * contains the start of the Perl program.
5458 if (d && *s != '#') {
5459 const char *c = ipath;
5460 while (*c && !strchr("; \t\r\n\f\v#", *c))
5463 d = NULL; /* "perl" not in first word; ignore */
5465 *s = '#'; /* Don't try to parse shebang line */
5467 #endif /* ALTERNATE_SHEBANG */
5472 !instr(s,"indir") &&
5473 instr(PL_origargv[0],"perl"))
5480 while (s < PL_bufend && isSPACE(*s))
5482 if (s < PL_bufend) {
5483 Newx(newargv,PL_origargc+3,char*);
5485 while (s < PL_bufend && !isSPACE(*s))
5488 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5491 newargv = PL_origargv;
5494 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5496 Perl_croak(aTHX_ "Can't exec %s", ipath);
5499 while (*d && !isSPACE(*d))
5501 while (SPACE_OR_TAB(*d))
5505 const bool switches_done = PL_doswitches;
5506 const U32 oldpdb = PL_perldb;
5507 const bool oldn = PL_minus_n;
5508 const bool oldp = PL_minus_p;
5512 bool baduni = FALSE;
5514 const char *d2 = d1 + 1;
5515 if (parse_unicode_opts((const char **)&d2)
5519 if (baduni || *d1 == 'M' || *d1 == 'm') {
5520 const char * const m = d1;
5521 while (*d1 && !isSPACE(*d1))
5523 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5526 d1 = moreswitches(d1);
5528 if (PL_doswitches && !switches_done) {
5529 int argc = PL_origargc;
5530 char **argv = PL_origargv;
5533 } while (argc && argv[0][0] == '-' && argv[0][1]);
5534 init_argv_symbols(argc,argv);
5536 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5537 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5538 /* if we have already added "LINE: while (<>) {",
5539 we must not do it again */
5541 sv_setpvs(PL_linestr, "");
5542 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5543 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5544 PL_last_lop = PL_last_uni = NULL;
5545 PL_preambled = FALSE;
5546 if (PERLDB_LINE || PERLDB_SAVESRC)
5547 (void)gv_fetchfile(PL_origfilename);
5554 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5555 PL_lex_state = LEX_FORMLINE;
5556 start_force(PL_curforce);
5557 NEXTVAL_NEXTTOKE.ival = 0;
5558 force_next(FORMRBRACK);
5563 #ifdef PERL_STRICT_CR
5564 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5566 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5568 case ' ': case '\t': case '\f': case 013:
5570 PL_realtokenstart = -1;
5573 PL_thiswhite = newSVpvs("");
5574 sv_catpvn(PL_thiswhite, s, 1);
5582 PL_realtokenstart = -1;
5586 if (PL_lex_state != LEX_NORMAL ||
5587 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5588 if (*s == '#' && s == PL_linestart && PL_in_eval
5589 && !PL_rsfp && !PL_parser->filtered) {
5590 /* handle eval qq[#line 1 "foo"\n ...] */
5591 CopLINE_dec(PL_curcop);
5594 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5596 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5600 const bool in_comment = *s == '#';
5602 while (d < PL_bufend && *d != '\n')
5606 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5607 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5611 PL_thiswhite = newSVpvn(s, d - s);
5614 if (in_comment && d == PL_bufend
5615 && PL_lex_state == LEX_INTERPNORMAL
5616 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5617 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5620 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5621 PL_lex_state = LEX_FORMLINE;
5622 start_force(PL_curforce);
5623 NEXTVAL_NEXTTOKE.ival = 0;
5624 force_next(FORMRBRACK);
5630 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5631 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5634 TOKEN(PEG); /* make sure any #! line is accessible */
5640 if (PL_madskills) d = s;
5641 while (s < PL_bufend && *s != '\n')
5649 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5650 Perl_croak(aTHX_ "panic: input overflow");
5652 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5654 PL_thiswhite = newSVpvs("");
5655 if (CopLINE(PL_curcop) == 1) {
5656 sv_setpvs(PL_thiswhite, "");
5659 sv_catpvn(PL_thiswhite, d, s - d);
5666 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5674 while (s < PL_bufend && SPACE_OR_TAB(*s))
5677 if (strnEQ(s,"=>",2)) {
5678 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5679 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5680 OPERATOR('-'); /* unary minus */
5683 case 'r': ftst = OP_FTEREAD; break;
5684 case 'w': ftst = OP_FTEWRITE; break;
5685 case 'x': ftst = OP_FTEEXEC; break;
5686 case 'o': ftst = OP_FTEOWNED; break;
5687 case 'R': ftst = OP_FTRREAD; break;
5688 case 'W': ftst = OP_FTRWRITE; break;
5689 case 'X': ftst = OP_FTREXEC; break;
5690 case 'O': ftst = OP_FTROWNED; break;
5691 case 'e': ftst = OP_FTIS; break;
5692 case 'z': ftst = OP_FTZERO; break;
5693 case 's': ftst = OP_FTSIZE; break;
5694 case 'f': ftst = OP_FTFILE; break;
5695 case 'd': ftst = OP_FTDIR; break;
5696 case 'l': ftst = OP_FTLINK; break;
5697 case 'p': ftst = OP_FTPIPE; break;
5698 case 'S': ftst = OP_FTSOCK; break;
5699 case 'u': ftst = OP_FTSUID; break;
5700 case 'g': ftst = OP_FTSGID; break;
5701 case 'k': ftst = OP_FTSVTX; break;
5702 case 'b': ftst = OP_FTBLK; break;
5703 case 'c': ftst = OP_FTCHR; break;
5704 case 't': ftst = OP_FTTTY; break;
5705 case 'T': ftst = OP_FTTEXT; break;
5706 case 'B': ftst = OP_FTBINARY; break;
5707 case 'M': case 'A': case 'C':
5708 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5710 case 'M': ftst = OP_FTMTIME; break;
5711 case 'A': ftst = OP_FTATIME; break;
5712 case 'C': ftst = OP_FTCTIME; break;
5720 PL_last_uni = PL_oldbufptr;
5721 PL_last_lop_op = (OPCODE)ftst;
5722 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5723 "### Saw file test %c\n", (int)tmp);
5728 /* Assume it was a minus followed by a one-letter named
5729 * subroutine call (or a -bareword), then. */
5730 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5731 "### '-%c' looked like a file test but was not\n",
5738 const char tmp = *s++;
5741 if (PL_expect == XOPERATOR)
5746 else if (*s == '>') {
5749 if (FEATURE_POSTDEREF_IS_ENABLED && (
5750 ((*s == '$' || *s == '&') && s[1] == '*')
5751 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5752 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5753 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5756 Perl_ck_warner_d(aTHX_
5757 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5758 "Postfix dereference is experimental"
5760 PL_expect = XPOSTDEREF;
5763 if (isIDFIRST_lazy_if(s,UTF)) {
5764 s = force_word(s,METHOD,FALSE,TRUE);
5772 if (PL_expect == XOPERATOR) {
5773 if (*s == '=' && !PL_lex_allbrackets &&
5774 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5781 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5783 OPERATOR('-'); /* unary minus */
5789 const char tmp = *s++;
5792 if (PL_expect == XOPERATOR)
5797 if (PL_expect == XOPERATOR) {
5798 if (*s == '=' && !PL_lex_allbrackets &&
5799 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5806 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5813 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5814 if (PL_expect != XOPERATOR) {
5815 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5816 PL_expect = XOPERATOR;
5817 force_ident(PL_tokenbuf, '*');
5825 if (*s == '=' && !PL_lex_allbrackets &&
5826 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5832 if (*s == '=' && !PL_lex_allbrackets &&
5833 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5837 PL_parser->saw_infix_sigil = 1;
5842 if (PL_expect == XOPERATOR) {
5843 if (s[1] == '=' && !PL_lex_allbrackets &&
5844 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5847 PL_parser->saw_infix_sigil = 1;
5850 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5851 PL_tokenbuf[0] = '%';
5852 s = scan_ident(s, PL_tokenbuf + 1,
5853 sizeof PL_tokenbuf - 1, FALSE);
5855 if (!PL_tokenbuf[1]) {
5858 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5860 PL_tokenbuf[0] = '@';
5862 PL_expect = XOPERATOR;
5863 force_ident_maybe_lex('%');
5867 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5868 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5873 if (PL_lex_brackets > 100)
5874 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5875 PL_lex_brackstack[PL_lex_brackets++] = 0;
5876 PL_lex_allbrackets++;
5878 const char tmp = *s++;
5883 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5885 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5888 Perl_ck_warner_d(aTHX_
5889 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5890 "Smartmatch is experimental");
5896 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5903 goto just_a_word_zero_gv;
5906 switch (PL_expect) {
5912 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5914 PL_bufptr = s; /* update in case we back off */
5917 "Use of := for an empty attribute list is not allowed");
5924 PL_expect = XTERMBLOCK;
5927 stuffstart = s - SvPVX(PL_linestr) - 1;
5931 while (isIDFIRST_lazy_if(s,UTF)) {
5934 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5935 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5936 if (tmp < 0) tmp = -tmp;
5951 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5953 d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5954 COPLINE_SET_FROM_MULTI_END;
5956 /* MUST advance bufptr here to avoid bogus
5957 "at end of line" context messages from yyerror().
5959 PL_bufptr = s + len;
5960 yyerror("Unterminated attribute parameter in attribute list");
5964 return REPORT(0); /* EOF indicator */
5968 sv_catsv(sv, PL_lex_stuff);
5969 attrs = op_append_elem(OP_LIST, attrs,
5970 newSVOP(OP_CONST, 0, sv));
5971 SvREFCNT_dec(PL_lex_stuff);
5972 PL_lex_stuff = NULL;
5975 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5977 if (PL_in_my == KEY_our) {
5978 deprecate(":unique");
5981 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5984 /* NOTE: any CV attrs applied here need to be part of
5985 the CVf_BUILTIN_ATTRS define in cv.h! */
5986 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5988 CvLVALUE_on(PL_compcv);
5990 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5992 deprecate(":locked");
5994 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5996 CvMETHOD_on(PL_compcv);
5998 /* After we've set the flags, it could be argued that
5999 we don't need to do the attributes.pm-based setting
6000 process, and shouldn't bother appending recognized
6001 flags. To experiment with that, uncomment the
6002 following "else". (Note that's already been
6003 uncommented. That keeps the above-applied built-in
6004 attributes from being intercepted (and possibly
6005 rejected) by a package's attribute routines, but is
6006 justified by the performance win for the common case
6007 of applying only built-in attributes.) */
6009 attrs = op_append_elem(OP_LIST, attrs,
6010 newSVOP(OP_CONST, 0,
6014 if (*s == ':' && s[1] != ':')
6017 break; /* require real whitespace or :'s */
6018 /* XXX losing whitespace on sequential attributes here */
6022 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
6023 if (*s != ';' && *s != '}' && *s != tmp
6024 && (tmp != '=' || *s != ')')) {
6025 const char q = ((*s == '\'') ? '"' : '\'');
6026 /* If here for an expression, and parsed no attrs, back
6028 if (tmp == '=' && !attrs) {
6032 /* MUST advance bufptr here to avoid bogus "at end of line"
6033 context messages from yyerror().
6036 yyerror( (const char *)
6038 ? Perl_form(aTHX_ "Invalid separator character "
6039 "%c%c%c in attribute list", q, *s, q)
6040 : "Unterminated attribute list" ) );
6048 start_force(PL_curforce);
6049 NEXTVAL_NEXTTOKE.opval = attrs;
6050 CURMAD('_', PL_nextwhite);
6055 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6056 (s - SvPVX(PL_linestr)) - stuffstart);
6061 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6065 PL_lex_allbrackets--;
6069 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6070 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6074 PL_lex_allbrackets++;
6077 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6083 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6086 PL_lex_allbrackets--;
6092 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6095 if (PL_lex_brackets <= 0)
6096 /* diag_listed_as: Unmatched right %s bracket */
6097 yyerror("Unmatched right square bracket");
6100 PL_lex_allbrackets--;
6101 if (PL_lex_state == LEX_INTERPNORMAL) {
6102 if (PL_lex_brackets == 0) {
6103 if (*s == '-' && s[1] == '>')
6104 PL_lex_state = LEX_INTERPENDMAYBE;
6105 else if (*s != '[' && *s != '{')
6106 PL_lex_state = LEX_INTERPEND;
6113 if (PL_lex_brackets > 100) {
6114 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6116 switch (PL_expect) {
6118 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6119 PL_lex_allbrackets++;
6120 OPERATOR(HASHBRACK);
6122 while (s < PL_bufend && SPACE_OR_TAB(*s))
6125 PL_tokenbuf[0] = '\0';
6126 if (d < PL_bufend && *d == '-') {
6127 PL_tokenbuf[0] = '-';
6129 while (d < PL_bufend && SPACE_OR_TAB(*d))
6132 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6133 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6135 while (d < PL_bufend && SPACE_OR_TAB(*d))
6138 const char minus = (PL_tokenbuf[0] == '-');
6139 s = force_word(s + minus, WORD, FALSE, TRUE);
6147 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6148 PL_lex_allbrackets++;
6153 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6154 PL_lex_allbrackets++;
6159 if (PL_oldoldbufptr == PL_last_lop)
6160 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6162 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6163 PL_lex_allbrackets++;
6166 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6168 /* This hack is to get the ${} in the message. */
6170 yyerror("syntax error");
6173 OPERATOR(HASHBRACK);
6175 /* This hack serves to disambiguate a pair of curlies
6176 * as being a block or an anon hash. Normally, expectation
6177 * determines that, but in cases where we're not in a
6178 * position to expect anything in particular (like inside
6179 * eval"") we have to resolve the ambiguity. This code
6180 * covers the case where the first term in the curlies is a
6181 * quoted string. Most other cases need to be explicitly
6182 * disambiguated by prepending a "+" before the opening
6183 * curly in order to force resolution as an anon hash.
6185 * XXX should probably propagate the outer expectation
6186 * into eval"" to rely less on this hack, but that could
6187 * potentially break current behavior of eval"".
6191 if (*s == '\'' || *s == '"' || *s == '`') {
6192 /* common case: get past first string, handling escapes */
6193 for (t++; t < PL_bufend && *t != *s;)
6194 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6198 else if (*s == 'q') {
6201 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6202 && !isWORDCHAR(*t))))
6204 /* skip q//-like construct */
6206 char open, close, term;
6209 while (t < PL_bufend && isSPACE(*t))
6211 /* check for q => */
6212 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6213 OPERATOR(HASHBRACK);
6217 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6221 for (t++; t < PL_bufend; t++) {
6222 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6224 else if (*t == open)
6228 for (t++; t < PL_bufend; t++) {
6229 if (*t == '\\' && t+1 < PL_bufend)
6231 else if (*t == close && --brackets <= 0)
6233 else if (*t == open)
6240 /* skip plain q word */
6241 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6244 else if (isWORDCHAR_lazy_if(t,UTF)) {
6246 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6249 while (t < PL_bufend && isSPACE(*t))
6251 /* if comma follows first term, call it an anon hash */
6252 /* XXX it could be a comma expression with loop modifiers */
6253 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6254 || (*t == '=' && t[1] == '>')))
6255 OPERATOR(HASHBRACK);
6256 if (PL_expect == XREF)
6259 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6265 pl_yylval.ival = CopLINE(PL_curcop);
6266 if (isSPACE(*s) || *s == '#')
6267 PL_copline = NOLINE; /* invalidate current command line number */
6268 TOKEN(formbrack ? '=' : '{');
6270 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6274 if (PL_lex_brackets <= 0)
6275 /* diag_listed_as: Unmatched right %s bracket */
6276 yyerror("Unmatched right curly bracket");
6278 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6279 PL_lex_allbrackets--;
6280 if (PL_lex_state == LEX_INTERPNORMAL) {
6281 if (PL_lex_brackets == 0) {
6282 if (PL_expect & XFAKEBRACK) {
6283 PL_expect &= XENUMMASK;
6284 PL_lex_state = LEX_INTERPEND;
6289 PL_thiswhite = newSVpvs("");
6290 sv_catpvs(PL_thiswhite,"}");
6293 return yylex(); /* ignore fake brackets */
6295 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6296 && SvEVALED(PL_lex_repl))
6297 PL_lex_state = LEX_INTERPEND;
6298 else if (*s == '-' && s[1] == '>')
6299 PL_lex_state = LEX_INTERPENDMAYBE;
6300 else if (*s != '[' && *s != '{')
6301 PL_lex_state = LEX_INTERPEND;
6304 if (PL_expect & XFAKEBRACK) {
6305 PL_expect &= XENUMMASK;
6307 return yylex(); /* ignore fake brackets */
6309 start_force(PL_curforce);
6311 curmad('X', newSVpvn(s-1,1));
6312 CURMAD('_', PL_thiswhite);
6314 force_next(formbrack ? '.' : '}');
6315 if (formbrack) LEAVE;
6317 if (PL_madskills && !PL_thistoken)
6318 PL_thistoken = newSVpvs("");
6320 if (formbrack == 2) { /* means . where arguments were expected */
6321 start_force(PL_curforce);
6327 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6330 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6331 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6338 if (PL_expect == XOPERATOR) {
6339 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6340 && isIDFIRST_lazy_if(s,UTF))
6342 CopLINE_dec(PL_curcop);
6343 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6344 CopLINE_inc(PL_curcop);
6346 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6347 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6351 PL_parser->saw_infix_sigil = 1;
6355 PL_tokenbuf[0] = '&';
6356 s = scan_ident(s - 1, PL_tokenbuf + 1,
6357 sizeof PL_tokenbuf - 1, TRUE);
6358 if (PL_tokenbuf[1]) {
6359 PL_expect = XOPERATOR;
6360 force_ident_maybe_lex('&');
6364 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6370 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6371 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6378 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6379 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6387 const char tmp = *s++;
6389 if (!PL_lex_allbrackets &&
6390 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6397 if (!PL_lex_allbrackets &&
6398 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6406 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6407 && strchr("+-*/%.^&|<",tmp))
6408 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6409 "Reversed %c= operator",(int)tmp);
6411 if (PL_expect == XSTATE && isALPHA(tmp) &&
6412 (s == PL_linestart+1 || s[-2] == '\n') )
6414 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6415 || PL_lex_state != LEX_NORMAL) {
6420 if (strnEQ(s,"=cut",4)) {
6436 PL_thiswhite = newSVpvs("");
6437 sv_catpvn(PL_thiswhite, PL_linestart,
6438 PL_bufend - PL_linestart);
6442 PL_parser->in_pod = 1;
6446 if (PL_expect == XBLOCK) {
6448 #ifdef PERL_STRICT_CR
6449 while (SPACE_OR_TAB(*t))
6451 while (SPACE_OR_TAB(*t) || *t == '\r')
6454 if (*t == '\n' || *t == '#') {
6457 SAVEI8(PL_parser->form_lex_state);
6458 SAVEI32(PL_lex_formbrack);
6459 PL_parser->form_lex_state = PL_lex_state;
6460 PL_lex_formbrack = PL_lex_brackets + 1;
6464 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6473 const char tmp = *s++;
6475 /* was this !=~ where !~ was meant?
6476 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6478 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6479 const char *t = s+1;
6481 while (t < PL_bufend && isSPACE(*t))
6484 if (*t == '/' || *t == '?' ||
6485 ((*t == 'm' || *t == 's' || *t == 'y')
6486 && !isWORDCHAR(t[1])) ||
6487 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6488 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6489 "!=~ should be !~");
6491 if (!PL_lex_allbrackets &&
6492 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6504 if (PL_expect != XOPERATOR) {
6505 if (s[1] != '<' && !strchr(s,'>'))
6508 s = scan_heredoc(s);
6510 s = scan_inputsymbol(s);
6511 PL_expect = XOPERATOR;
6512 TOKEN(sublex_start());
6518 if (*s == '=' && !PL_lex_allbrackets &&
6519 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6523 SHop(OP_LEFT_SHIFT);
6528 if (!PL_lex_allbrackets &&
6529 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6536 if (!PL_lex_allbrackets &&
6537 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6545 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6553 const char tmp = *s++;
6555 if (*s == '=' && !PL_lex_allbrackets &&
6556 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6560 SHop(OP_RIGHT_SHIFT);
6562 else if (tmp == '=') {
6563 if (!PL_lex_allbrackets &&
6564 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6572 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6581 if (PL_expect == XOPERATOR) {
6582 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6583 return deprecate_commaless_var_list();
6586 else if (PL_expect == XPOSTDEREF) {
6589 POSTDEREF(DOLSHARP);
6594 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6595 PL_tokenbuf[0] = '@';
6596 s = scan_ident(s + 1, PL_tokenbuf + 1,
6597 sizeof PL_tokenbuf - 1, FALSE);
6598 if (PL_expect == XOPERATOR)
6599 no_op("Array length", s);
6600 if (!PL_tokenbuf[1])
6602 PL_expect = XOPERATOR;
6603 force_ident_maybe_lex('#');
6607 PL_tokenbuf[0] = '$';
6608 s = scan_ident(s, PL_tokenbuf + 1,
6609 sizeof PL_tokenbuf - 1, FALSE);
6610 if (PL_expect == XOPERATOR)
6612 if (!PL_tokenbuf[1]) {
6614 yyerror("Final $ should be \\$ or $name");
6620 const char tmp = *s;
6621 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6624 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6625 && intuit_more(s)) {
6627 PL_tokenbuf[0] = '@';
6628 if (ckWARN(WARN_SYNTAX)) {
6631 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6634 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6635 while (t < PL_bufend && *t != ']')
6637 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6638 "Multidimensional syntax %.*s not supported",
6639 (int)((t - PL_bufptr) + 1), PL_bufptr);
6643 else if (*s == '{') {
6645 PL_tokenbuf[0] = '%';
6646 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6647 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6649 char tmpbuf[sizeof PL_tokenbuf];
6652 } while (isSPACE(*t));
6653 if (isIDFIRST_lazy_if(t,UTF)) {
6655 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6660 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6661 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6662 "You need to quote \"%"UTF8f"\"",
6663 UTF8fARG(UTF, len, tmpbuf));
6669 PL_expect = XOPERATOR;
6670 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6671 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6672 if (!islop || PL_last_lop_op == OP_GREPSTART)
6673 PL_expect = XOPERATOR;
6674 else if (strchr("$@\"'`q", *s))
6675 PL_expect = XTERM; /* e.g. print $fh "foo" */
6676 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6677 PL_expect = XTERM; /* e.g. print $fh &sub */
6678 else if (isIDFIRST_lazy_if(s,UTF)) {
6679 char tmpbuf[sizeof PL_tokenbuf];
6681 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6682 if ((t2 = keyword(tmpbuf, len, 0))) {
6683 /* binary operators exclude handle interpretations */
6695 PL_expect = XTERM; /* e.g. print $fh length() */
6700 PL_expect = XTERM; /* e.g. print $fh subr() */
6703 else if (isDIGIT(*s))
6704 PL_expect = XTERM; /* e.g. print $fh 3 */
6705 else if (*s == '.' && isDIGIT(s[1]))
6706 PL_expect = XTERM; /* e.g. print $fh .3 */
6707 else if ((*s == '?' || *s == '-' || *s == '+')
6708 && !isSPACE(s[1]) && s[1] != '=')
6709 PL_expect = XTERM; /* e.g. print $fh -1 */
6710 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6712 PL_expect = XTERM; /* e.g. print $fh /.../
6713 XXX except DORDOR operator
6715 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6717 PL_expect = XTERM; /* print $fh <<"EOF" */
6720 force_ident_maybe_lex('$');
6724 if (PL_expect == XOPERATOR)
6726 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6727 PL_tokenbuf[0] = '@';
6728 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6730 if (!PL_tokenbuf[1]) {
6733 if (PL_lex_state == LEX_NORMAL)
6735 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6737 PL_tokenbuf[0] = '%';
6739 /* Warn about @ where they meant $. */
6740 if (*s == '[' || *s == '{') {
6741 if (ckWARN(WARN_SYNTAX)) {
6742 S_check_scalar_slice(aTHX_ s);
6746 PL_expect = XOPERATOR;
6747 force_ident_maybe_lex('@');
6750 case '/': /* may be division, defined-or, or pattern */
6751 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6752 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6753 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6758 case '?': /* may either be conditional or pattern */
6759 if (PL_expect == XOPERATOR) {
6762 if (!PL_lex_allbrackets &&
6763 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6767 PL_lex_allbrackets++;
6773 /* A // operator. */
6774 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6775 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6776 LEX_FAKEEOF_LOGIC)) {
6784 if (*s == '=' && !PL_lex_allbrackets &&
6785 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6794 /* Disable warning on "study /blah/" */
6795 if (PL_oldoldbufptr == PL_last_uni
6796 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6797 || memNE(PL_last_uni, "study", 5)
6798 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6802 deprecate("?PATTERN? without explicit operator");
6803 s = scan_pat(s,OP_MATCH);
6804 TERM(sublex_start());
6808 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6809 #ifdef PERL_STRICT_CR
6812 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6814 && (s == PL_linestart || s[-1] == '\n') )
6817 formbrack = 2; /* dot seen where arguments expected */
6820 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6824 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6827 if (!PL_lex_allbrackets &&
6828 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6835 pl_yylval.ival = OPf_SPECIAL;
6841 if (*s == '=' && !PL_lex_allbrackets &&
6842 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6849 case '0': case '1': case '2': case '3': case '4':
6850 case '5': case '6': case '7': case '8': case '9':
6851 s = scan_num(s, &pl_yylval);
6852 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6853 if (PL_expect == XOPERATOR)
6858 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6859 COPLINE_SET_FROM_MULTI_END;
6860 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6861 if (PL_expect == XOPERATOR) {
6862 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6863 return deprecate_commaless_var_list();
6870 pl_yylval.ival = OP_CONST;
6871 TERM(sublex_start());
6874 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6877 printbuf("### Saw string before %s\n", s);
6879 PerlIO_printf(Perl_debug_log,
6880 "### Saw unterminated string\n");
6882 if (PL_expect == XOPERATOR) {
6883 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6884 return deprecate_commaless_var_list();
6891 pl_yylval.ival = OP_CONST;
6892 /* FIXME. I think that this can be const if char *d is replaced by
6893 more localised variables. */
6894 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6895 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6896 pl_yylval.ival = OP_STRINGIFY;
6900 if (pl_yylval.ival == OP_CONST)
6901 COPLINE_SET_FROM_MULTI_END;
6902 TERM(sublex_start());
6905 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6906 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6907 if (PL_expect == XOPERATOR)
6908 no_op("Backticks",s);
6911 pl_yylval.ival = OP_BACKTICK;
6912 TERM(sublex_start());
6916 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6918 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6920 if (PL_expect == XOPERATOR)
6921 no_op("Backslash",s);
6925 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6926 char *start = s + 2;
6927 while (isDIGIT(*start) || *start == '_')
6929 if (*start == '.' && isDIGIT(start[1])) {
6930 s = scan_num(s, &pl_yylval);
6933 else if ((*start == ':' && start[1] == ':')
6934 || (PL_expect == XSTATE && *start == ':'))
6936 else if (PL_expect == XSTATE) {
6938 while (d < PL_bufend && isSPACE(*d)) d++;
6939 if (*d == ':') goto keylookup;
6941 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6942 if (!isALPHA(*start) && (PL_expect == XTERM
6943 || PL_expect == XREF || PL_expect == XSTATE
6944 || PL_expect == XTERMORDORDOR)) {
6945 GV *const gv = gv_fetchpvn_flags(s, start - s,
6946 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6948 s = scan_num(s, &pl_yylval);
6955 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7008 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7010 /* Some keywords can be followed by any delimiter, including ':' */
7011 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7013 /* x::* is just a word, unless x is "CORE" */
7014 if (!anydelim && *s == ':' && s[1] == ':') {
7015 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7020 while (d < PL_bufend && isSPACE(*d))
7021 d++; /* no comments skipped here, or s### is misparsed */
7023 /* Is this a word before a => operator? */
7024 if (*d == '=' && d[1] == '>') {
7028 = (OP*)newSVOP(OP_CONST, 0,
7029 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7030 pl_yylval.opval->op_private = OPpCONST_BARE;
7034 /* Check for plugged-in keyword */
7038 char *saved_bufptr = PL_bufptr;
7040 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7042 if (result == KEYWORD_PLUGIN_DECLINE) {
7043 /* not a plugged-in keyword */
7044 PL_bufptr = saved_bufptr;
7045 } else if (result == KEYWORD_PLUGIN_STMT) {
7046 pl_yylval.opval = o;
7049 return REPORT(PLUGSTMT);
7050 } else if (result == KEYWORD_PLUGIN_EXPR) {
7051 pl_yylval.opval = o;
7053 PL_expect = XOPERATOR;
7054 return REPORT(PLUGEXPR);
7056 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7061 /* Check for built-in keyword */
7062 tmp = keyword(PL_tokenbuf, len, 0);
7064 /* Is this a label? */
7065 if (!anydelim && PL_expect == XSTATE
7066 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7068 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7069 pl_yylval.pval[len] = '\0';
7070 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7075 /* Check for lexical sub */
7076 if (PL_expect != XOPERATOR) {
7077 char tmpbuf[sizeof PL_tokenbuf + 1];
7079 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7080 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7081 if (off != NOT_IN_PAD) {
7082 assert(off); /* we assume this is boolean-true below */
7083 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7084 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7085 HEK * const stashname = HvNAME_HEK(stash);
7086 sv = newSVhek(stashname);
7087 sv_catpvs(sv, "::");
7088 sv_catpvn_flags(sv, PL_tokenbuf, len,
7089 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7090 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7100 rv2cv_op = newOP(OP_PADANY, 0);
7101 rv2cv_op->op_targ = off;
7102 cv = find_lexical_cv(off);
7110 if (tmp < 0) { /* second-class keyword? */
7111 GV *ogv = NULL; /* override (winner) */
7112 GV *hgv = NULL; /* hidden (loser) */
7113 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7115 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7116 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7120 if (GvIMPORTED_CV(gv))
7122 else if (! CvMETHOD(cv))
7126 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7130 ? GvCVu(gv) && GvIMPORTED_CV(gv)
7131 : SvPCS_IMPORTED(gv)
7132 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7141 tmp = 0; /* overridden by import or by GLOBAL */
7144 && -tmp==KEY_lock /* XXX generalizable kludge */
7147 tmp = 0; /* any sub overrides "weak" keyword */
7149 else { /* no override */
7151 if (tmp == KEY_dump) {
7152 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7153 "dump() better written as CORE::dump()");
7157 if (hgv && tmp != KEY_x) /* never ambiguous */
7158 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7159 "Ambiguous call resolved as CORE::%s(), "
7160 "qualify as such or use &",
7165 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7166 && (!anydelim || *s != '#')) {
7167 /* no override, and not s### either; skipspace is safe here
7168 * check for => on following line */
7170 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7171 STRLEN soff = s - SvPVX(PL_linestr);
7172 s = skipspace_flags(s, LEX_NO_INCLINE);
7173 arrow = *s == '=' && s[1] == '>';
7174 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7175 s = SvPVX(PL_linestr) + soff;
7183 default: /* not a keyword */
7184 /* Trade off - by using this evil construction we can pull the
7185 variable gv into the block labelled keylookup. If not, then
7186 we have to give it function scope so that the goto from the
7187 earlier ':' case doesn't bypass the initialisation. */
7189 just_a_word_zero_gv:
7201 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7202 const char penultchar =
7203 lastchar && PL_bufptr - 2 >= PL_linestart
7207 SV *nextPL_nextwhite = 0;
7211 /* Get the rest if it looks like a package qualifier */
7213 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7215 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7218 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7219 UTF8fARG(UTF, len, PL_tokenbuf),
7220 *s == '\'' ? "'" : "::");
7225 if (PL_expect == XOPERATOR) {
7226 if (PL_bufptr == PL_linestart) {
7227 CopLINE_dec(PL_curcop);
7228 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7229 CopLINE_inc(PL_curcop);
7232 no_op("Bareword",s);
7235 /* Look for a subroutine with this name in current package,
7236 unless this is a lexical sub, or name is "Foo::",
7237 in which case Foo is a bareword
7238 (and a package name). */
7240 if (len > 2 && !PL_madskills &&
7241 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7243 if (ckWARN(WARN_BAREWORD)
7244 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7245 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7246 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7247 UTF8fARG(UTF, len, PL_tokenbuf));
7249 PL_tokenbuf[len] = '\0';
7255 /* Mustn't actually add anything to a symbol table.
7256 But also don't want to "initialise" any placeholder
7257 constants that might already be there into full
7258 blown PVGVs with attached PVCV. */
7259 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7260 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7266 /* if we saw a global override before, get the right name */
7269 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7270 len ? len : strlen(PL_tokenbuf));
7272 SV * const tmp_sv = sv;
7273 sv = newSVpvs("CORE::GLOBAL::");
7274 sv_catsv(sv, tmp_sv);
7275 SvREFCNT_dec(tmp_sv);
7279 if (PL_madskills && !PL_thistoken) {
7280 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7281 PL_thistoken = newSVpvn(start,s - start);
7282 PL_realtokenstart = s - SvPVX(PL_linestr);
7286 /* Presume this is going to be a bareword of some sort. */
7288 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7289 pl_yylval.opval->op_private = OPpCONST_BARE;
7291 /* And if "Foo::", then that's what it certainly is. */
7297 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7298 const_op->op_private = OPpCONST_BARE;
7299 rv2cv_op = newCVREF(0, const_op);
7300 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7303 /* See if it's the indirect object for a list operator. */
7305 if (PL_oldoldbufptr &&
7306 PL_oldoldbufptr < PL_bufptr &&
7307 (PL_oldoldbufptr == PL_last_lop
7308 || PL_oldoldbufptr == PL_last_uni) &&
7309 /* NO SKIPSPACE BEFORE HERE! */
7310 (PL_expect == XREF ||
7311 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7313 bool immediate_paren = *s == '(';
7315 /* (Now we can afford to cross potential line boundary.) */
7316 s = SKIPSPACE2(s,nextPL_nextwhite);
7318 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7321 /* Two barewords in a row may indicate method call. */
7323 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7324 (tmp = intuit_method(s, gv, cv))) {
7326 if (tmp == METHOD && !PL_lex_allbrackets &&
7327 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7328 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7332 /* If not a declared subroutine, it's an indirect object. */
7333 /* (But it's an indir obj regardless for sort.) */
7334 /* Also, if "_" follows a filetest operator, it's a bareword */
7337 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7339 (PL_last_lop_op != OP_MAPSTART &&
7340 PL_last_lop_op != OP_GREPSTART))))
7341 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7342 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7345 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7350 PL_expect = XOPERATOR;
7353 s = SKIPSPACE2(s,nextPL_nextwhite);
7354 PL_nextwhite = nextPL_nextwhite;
7359 /* Is this a word before a => operator? */
7360 if (*s == '=' && s[1] == '>' && !pkgname) {
7363 /* This is our own scalar, created a few lines above,
7365 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7366 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7367 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7368 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7369 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7373 /* If followed by a paren, it's certainly a subroutine. */
7378 while (SPACE_OR_TAB(*d))
7380 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7387 PL_nextwhite = PL_thiswhite;
7390 start_force(PL_curforce);
7392 NEXTVAL_NEXTTOKE.opval =
7393 off ? rv2cv_op : pl_yylval.opval;
7394 PL_expect = XOPERATOR;
7397 PL_nextwhite = nextPL_nextwhite;
7398 curmad('X', PL_thistoken);
7399 PL_thistoken = newSVpvs("");
7403 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7404 else op_free(rv2cv_op), force_next(WORD);
7409 /* If followed by var or block, call it a method (unless sub) */
7411 if ((*s == '$' || *s == '{') && !cv) {
7413 PL_last_lop = PL_oldbufptr;
7414 PL_last_lop_op = OP_METHOD;
7415 if (!PL_lex_allbrackets &&
7416 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7417 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7421 /* If followed by a bareword, see if it looks like indir obj. */
7424 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7425 && (tmp = intuit_method(s, gv, cv))) {
7427 if (tmp == METHOD && !PL_lex_allbrackets &&
7428 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7429 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7433 /* Not a method, so call it a subroutine (if defined) */
7436 if (lastchar == '-' && penultchar != '-') {
7437 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7438 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7439 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7440 UTF8fARG(UTF, l, PL_tokenbuf),
7441 UTF8fARG(UTF, l, PL_tokenbuf));
7443 /* Check for a constant sub */
7444 if ((sv = cv_const_sv_or_av(cv))) {
7447 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7448 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7449 if (SvTYPE(sv) == SVt_PVAV)
7450 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7453 pl_yylval.opval->op_private = 0;
7454 pl_yylval.opval->op_folded = 1;
7455 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7460 op_free(pl_yylval.opval);
7462 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7463 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7464 PL_last_lop = PL_oldbufptr;
7465 PL_last_lop_op = OP_ENTERSUB;
7466 /* Is there a prototype? */
7473 STRLEN protolen = CvPROTOLEN(cv);
7474 const char *proto = CvPROTO(cv);
7476 proto = S_strip_spaces(aTHX_ proto, &protolen);
7479 if ((optional = *proto == ';'))
7482 while (*proto == ';');
7486 *proto == '$' || *proto == '_'
7487 || *proto == '*' || *proto == '+'
7492 *proto == '\\' && proto[1] && proto[2] == '\0'
7495 UNIPROTO(UNIOPSUB,optional);
7496 if (*proto == '\\' && proto[1] == '[') {
7497 const char *p = proto + 2;
7498 while(*p && *p != ']')
7500 if(*p == ']' && !p[1])
7501 UNIPROTO(UNIOPSUB,optional);
7503 if (*proto == '&' && *s == '{') {
7505 sv_setpvs(PL_subname, "__ANON__");
7507 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7508 if (!PL_lex_allbrackets &&
7509 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7510 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7517 PL_nextwhite = PL_thiswhite;
7520 start_force(PL_curforce);
7521 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7524 PL_nextwhite = nextPL_nextwhite;
7525 curmad('X', PL_thistoken);
7526 PL_thistoken = newSVpvs("");
7528 force_next(off ? PRIVATEREF : WORD);
7529 if (!PL_lex_allbrackets &&
7530 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7531 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7536 /* Guess harder when madskills require "best effort". */
7537 if (PL_madskills && (!gv || !GvCVu(gv))) {
7538 int probable_sub = 0;
7539 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7541 else if (isALPHA(*s)) {
7545 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7546 if (!keyword(tmpbuf, tmplen, 0))
7549 while (d < PL_bufend && isSPACE(*d))
7551 if (*d == '=' && d[1] == '>')
7556 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7558 op_free(pl_yylval.opval);
7560 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7561 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7562 PL_last_lop = PL_oldbufptr;
7563 PL_last_lop_op = OP_ENTERSUB;
7564 PL_nextwhite = PL_thiswhite;
7566 start_force(PL_curforce);
7567 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7569 PL_nextwhite = nextPL_nextwhite;
7570 curmad('X', PL_thistoken);
7571 PL_thistoken = newSVpvs("");
7572 force_next(off ? PRIVATEREF : WORD);
7573 if (!PL_lex_allbrackets &&
7574 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7575 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7579 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7581 force_next(off ? PRIVATEREF : WORD);
7582 if (!PL_lex_allbrackets &&
7583 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7584 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7589 /* Call it a bare word */
7591 if (PL_hints & HINT_STRICT_SUBS)
7592 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7595 /* after "print" and similar functions (corresponding to
7596 * "F? L" in opcode.pl), whatever wasn't already parsed as
7597 * a filehandle should be subject to "strict subs".
7598 * Likewise for the optional indirect-object argument to system
7599 * or exec, which can't be a bareword */
7600 if ((PL_last_lop_op == OP_PRINT
7601 || PL_last_lop_op == OP_PRTF
7602 || PL_last_lop_op == OP_SAY
7603 || PL_last_lop_op == OP_SYSTEM
7604 || PL_last_lop_op == OP_EXEC)
7605 && (PL_hints & HINT_STRICT_SUBS))
7606 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7607 if (lastchar != '-') {
7608 if (ckWARN(WARN_RESERVED)) {
7612 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7614 /* PL_warn_reserved is constant */
7615 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7616 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7626 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7627 && saw_infix_sigil) {
7628 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7629 "Operator or semicolon missing before %c%"UTF8f,
7631 UTF8fARG(UTF, strlen(PL_tokenbuf),
7633 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7634 "Ambiguous use of %c resolved as operator %c",
7635 lastchar, lastchar);
7642 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7647 (OP*)newSVOP(OP_CONST, 0,
7648 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7651 case KEY___PACKAGE__:
7653 (OP*)newSVOP(OP_CONST, 0,
7655 ? newSVhek(HvNAME_HEK(PL_curstash))
7662 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7663 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7666 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7668 gv_init(gv,stash,"DATA",4,0);
7671 GvIOp(gv) = newIO();
7672 IoIFP(GvIOp(gv)) = PL_rsfp;
7673 #if defined(HAS_FCNTL) && defined(F_SETFD)
7675 const int fd = PerlIO_fileno(PL_rsfp);
7676 fcntl(fd,F_SETFD,fd >= 3);
7679 /* Mark this internal pseudo-handle as clean */
7680 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7681 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7682 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7684 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7685 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7686 /* if the script was opened in binmode, we need to revert
7687 * it to text mode for compatibility; but only iff it has CRs
7688 * XXX this is a questionable hack at best. */
7689 if (PL_bufend-PL_bufptr > 2
7690 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7693 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7694 loc = PerlIO_tell(PL_rsfp);
7695 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7698 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7700 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7701 #endif /* NETWARE */
7703 PerlIO_seek(PL_rsfp, loc, 0);
7707 #ifdef PERLIO_LAYERS
7710 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7711 else if (PL_encoding) {
7718 XPUSHs(PL_encoding);
7720 call_method("name", G_SCALAR);
7724 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7725 Perl_form(aTHX_ ":encoding(%"SVf")",
7734 if (PL_realtokenstart >= 0) {
7735 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7737 PL_endwhite = newSVpvs("");
7738 sv_catsv(PL_endwhite, PL_thiswhite);
7740 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7741 PL_realtokenstart = -1;
7743 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7753 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7762 if (PL_expect == XSTATE) {
7773 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7774 if ((*s == ':' && s[1] == ':')
7775 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7779 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7783 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7784 UTF8fARG(UTF, len, PL_tokenbuf));
7787 else if (tmp == KEY_require || tmp == KEY_do
7789 /* that's a way to remember we saw "CORE::" */
7801 LOP(OP_ACCEPT,XTERM);
7804 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7809 LOP(OP_ATAN2,XTERM);
7815 LOP(OP_BINMODE,XTERM);
7818 LOP(OP_BLESS,XTERM);
7827 /* We have to disambiguate the two senses of
7828 "continue". If the next token is a '{' then
7829 treat it as the start of a continue block;
7830 otherwise treat it as a control operator.
7840 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7850 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7859 if (!PL_cryptseen) {
7860 PL_cryptseen = TRUE;
7864 LOP(OP_CRYPT,XTERM);
7867 LOP(OP_CHMOD,XTERM);
7870 LOP(OP_CHOWN,XTERM);
7873 LOP(OP_CONNECT,XTERM);
7893 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7895 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7896 && !keyword(PL_tokenbuf + 1, len, 0)) {
7899 force_ident_maybe_lex('&');
7904 if (orig_keyword == KEY_do) {
7913 PL_hints |= HINT_BLOCK_SCOPE;
7923 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7924 STR_WITH_LEN("NDBM_File::"),
7925 STR_WITH_LEN("DB_File::"),
7926 STR_WITH_LEN("GDBM_File::"),
7927 STR_WITH_LEN("SDBM_File::"),
7928 STR_WITH_LEN("ODBM_File::"),
7930 LOP(OP_DBMOPEN,XTERM);
7936 PL_expect = XOPERATOR;
7937 s = force_word(s,WORD,TRUE,FALSE);
7944 pl_yylval.ival = CopLINE(PL_curcop);
7948 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7962 if (*s == '{') { /* block eval */
7963 PL_expect = XTERMBLOCK;
7964 UNIBRACK(OP_ENTERTRY);
7966 else { /* string eval */
7968 UNIBRACK(OP_ENTEREVAL);
7973 UNIBRACK(-OP_ENTEREVAL);
7987 case KEY_endhostent:
7993 case KEY_endservent:
7996 case KEY_endprotoent:
8007 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8009 pl_yylval.ival = CopLINE(PL_curcop);
8011 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8014 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8017 if ((PL_bufend - p) >= 3 &&
8018 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8020 else if ((PL_bufend - p) >= 4 &&
8021 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8024 /* skip optional package name, as in "for my abc $x (..)" */
8025 if (isIDFIRST_lazy_if(p,UTF)) {
8026 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8030 Perl_croak(aTHX_ "Missing $ on loop variable");
8032 s = SvPVX(PL_linestr) + soff;
8038 LOP(OP_FORMLINE,XTERM);
8047 LOP(OP_FCNTL,XTERM);
8053 LOP(OP_FLOCK,XTERM);
8056 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8061 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8066 LOP(OP_GREPSTART, XREF);
8069 PL_expect = XOPERATOR;
8070 s = force_word(s,WORD,TRUE,FALSE);
8085 case KEY_getpriority:
8086 LOP(OP_GETPRIORITY,XTERM);
8088 case KEY_getprotobyname:
8091 case KEY_getprotobynumber:
8092 LOP(OP_GPBYNUMBER,XTERM);
8094 case KEY_getprotoent:
8106 case KEY_getpeername:
8107 UNI(OP_GETPEERNAME);
8109 case KEY_gethostbyname:
8112 case KEY_gethostbyaddr:
8113 LOP(OP_GHBYADDR,XTERM);
8115 case KEY_gethostent:
8118 case KEY_getnetbyname:
8121 case KEY_getnetbyaddr:
8122 LOP(OP_GNBYADDR,XTERM);
8127 case KEY_getservbyname:
8128 LOP(OP_GSBYNAME,XTERM);
8130 case KEY_getservbyport:
8131 LOP(OP_GSBYPORT,XTERM);
8133 case KEY_getservent:
8136 case KEY_getsockname:
8137 UNI(OP_GETSOCKNAME);
8139 case KEY_getsockopt:
8140 LOP(OP_GSOCKOPT,XTERM);
8155 pl_yylval.ival = CopLINE(PL_curcop);
8156 Perl_ck_warner_d(aTHX_
8157 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8158 "given is experimental");
8163 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8171 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8173 pl_yylval.ival = CopLINE(PL_curcop);
8177 LOP(OP_INDEX,XTERM);
8183 LOP(OP_IOCTL,XTERM);
8195 PL_expect = XOPERATOR;
8196 s = force_word(s,WORD,TRUE,FALSE);
8213 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8218 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8232 LOP(OP_LISTEN,XTERM);
8241 s = scan_pat(s,OP_MATCH);
8242 TERM(sublex_start());
8245 LOP(OP_MAPSTART, XREF);
8248 LOP(OP_MKDIR,XTERM);
8251 LOP(OP_MSGCTL,XTERM);
8254 LOP(OP_MSGGET,XTERM);
8257 LOP(OP_MSGRCV,XTERM);
8260 LOP(OP_MSGSND,XTERM);
8265 PL_in_my = (U16)tmp;
8267 if (isIDFIRST_lazy_if(s,UTF)) {
8271 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8272 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8274 if (!FEATURE_LEXSUBS_IS_ENABLED)
8276 "Experimental \"%s\" subs not enabled",
8277 tmp == KEY_my ? "my" :
8278 tmp == KEY_state ? "state" : "our");
8279 Perl_ck_warner_d(aTHX_
8280 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8281 "The lexical_subs feature is experimental");
8284 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8285 if (!PL_in_my_stash) {
8288 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8289 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8292 if (PL_madskills) { /* just add type to declarator token */
8293 sv_catsv(PL_thistoken, PL_nextwhite);
8295 sv_catpvn(PL_thistoken, start, s - start);
8303 PL_expect = XOPERATOR;
8304 s = force_word(s,WORD,TRUE,FALSE);
8308 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8313 s = tokenize_use(0, s);
8317 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8320 if (!PL_lex_allbrackets &&
8321 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8322 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8328 if (isIDFIRST_lazy_if(s,UTF)) {
8330 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8332 for (t=d; isSPACE(*t);)
8334 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8336 && !(t[0] == '=' && t[1] == '>')
8337 && !(t[0] == ':' && t[1] == ':')
8338 && !keyword(s, d-s, 0)
8340 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8341 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8342 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8348 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8350 pl_yylval.ival = OP_OR;
8360 LOP(OP_OPEN_DIR,XTERM);
8363 checkcomma(s,PL_tokenbuf,"filehandle");
8367 checkcomma(s,PL_tokenbuf,"filehandle");
8386 s = force_word(s,WORD,FALSE,TRUE);
8388 s = force_strict_version(s);
8389 PL_lex_expect = XBLOCK;
8393 LOP(OP_PIPE_OP,XTERM);
8396 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8397 COPLINE_SET_FROM_MULTI_END;
8400 pl_yylval.ival = OP_CONST;
8401 TERM(sublex_start());
8408 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8409 COPLINE_SET_FROM_MULTI_END;
8412 PL_expect = XOPERATOR;
8413 if (SvCUR(PL_lex_stuff)) {
8414 int warned_comma = !ckWARN(WARN_QW);
8415 int warned_comment = warned_comma;
8416 d = SvPV_force(PL_lex_stuff, len);
8418 for (; isSPACE(*d) && len; --len, ++d)
8423 if (!warned_comma || !warned_comment) {
8424 for (; !isSPACE(*d) && len; --len, ++d) {
8425 if (!warned_comma && *d == ',') {
8426 Perl_warner(aTHX_ packWARN(WARN_QW),
8427 "Possible attempt to separate words with commas");
8430 else if (!warned_comment && *d == '#') {
8431 Perl_warner(aTHX_ packWARN(WARN_QW),
8432 "Possible attempt to put comments in qw() list");
8438 for (; !isSPACE(*d) && len; --len, ++d)
8441 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8442 words = op_append_elem(OP_LIST, words,
8443 newSVOP(OP_CONST, 0, tokeq(sv)));
8448 words = newNULLLIST();
8450 SvREFCNT_dec(PL_lex_stuff);
8451 PL_lex_stuff = NULL;
8453 PL_expect = XOPERATOR;
8454 pl_yylval.opval = sawparens(words);
8459 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8462 pl_yylval.ival = OP_STRINGIFY;
8463 if (SvIVX(PL_lex_stuff) == '\'')
8464 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8465 TERM(sublex_start());
8468 s = scan_pat(s,OP_QR);
8469 TERM(sublex_start());
8472 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8475 pl_yylval.ival = OP_BACKTICK;
8476 TERM(sublex_start());
8483 PL_expect = XOPERATOR;
8485 s = force_version(s, FALSE);
8487 else if (*s != 'v' || !isDIGIT(s[1])
8488 || (s = force_version(s, TRUE), *s == 'v'))
8490 *PL_tokenbuf = '\0';
8491 s = force_word(s,WORD,TRUE,TRUE);
8492 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8493 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8494 GV_ADD | (UTF ? SVf_UTF8 : 0));
8496 yyerror("<> should be quotes");
8498 if (orig_keyword == KEY_require) {
8506 PL_last_uni = PL_oldbufptr;
8507 PL_last_lop_op = OP_REQUIRE;
8509 return REPORT( (int)REQUIRE );
8515 PL_expect = XOPERATOR;
8516 s = force_word(s,WORD,TRUE,FALSE);
8520 LOP(OP_RENAME,XTERM);
8529 LOP(OP_RINDEX,XTERM);
8538 UNIDOR(OP_READLINE);
8541 UNIDOR(OP_BACKTICK);
8550 LOP(OP_REVERSE,XTERM);
8553 UNIDOR(OP_READLINK);
8560 if (pl_yylval.opval)
8561 TERM(sublex_start());
8563 TOKEN(1); /* force error */
8566 checkcomma(s,PL_tokenbuf,"filehandle");
8576 LOP(OP_SELECT,XTERM);
8582 LOP(OP_SEMCTL,XTERM);
8585 LOP(OP_SEMGET,XTERM);
8588 LOP(OP_SEMOP,XTERM);
8594 LOP(OP_SETPGRP,XTERM);
8596 case KEY_setpriority:
8597 LOP(OP_SETPRIORITY,XTERM);
8599 case KEY_sethostent:
8605 case KEY_setservent:
8608 case KEY_setprotoent:
8618 LOP(OP_SEEKDIR,XTERM);
8620 case KEY_setsockopt:
8621 LOP(OP_SSOCKOPT,XTERM);
8627 LOP(OP_SHMCTL,XTERM);
8630 LOP(OP_SHMGET,XTERM);
8633 LOP(OP_SHMREAD,XTERM);
8636 LOP(OP_SHMWRITE,XTERM);
8639 LOP(OP_SHUTDOWN,XTERM);
8648 LOP(OP_SOCKET,XTERM);
8650 case KEY_socketpair:
8651 LOP(OP_SOCKPAIR,XTERM);
8654 checkcomma(s,PL_tokenbuf,"subroutine name");
8657 s = force_word(s,WORD,TRUE,TRUE);
8661 LOP(OP_SPLIT,XTERM);
8664 LOP(OP_SPRINTF,XTERM);
8667 LOP(OP_SPLICE,XTERM);
8682 LOP(OP_SUBSTR,XTERM);
8688 char * const tmpbuf = PL_tokenbuf + 1;
8689 expectation attrful;
8690 bool have_name, have_proto;
8691 const int key = tmp;
8693 SV *format_name = NULL;
8699 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8700 SV *subtoken = PL_madskills
8701 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8706 s = SKIPSPACE2(s,tmpwhite);
8712 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8713 (*s == ':' && s[1] == ':'))
8716 SV *nametoke = NULL;
8720 attrful = XATTRBLOCK;
8721 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8725 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8727 if (key == KEY_format)
8728 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8731 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8733 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8735 sv_setpvn(PL_subname, tmpbuf, len);
8737 sv_setsv(PL_subname,PL_curstname);
8738 sv_catpvs(PL_subname,"::");
8739 sv_catpvn(PL_subname,tmpbuf,len);
8741 if (SvUTF8(PL_linestr))
8742 SvUTF8_on(PL_subname);
8748 CURMAD('X', nametoke);
8749 CURMAD('_', tmpwhite);
8750 force_ident_maybe_lex('&');
8752 s = SKIPSPACE2(d,tmpwhite);
8758 if (key == KEY_my || key == KEY_our || key==KEY_state)
8761 /* diag_listed_as: Missing name in "%s sub" */
8763 "Missing name in \"%s\"", PL_bufptr);
8765 PL_expect = XTERMBLOCK;
8766 attrful = XATTRTERM;
8767 sv_setpvs(PL_subname,"?");
8771 if (key == KEY_format) {
8773 PL_thistoken = subtoken;
8777 start_force(PL_curforce);
8778 NEXTVAL_NEXTTOKE.opval
8779 = (OP*)newSVOP(OP_CONST,0, format_name);
8780 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8787 /* Look for a prototype */
8789 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8790 COPLINE_SET_FROM_MULTI_END;
8792 Perl_croak(aTHX_ "Prototype not terminated");
8793 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8798 CURMAD('q', PL_thisopen);
8799 CURMAD('_', tmpwhite);
8800 CURMAD('=', PL_thisstuff);
8801 CURMAD('Q', PL_thisclose);
8802 NEXTVAL_NEXTTOKE.opval =
8803 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8804 PL_lex_stuff = NULL;
8807 s = SKIPSPACE2(s,tmpwhite);
8815 if (*s == ':' && s[1] != ':')
8816 PL_expect = attrful;
8817 else if (*s != '{' && key == KEY_sub) {
8819 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8820 else if (*s != ';' && *s != '}')
8821 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8828 curmad('^', newSVpvs(""));
8829 CURMAD('_', tmpwhite);
8833 PL_thistoken = subtoken;
8834 PERL_UNUSED_VAR(have_proto);
8837 NEXTVAL_NEXTTOKE.opval =
8838 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8839 PL_lex_stuff = NULL;
8845 sv_setpvs(PL_subname, "__ANON__");
8847 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8851 force_ident_maybe_lex('&');
8857 LOP(OP_SYSTEM,XREF);
8860 LOP(OP_SYMLINK,XTERM);
8863 LOP(OP_SYSCALL,XTERM);
8866 LOP(OP_SYSOPEN,XTERM);
8869 LOP(OP_SYSSEEK,XTERM);
8872 LOP(OP_SYSREAD,XTERM);
8875 LOP(OP_SYSWRITE,XTERM);
8880 TERM(sublex_start());
8901 LOP(OP_TRUNCATE,XTERM);
8913 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8915 pl_yylval.ival = CopLINE(PL_curcop);
8919 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8921 pl_yylval.ival = CopLINE(PL_curcop);
8925 LOP(OP_UNLINK,XTERM);
8931 LOP(OP_UNPACK,XTERM);
8934 LOP(OP_UTIME,XTERM);
8940 LOP(OP_UNSHIFT,XTERM);
8943 s = tokenize_use(1, s);
8953 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8955 pl_yylval.ival = CopLINE(PL_curcop);
8956 Perl_ck_warner_d(aTHX_
8957 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8958 "when is experimental");
8962 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8964 pl_yylval.ival = CopLINE(PL_curcop);
8968 PL_hints |= HINT_BLOCK_SCOPE;
8975 LOP(OP_WAITPID,XTERM);
8981 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8982 * we use the same number on EBCDIC */
8983 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8987 if (PL_expect == XOPERATOR) {
8988 if (*s == '=' && !PL_lex_allbrackets &&
8989 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8997 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8999 pl_yylval.ival = OP_XOR;
9008 Looks up an identifier in the pad or in a package
9011 PRIVATEREF if this is a lexical name.
9012 WORD if this belongs to a package.
9015 if we're in a my declaration
9016 croak if they tried to say my($foo::bar)
9017 build the ops for a my() declaration
9018 if it's an access to a my() variable
9019 build ops for access to a my() variable
9020 if in a dq string, and they've said @foo and we can't find @foo
9022 build ops for a bareword
9026 S_pending_ident(pTHX)
9030 const char pit = (char)pl_yylval.ival;
9031 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9032 /* All routes through this function want to know if there is a colon. */
9033 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9035 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9036 "### Pending identifier '%s'\n", PL_tokenbuf); });
9038 /* if we're in a my(), we can't allow dynamics here.
9039 $foo'bar has already been turned into $foo::bar, so
9040 just check for colons.
9042 if it's a legal name, the OP is a PADANY.
9045 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9047 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9048 "variable %s in \"our\"",
9049 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9050 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9054 /* PL_no_myglob is constant */
9055 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9056 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9057 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9058 UTF ? SVf_UTF8 : 0);
9062 pl_yylval.opval = newOP(OP_PADANY, 0);
9063 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9064 UTF ? SVf_UTF8 : 0);
9070 build the ops for accesses to a my() variable.
9075 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9076 UTF ? SVf_UTF8 : 0);
9077 if (tmp != NOT_IN_PAD) {
9078 /* might be an "our" variable" */
9079 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9080 /* build ops for a bareword */
9081 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9082 HEK * const stashname = HvNAME_HEK(stash);
9083 SV * const sym = newSVhek(stashname);
9084 sv_catpvs(sym, "::");
9085 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9086 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9087 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9091 ? (GV_ADDMULTI | GV_ADDINEVAL)
9094 ((PL_tokenbuf[0] == '$') ? SVt_PV
9095 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9100 pl_yylval.opval = newOP(OP_PADANY, 0);
9101 pl_yylval.opval->op_targ = tmp;
9107 Whine if they've said @foo in a doublequoted string,
9108 and @foo isn't a variable we can find in the symbol
9111 if (ckWARN(WARN_AMBIGUOUS) &&
9112 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9113 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9114 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9115 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9116 /* DO NOT warn for @- and @+ */
9117 && !( PL_tokenbuf[2] == '\0' &&
9118 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9121 /* Downgraded from fatal to warning 20000522 mjd */
9122 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9123 "Possible unintended interpolation of %"UTF8f
9125 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9129 /* build ops for a bareword */
9130 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9131 newSVpvn_flags(PL_tokenbuf + 1,
9133 UTF ? SVf_UTF8 : 0 ));
9134 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9136 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9137 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9138 | ( UTF ? SVf_UTF8 : 0 ),
9139 ((PL_tokenbuf[0] == '$') ? SVt_PV
9140 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9146 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9150 PERL_ARGS_ASSERT_CHECKCOMMA;
9152 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9153 if (ckWARN(WARN_SYNTAX)) {
9156 for (w = s+2; *w && level; w++) {
9164 /* the list of chars below is for end of statements or
9165 * block / parens, boolean operators (&&, ||, //) and branch
9166 * constructs (or, and, if, until, unless, while, err, for).
9167 * Not a very solid hack... */
9168 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9169 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9170 "%s (...) interpreted as function",name);
9173 while (s < PL_bufend && isSPACE(*s))
9177 while (s < PL_bufend && isSPACE(*s))
9179 if (isIDFIRST_lazy_if(s,UTF)) {
9180 const char * const w = s;
9181 s += UTF ? UTF8SKIP(s) : 1;
9182 while (isWORDCHAR_lazy_if(s,UTF))
9183 s += UTF ? UTF8SKIP(s) : 1;
9184 while (s < PL_bufend && isSPACE(*s))
9188 if (keyword(w, s - w, 0))
9191 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9192 if (gv && GvCVu(gv))
9194 Perl_croak(aTHX_ "No comma allowed after %s", what);
9199 /* S_new_constant(): do any overload::constant lookup.
9201 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9202 Best used as sv=new_constant(..., sv, ...).
9203 If s, pv are NULL, calls subroutine with one argument,
9204 and <type> is used with error messages only.
9205 <type> is assumed to be well formed UTF-8 */
9208 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9209 SV *sv, SV *pv, const char *type, STRLEN typelen)
9212 HV * table = GvHV(PL_hintgv); /* ^H */
9217 const char *why1 = "", *why2 = "", *why3 = "";
9219 PERL_ARGS_ASSERT_NEW_CONSTANT;
9220 /* We assume that this is true: */
9221 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9224 /* charnames doesn't work well if there have been errors found */
9225 if (PL_error_count > 0 && *key == 'c')
9227 SvREFCNT_dec_NN(sv);
9228 return &PL_sv_undef;
9231 sv_2mortal(sv); /* Parent created it permanently */
9233 || ! (PL_hints & HINT_LOCALIZE_HH)
9234 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9239 /* Here haven't found what we're looking for. If it is charnames,
9240 * perhaps it needs to be loaded. Try doing that before giving up */
9242 Perl_load_module(aTHX_
9244 newSVpvs("_charnames"),
9245 /* version parameter; no need to specify it, as if
9246 * we get too early a version, will fail anyway,
9247 * not being able to find '_charnames' */
9252 assert(sp == PL_stack_sp);
9253 table = GvHV(PL_hintgv);
9255 && (PL_hints & HINT_LOCALIZE_HH)
9256 && (cvp = hv_fetch(table, key, keylen, FALSE))
9262 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9263 msg = Perl_form(aTHX_
9264 "Constant(%.*s) unknown",
9265 (int)(type ? typelen : len),
9271 why3 = "} is not defined";
9274 msg = Perl_form(aTHX_
9275 /* The +3 is for '\N{'; -4 for that, plus '}' */
9276 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9280 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9281 (int)(type ? typelen : len),
9282 (type ? type: s), why1, why2, why3);
9285 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9286 return SvREFCNT_inc_simple_NN(sv);
9291 pv = newSVpvn_flags(s, len, SVs_TEMP);
9293 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9295 typesv = &PL_sv_undef;
9297 PUSHSTACKi(PERLSI_OVERLOAD);
9309 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9313 /* Check the eval first */
9314 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9316 const char * errstr;
9317 sv_catpvs(errsv, "Propagated");
9318 errstr = SvPV_const(errsv, errlen);
9319 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9321 res = SvREFCNT_inc_simple_NN(sv);
9325 SvREFCNT_inc_simple_void_NN(res);
9334 why1 = "Call to &{$^H{";
9336 why3 = "}} did not return a defined value";
9338 (void)sv_2mortal(sv);
9345 PERL_STATIC_INLINE void
9346 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9348 PERL_ARGS_ASSERT_PARSE_IDENT;
9352 Perl_croak(aTHX_ "%s", ident_too_long);
9353 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9354 /* The UTF-8 case must come first, otherwise things
9355 * like c\N{COMBINING TILDE} would start failing, as the
9356 * isWORDCHAR_A case below would gobble the 'c' up.
9359 char *t = *s + UTF8SKIP(*s);
9360 while (isIDCONT_utf8((U8*)t))
9362 if (*d + (t - *s) > e)
9363 Perl_croak(aTHX_ "%s", ident_too_long);
9364 Copy(*s, *d, t - *s, char);
9368 else if ( isWORDCHAR_A(**s) ) {
9371 } while (isWORDCHAR_A(**s) && *d < e);
9373 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9378 else if (allow_package && **s == ':' && (*s)[1] == ':'
9379 /* Disallow things like Foo::$bar. For the curious, this is
9380 * the code path that triggers the "Bad name after" warning
9381 * when looking for barewords.
9383 && (*s)[2] != '$') {
9393 /* Returns a NUL terminated string, with the length of the string written to
9397 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9401 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9402 bool is_utf8 = cBOOL(UTF);
9404 PERL_ARGS_ASSERT_SCAN_WORD;
9406 parse_ident(&s, &d, e, allow_package, is_utf8);
9413 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9416 I32 herelines = PL_parser->herelines;
9417 SSize_t bracket = -1;
9420 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9421 bool is_utf8 = cBOOL(UTF);
9422 I32 orig_copline = 0, tmp_copline = 0;
9424 PERL_ARGS_ASSERT_SCAN_IDENT;
9429 while (isDIGIT(*s)) {
9431 Perl_croak(aTHX_ "%s", ident_too_long);
9436 parse_ident(&s, &d, e, 1, is_utf8);
9441 /* Either a digit variable, or parse_ident() found an identifier
9442 (anything valid as a bareword), so job done and return. */
9443 if (PL_lex_state != LEX_NORMAL)
9444 PL_lex_state = LEX_INTERPENDMAYBE;
9447 if (*s == '$' && s[1] &&
9448 (isIDFIRST_lazy_if(s+1,is_utf8)
9449 || isDIGIT_A((U8)s[1])
9452 || strnEQ(s+1,"::",2)) )
9454 /* Dereferencing a value in a scalar variable.
9455 The alternatives are different syntaxes for a scalar variable.
9456 Using ' as a leading package separator isn't allowed. :: is. */
9459 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9461 bracket = s - SvPVX(PL_linestr);
9463 orig_copline = CopLINE(PL_curcop);
9464 if (s < PL_bufend && isSPACE(*s)) {
9469 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9470 * iff Unicode semantics are to be used. The legal ones are any of:
9472 * b) ASCII punctuation
9473 * c) When not under Unicode rules, any upper Latin1 character
9474 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9475 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
9477 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9478 || isDIGIT_A((U8)(d)) \
9479 || (!(u) && !isASCII((U8)(d))) \
9480 || ((((U8)(d)) < 32) \
9481 && (((((U8)(d)) >= 14) \
9482 || (((U8)(d)) <= 8 && (d) != 0) \
9483 || (((U8)(d)) == 13)))) \
9484 || (((U8)(d)) == toCTRL('?')))
9486 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9488 if ( isCNTRL_A((U8)*s) ) {
9489 deprecate("literal control characters in variable names");
9493 const STRLEN skip = UTF8SKIP(s);
9496 for ( i = 0; i < skip; i++ )
9504 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9505 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9509 /* Warn about ambiguous code after unary operators if {...} notation isn't
9510 used. There's no difference in ambiguity; it's merely a heuristic
9511 about when not to warn. */
9512 else if (ck_uni && bracket == -1)
9514 if (bracket != -1) {
9515 /* If we were processing {...} notation then... */
9516 if (isIDFIRST_lazy_if(d,is_utf8)) {
9517 /* if it starts as a valid identifier, assume that it is one.
9518 (the later check for } being at the expected point will trap
9519 cases where this doesn't pan out.) */
9520 d += is_utf8 ? UTF8SKIP(d) : 1;
9521 parse_ident(&s, &d, e, 1, is_utf8);
9523 tmp_copline = CopLINE(PL_curcop);
9524 if (s < PL_bufend && isSPACE(*s)) {
9527 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9528 /* ${foo[0]} and ${foo{bar}} notation. */
9529 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9530 const char * const brack =
9532 ((*s == '[') ? "[...]" : "{...}");
9533 orig_copline = CopLINE(PL_curcop);
9534 CopLINE_set(PL_curcop, tmp_copline);
9535 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9536 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9537 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9538 funny, dest, brack, funny, dest, brack);
9539 CopLINE_set(PL_curcop, orig_copline);
9542 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9543 PL_lex_allbrackets++;
9547 /* Handle extended ${^Foo} variables
9548 * 1999-02-27 mjd-perl-patch@plover.com */
9549 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9553 while (isWORDCHAR(*s) && d < e) {
9557 Perl_croak(aTHX_ "%s", ident_too_long);
9562 tmp_copline = CopLINE(PL_curcop);
9563 if (s < PL_bufend && isSPACE(*s)) {
9567 /* Expect to find a closing } after consuming any trailing whitespace.
9571 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9572 PL_lex_state = LEX_INTERPEND;
9575 if (PL_lex_state == LEX_NORMAL) {
9576 if (ckWARN(WARN_AMBIGUOUS) &&
9577 (keyword(dest, d - dest, 0)
9578 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9580 SV *tmp = newSVpvn_flags( dest, d - dest,
9581 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9584 orig_copline = CopLINE(PL_curcop);
9585 CopLINE_set(PL_curcop, tmp_copline);
9586 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9587 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9588 funny, tmp, funny, tmp);
9589 CopLINE_set(PL_curcop, orig_copline);
9594 /* Didn't find the closing } at the point we expected, so restore
9595 state such that the next thing to process is the opening { and */
9596 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9597 CopLINE_set(PL_curcop, orig_copline);
9598 PL_parser->herelines = herelines;
9602 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9603 PL_lex_state = LEX_INTERPEND;
9608 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9610 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9611 * the parse starting at 's', based on the subset that are valid in this
9612 * context input to this routine in 'valid_flags'. Advances s. Returns
9613 * TRUE if the input should be treated as a valid flag, so the next char
9614 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9615 * first call on the current regex. This routine will set it to any
9616 * charset modifier found. The caller shouldn't change it. This way,
9617 * another charset modifier encountered in the parse can be detected as an
9618 * error, as we have decided to allow only one */
9621 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9623 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9624 if (isWORDCHAR_lazy_if(*s, UTF)) {
9625 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9626 UTF ? SVf_UTF8 : 0);
9628 /* Pretend that it worked, so will continue processing before
9637 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9638 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9639 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9640 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9641 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9642 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9643 case LOCALE_PAT_MOD:
9645 goto multiple_charsets;
9647 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9650 case UNICODE_PAT_MOD:
9652 goto multiple_charsets;
9654 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9657 case ASCII_RESTRICT_PAT_MOD:
9659 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9663 /* Error if previous modifier wasn't an 'a', but if it was, see
9664 * if, and accept, a second occurrence (only) */
9666 || get_regex_charset(*pmfl)
9667 != REGEX_ASCII_RESTRICTED_CHARSET)
9669 goto multiple_charsets;
9671 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9675 case DEPENDS_PAT_MOD:
9677 goto multiple_charsets;
9679 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9688 if (*charset != c) {
9689 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9691 else if (c == 'a') {
9692 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9693 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9696 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9699 /* Pretend that it worked, so will continue processing before dieing */
9705 S_scan_pat(pTHX_ char *start, I32 type)
9710 const char * const valid_flags =
9711 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9712 char charset = '\0'; /* character set modifier */
9717 PERL_ARGS_ASSERT_SCAN_PAT;
9719 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9720 TRUE /* look for escaped bracketed metas */, NULL);
9723 const char * const delimiter = skipspace(start);
9727 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9728 : "Search pattern not terminated" ));
9731 pm = (PMOP*)newPMOP(type, 0);
9732 if (PL_multi_open == '?') {
9733 /* This is the only point in the code that sets PMf_ONCE: */
9734 pm->op_pmflags |= PMf_ONCE;
9736 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9737 allows us to restrict the list needed by reset to just the ??
9739 assert(type != OP_TRANS);
9741 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9744 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9747 elements = mg->mg_len / sizeof(PMOP**);
9748 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9749 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9750 mg->mg_len = elements * sizeof(PMOP**);
9751 PmopSTASH_set(pm,PL_curstash);
9758 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9759 * anon CV. False positives like qr/[(?{]/ are harmless */
9761 if (type == OP_QR) {
9763 char *e, *p = SvPV(PL_lex_stuff, len);
9765 for (; p < e; p++) {
9766 if (p[0] == '(' && p[1] == '?'
9767 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9769 pm->op_pmflags |= PMf_HAS_CV;
9773 pm->op_pmflags |= PMf_IS_QR;
9776 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9778 if (PL_madskills && modstart != s) {
9779 SV* tmptoken = newSVpvn(modstart, s - modstart);
9780 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9783 /* issue a warning if /c is specified,but /g is not */
9784 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9786 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9787 "Use of /c modifier is meaningless without /g" );
9790 PL_lex_op = (OP*)pm;
9791 pl_yylval.ival = OP_MATCH;
9796 S_scan_subst(pTHX_ char *start)
9804 char charset = '\0'; /* character set modifier */
9810 PERL_ARGS_ASSERT_SCAN_SUBST;
9812 pl_yylval.ival = OP_NULL;
9814 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9815 TRUE /* look for escaped bracketed metas */, &t);
9818 Perl_croak(aTHX_ "Substitution pattern not terminated");
9823 CURMAD('q', PL_thisopen);
9824 CURMAD('_', PL_thiswhite);
9825 CURMAD('E', PL_thisstuff);
9826 CURMAD('Q', PL_thisclose);
9827 PL_realtokenstart = s - SvPVX(PL_linestr);
9831 first_start = PL_multi_start;
9832 first_line = CopLINE(PL_curcop);
9833 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9836 SvREFCNT_dec(PL_lex_stuff);
9837 PL_lex_stuff = NULL;
9839 Perl_croak(aTHX_ "Substitution replacement not terminated");
9841 PL_multi_start = first_start; /* so whole substitution is taken together */
9843 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9847 CURMAD('z', PL_thisopen);
9848 CURMAD('R', PL_thisstuff);
9849 CURMAD('Z', PL_thisclose);
9855 if (*s == EXEC_PAT_MOD) {
9859 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9868 curmad('m', newSVpvn(modstart, s - modstart));
9869 append_madprops(PL_thismad, (OP*)pm, 0);
9873 if ((pm->op_pmflags & PMf_CONTINUE)) {
9874 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9878 SV * const repl = newSVpvs("");
9881 pm->op_pmflags |= PMf_EVAL;
9884 sv_catpvs(repl, "eval ");
9886 sv_catpvs(repl, "do ");
9888 sv_catpvs(repl, "{");
9889 sv_catsv(repl, PL_sublex_info.repl);
9890 sv_catpvs(repl, "}");
9892 SvREFCNT_dec(PL_sublex_info.repl);
9893 PL_sublex_info.repl = repl;
9895 if (CopLINE(PL_curcop) != first_line) {
9896 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9897 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9898 CopLINE(PL_curcop) - first_line;
9899 CopLINE_set(PL_curcop, first_line);
9902 PL_lex_op = (OP*)pm;
9903 pl_yylval.ival = OP_SUBST;
9908 S_scan_trans(pTHX_ char *start)
9916 bool nondestruct = 0;
9922 PERL_ARGS_ASSERT_SCAN_TRANS;
9924 pl_yylval.ival = OP_NULL;
9926 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9928 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9933 CURMAD('q', PL_thisopen);
9934 CURMAD('_', PL_thiswhite);
9935 CURMAD('E', PL_thisstuff);
9936 CURMAD('Q', PL_thisclose);
9937 PL_realtokenstart = s - SvPVX(PL_linestr);
9941 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9944 SvREFCNT_dec(PL_lex_stuff);
9945 PL_lex_stuff = NULL;
9947 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9950 CURMAD('z', PL_thisopen);
9951 CURMAD('R', PL_thisstuff);
9952 CURMAD('Z', PL_thisclose);
9955 complement = del = squash = 0;
9962 complement = OPpTRANS_COMPLEMENT;
9965 del = OPpTRANS_DELETE;
9968 squash = OPpTRANS_SQUASH;
9980 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9981 o->op_private &= ~OPpTRANS_ALL;
9982 o->op_private |= del|squash|complement|
9983 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9984 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9987 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9992 curmad('m', newSVpvn(modstart, s - modstart));
9993 append_madprops(PL_thismad, o, 0);
10002 Takes a pointer to the first < in <<FOO.
10003 Returns a pointer to the byte following <<FOO.
10005 This function scans a heredoc, which involves different methods
10006 depending on whether we are in a string eval, quoted construct, etc.
10007 This is because PL_linestr could containing a single line of input, or
10008 a whole string being evalled, or the contents of the current quote-
10011 The two basic methods are:
10012 - Steal lines from the input stream
10013 - Scan the heredoc in PL_linestr and remove it therefrom
10015 In a file scope or filtered eval, the first method is used; in a
10016 string eval, the second.
10018 In a quote-like operator, we have to choose between the two,
10019 depending on where we can find a newline. We peek into outer lex-
10020 ing scopes until we find one with a newline in it. If we reach the
10021 outermost lexing scope and it is a file, we use the stream method.
10022 Otherwise it is treated as an eval.
10026 S_scan_heredoc(pTHX_ char *s)
10029 I32 op_type = OP_SCALAR;
10036 const bool infile = PL_rsfp || PL_parser->filtered;
10037 const line_t origline = CopLINE(PL_curcop);
10038 LEXSHARED *shared = PL_parser->lex_shared;
10040 I32 stuffstart = s - SvPVX(PL_linestr);
10043 PL_realtokenstart = -1;
10046 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10049 d = PL_tokenbuf + 1;
10050 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10051 *PL_tokenbuf = '\n';
10053 while (SPACE_OR_TAB(*peek))
10055 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10058 s = delimcpy(d, e, s, PL_bufend, term, &len);
10059 if (s == PL_bufend)
10060 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10066 /* <<\FOO is equivalent to <<'FOO' */
10070 if (!isWORDCHAR_lazy_if(s,UTF))
10071 deprecate("bare << to mean <<\"\"");
10072 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10077 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10078 Perl_croak(aTHX_ "Delimiter for here document is too long");
10081 len = d - PL_tokenbuf;
10084 if (PL_madskills) {
10085 tstart = PL_tokenbuf + 1;
10086 PL_thisclose = newSVpvn(tstart, len - 1);
10087 tstart = SvPVX(PL_linestr) + stuffstart;
10088 PL_thisopen = newSVpvn(tstart, s - tstart);
10089 stuffstart = s - SvPVX(PL_linestr);
10092 #ifndef PERL_STRICT_CR
10093 d = strchr(s, '\r');
10095 char * const olds = s;
10097 while (s < PL_bufend) {
10103 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10112 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10117 if (PL_madskills) {
10118 tstart = SvPVX(PL_linestr) + stuffstart;
10120 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10122 PL_thisstuff = newSVpvn(tstart, s - tstart);
10125 stuffstart = s - SvPVX(PL_linestr);
10128 tmpstr = newSV_type(SVt_PVIV);
10129 SvGROW(tmpstr, 80);
10130 if (term == '\'') {
10131 op_type = OP_CONST;
10132 SvIV_set(tmpstr, -1);
10134 else if (term == '`') {
10135 op_type = OP_BACKTICK;
10136 SvIV_set(tmpstr, '\\');
10139 PL_multi_start = origline + 1 + PL_parser->herelines;
10140 PL_multi_open = PL_multi_close = '<';
10141 /* inside a string eval or quote-like operator */
10142 if (!infile || PL_lex_inwhat) {
10145 char * const olds = s;
10146 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10147 /* These two fields are not set until an inner lexing scope is
10148 entered. But we need them set here. */
10149 shared->ls_bufptr = s;
10150 shared->ls_linestr = PL_linestr;
10152 /* Look for a newline. If the current buffer does not have one,
10153 peek into the line buffer of the parent lexing scope, going
10154 up as many levels as necessary to find one with a newline
10157 while (!(s = (char *)memchr(
10158 (void *)shared->ls_bufptr, '\n',
10159 SvEND(shared->ls_linestr)-shared->ls_bufptr
10161 shared = shared->ls_prev;
10162 /* shared is only null if we have gone beyond the outermost
10163 lexing scope. In a file, we will have broken out of the
10164 loop in the previous iteration. In an eval, the string buf-
10165 fer ends with "\n;", so the while condition above will have
10166 evaluated to false. So shared can never be null. */
10168 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10169 most lexing scope. In a file, shared->ls_linestr at that
10170 level is just one line, so there is no body to steal. */
10171 if (infile && !shared->ls_prev) {
10177 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10180 linestr = shared->ls_linestr;
10181 bufend = SvEND(linestr);
10183 while (s < bufend - len + 1 &&
10184 memNE(s,PL_tokenbuf,len) ) {
10186 ++PL_parser->herelines;
10188 if (s >= bufend - len + 1) {
10191 sv_setpvn(tmpstr,d+1,s-d);
10193 if (PL_madskills) {
10195 sv_catpvn(PL_thisstuff, d + 1, s - d);
10197 PL_thisstuff = newSVpvn(d + 1, s - d);
10198 stuffstart = s - SvPVX(PL_linestr);
10202 /* the preceding stmt passes a newline */
10203 PL_parser->herelines++;
10205 /* s now points to the newline after the heredoc terminator.
10206 d points to the newline before the body of the heredoc.
10209 /* We are going to modify linestr in place here, so set
10210 aside copies of the string if necessary for re-evals or
10212 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10213 check shared->re_eval_str. */
10214 if (shared->re_eval_start || shared->re_eval_str) {
10215 /* Set aside the rest of the regexp */
10216 if (!shared->re_eval_str)
10217 shared->re_eval_str =
10218 newSVpvn(shared->re_eval_start,
10219 bufend - shared->re_eval_start);
10220 shared->re_eval_start -= s-d;
10222 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10223 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10224 cx->blk_eval.cur_text == linestr)
10226 cx->blk_eval.cur_text = newSVsv(linestr);
10227 SvSCREAM_on(cx->blk_eval.cur_text);
10229 /* Copy everything from s onwards back to d. */
10230 Move(s,d,bufend-s + 1,char);
10231 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10232 /* Setting PL_bufend only applies when we have not dug deeper
10233 into other scopes, because sublex_done sets PL_bufend to
10234 SvEND(PL_linestr). */
10235 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10242 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10243 term = PL_tokenbuf[1];
10245 linestr_save = PL_linestr; /* must restore this afterwards */
10246 d = s; /* and this */
10247 PL_linestr = newSVpvs("");
10248 PL_bufend = SvPVX(PL_linestr);
10251 if (PL_madskills) {
10252 tstart = SvPVX(PL_linestr) + stuffstart;
10254 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10256 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10259 PL_bufptr = PL_bufend;
10260 CopLINE_set(PL_curcop,
10261 origline + 1 + PL_parser->herelines);
10262 if (!lex_next_chunk(LEX_NO_TERM)
10263 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10264 SvREFCNT_dec(linestr_save);
10267 CopLINE_set(PL_curcop, origline);
10268 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10269 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10270 /* ^That should be enough to avoid this needing to grow: */
10271 sv_catpvs(PL_linestr, "\n\0");
10272 assert(s == SvPVX(PL_linestr));
10273 PL_bufend = SvEND(PL_linestr);
10277 stuffstart = s - SvPVX(PL_linestr);
10279 PL_parser->herelines++;
10280 PL_last_lop = PL_last_uni = NULL;
10281 #ifndef PERL_STRICT_CR
10282 if (PL_bufend - PL_linestart >= 2) {
10283 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10284 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10286 PL_bufend[-2] = '\n';
10288 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10290 else if (PL_bufend[-1] == '\r')
10291 PL_bufend[-1] = '\n';
10293 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10294 PL_bufend[-1] = '\n';
10296 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10297 SvREFCNT_dec(PL_linestr);
10298 PL_linestr = linestr_save;
10299 PL_linestart = SvPVX(linestr_save);
10300 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10305 sv_catsv(tmpstr,PL_linestr);
10309 PL_multi_end = origline + PL_parser->herelines;
10310 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10311 SvPV_shrink_to_cur(tmpstr);
10314 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10316 else if (PL_encoding)
10317 sv_recode_to_utf8(tmpstr, PL_encoding);
10319 PL_lex_stuff = tmpstr;
10320 pl_yylval.ival = op_type;
10324 SvREFCNT_dec(tmpstr);
10325 CopLINE_set(PL_curcop, origline);
10326 missingterm(PL_tokenbuf + 1);
10329 /* scan_inputsymbol
10330 takes: current position in input buffer
10331 returns: new position in input buffer
10332 side-effects: pl_yylval and lex_op are set.
10337 <FH> read from filehandle
10338 <pkg::FH> read from package qualified filehandle
10339 <pkg'FH> read from package qualified filehandle
10340 <$fh> read from filehandle in $fh
10341 <*.h> filename glob
10346 S_scan_inputsymbol(pTHX_ char *start)
10349 char *s = start; /* current position in buffer */
10352 char *d = PL_tokenbuf; /* start of temp holding space */
10353 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10355 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10357 end = strchr(s, '\n');
10360 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10362 /* die if we didn't have space for the contents of the <>,
10363 or if it didn't end, or if we see a newline
10366 if (len >= (I32)sizeof PL_tokenbuf)
10367 Perl_croak(aTHX_ "Excessively long <> operator");
10369 Perl_croak(aTHX_ "Unterminated <> operator");
10374 Remember, only scalar variables are interpreted as filehandles by
10375 this code. Anything more complex (e.g., <$fh{$num}>) will be
10376 treated as a glob() call.
10377 This code makes use of the fact that except for the $ at the front,
10378 a scalar variable and a filehandle look the same.
10380 if (*d == '$' && d[1]) d++;
10382 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10383 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10384 d += UTF ? UTF8SKIP(d) : 1;
10386 /* If we've tried to read what we allow filehandles to look like, and
10387 there's still text left, then it must be a glob() and not a getline.
10388 Use scan_str to pull out the stuff between the <> and treat it
10389 as nothing more than a string.
10392 if (d - PL_tokenbuf != len) {
10393 pl_yylval.ival = OP_GLOB;
10394 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10396 Perl_croak(aTHX_ "Glob not terminated");
10400 bool readline_overriden = FALSE;
10402 /* we're in a filehandle read situation */
10405 /* turn <> into <ARGV> */
10407 Copy("ARGV",d,5,char);
10409 /* Check whether readline() is overriden */
10410 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10411 if ((gv_readline = gv_override("readline",8)))
10412 readline_overriden = TRUE;
10414 /* if <$fh>, create the ops to turn the variable into a
10418 /* try to find it in the pad for this block, otherwise find
10419 add symbol table ops
10421 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10422 if (tmp != NOT_IN_PAD) {
10423 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10424 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10425 HEK * const stashname = HvNAME_HEK(stash);
10426 SV * const sym = sv_2mortal(newSVhek(stashname));
10427 sv_catpvs(sym, "::");
10428 sv_catpv(sym, d+1);
10433 OP * const o = newOP(OP_PADSV, 0);
10435 PL_lex_op = readline_overriden
10436 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10437 op_append_elem(OP_LIST, o,
10438 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10439 : (OP*)newUNOP(OP_READLINE, 0, o);
10448 ? (GV_ADDMULTI | GV_ADDINEVAL)
10449 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10451 PL_lex_op = readline_overriden
10452 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10453 op_append_elem(OP_LIST,
10454 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10455 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10456 : (OP*)newUNOP(OP_READLINE, 0,
10457 newUNOP(OP_RV2SV, 0,
10458 newGVOP(OP_GV, 0, gv)));
10460 if (!readline_overriden)
10461 PL_lex_op->op_flags |= OPf_SPECIAL;
10462 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10463 pl_yylval.ival = OP_NULL;
10466 /* If it's none of the above, it must be a literal filehandle
10467 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10469 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10470 PL_lex_op = readline_overriden
10471 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10472 op_append_elem(OP_LIST,
10473 newGVOP(OP_GV, 0, gv),
10474 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10475 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10476 pl_yylval.ival = OP_NULL;
10486 start position in buffer
10487 keep_quoted preserve \ on the embedded delimiter(s)
10488 keep_delims preserve the delimiters around the string
10489 re_reparse compiling a run-time /(?{})/:
10490 collapse // to /, and skip encoding src
10491 deprecate_escaped_meta issue a deprecation warning for cer-
10492 tain paired metacharacters that appear
10494 delimp if non-null, this is set to the position of
10495 the closing delimiter, or just after it if
10496 the closing and opening delimiters differ
10497 (i.e., the opening delimiter of a substitu-
10499 returns: position to continue reading from buffer
10500 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10501 updates the read buffer.
10503 This subroutine pulls a string out of the input. It is called for:
10504 q single quotes q(literal text)
10505 ' single quotes 'literal text'
10506 qq double quotes qq(interpolate $here please)
10507 " double quotes "interpolate $here please"
10508 qx backticks qx(/bin/ls -l)
10509 ` backticks `/bin/ls -l`
10510 qw quote words @EXPORT_OK = qw( func() $spam )
10511 m// regexp match m/this/
10512 s/// regexp substitute s/this/that/
10513 tr/// string transliterate tr/this/that/
10514 y/// string transliterate y/this/that/
10515 ($*@) sub prototypes sub foo ($)
10516 (stuff) sub attr parameters sub foo : attr(stuff)
10517 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10519 In most of these cases (all but <>, patterns and transliterate)
10520 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10521 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10522 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10525 It skips whitespace before the string starts, and treats the first
10526 character as the delimiter. If the delimiter is one of ([{< then
10527 the corresponding "close" character )]}> is used as the closing
10528 delimiter. It allows quoting of delimiters, and if the string has
10529 balanced delimiters ([{<>}]) it allows nesting.
10531 On success, the SV with the resulting string is put into lex_stuff or,
10532 if that is already non-NULL, into lex_repl. The second case occurs only
10533 when parsing the RHS of the special constructs s/// and tr/// (y///).
10534 For convenience, the terminating delimiter character is stuffed into
10539 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10540 bool deprecate_escaped_meta, char **delimp
10544 SV *sv; /* scalar value: string */
10545 const char *tmps; /* temp string, used for delimiter matching */
10546 char *s = start; /* current position in the buffer */
10547 char term; /* terminating character */
10548 char *to; /* current position in the sv's data */
10549 I32 brackets = 1; /* bracket nesting level */
10550 bool has_utf8 = FALSE; /* is there any utf8 content? */
10551 I32 termcode; /* terminating char. code */
10552 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10553 STRLEN termlen; /* length of terminating string */
10554 int last_off = 0; /* last position for nesting bracket */
10555 char *escaped_open = NULL;
10562 PERL_ARGS_ASSERT_SCAN_STR;
10564 /* skip space before the delimiter */
10570 if (PL_realtokenstart >= 0) {
10571 stuffstart = PL_realtokenstart;
10572 PL_realtokenstart = -1;
10575 stuffstart = start - SvPVX(PL_linestr);
10577 /* mark where we are, in case we need to report errors */
10580 /* after skipping whitespace, the next character is the terminator */
10583 termcode = termstr[0] = term;
10587 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10588 Copy(s, termstr, termlen, U8);
10589 if (!UTF8_IS_INVARIANT(term))
10593 /* mark where we are */
10594 PL_multi_start = CopLINE(PL_curcop);
10595 PL_multi_open = term;
10596 herelines = PL_parser->herelines;
10598 /* find corresponding closing delimiter */
10599 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10600 termcode = termstr[0] = term = tmps[5];
10602 PL_multi_close = term;
10604 /* A warning is raised if the input parameter requires it for escaped (by a
10605 * backslash) paired metacharacters {} [] and () when the delimiters are
10606 * those same characters, and the backslash is ineffective. This doesn't
10607 * happen for <>, as they aren't metas. */
10608 if (deprecate_escaped_meta
10609 && (PL_multi_open == PL_multi_close
10610 || PL_multi_open == '<'
10611 || ! ckWARN_d(WARN_DEPRECATED)))
10613 deprecate_escaped_meta = FALSE;
10616 /* create a new SV to hold the contents. 79 is the SV's initial length.
10617 What a random number. */
10618 sv = newSV_type(SVt_PVIV);
10620 SvIV_set(sv, termcode);
10621 (void)SvPOK_only(sv); /* validate pointer */
10623 /* move past delimiter and try to read a complete string */
10625 sv_catpvn(sv, s, termlen);
10628 tstart = SvPVX(PL_linestr) + stuffstart;
10629 if (PL_madskills && !PL_thisopen && !keep_delims) {
10630 PL_thisopen = newSVpvn(tstart, s - tstart);
10631 stuffstart = s - SvPVX(PL_linestr);
10635 if (PL_encoding && !UTF && !re_reparse) {
10639 int offset = s - SvPVX_const(PL_linestr);
10640 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10641 &offset, (char*)termstr, termlen);
10645 if (SvIsCOW(PL_linestr)) {
10646 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10647 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10648 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10649 char *buf = SvPVX(PL_linestr);
10650 bufend_pos = PL_parser->bufend - buf;
10651 bufptr_pos = PL_parser->bufptr - buf;
10652 oldbufptr_pos = PL_parser->oldbufptr - buf;
10653 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10654 linestart_pos = PL_parser->linestart - buf;
10655 last_uni_pos = PL_parser->last_uni
10656 ? PL_parser->last_uni - buf
10658 last_lop_pos = PL_parser->last_lop
10659 ? PL_parser->last_lop - buf
10661 re_eval_start_pos =
10662 PL_parser->lex_shared->re_eval_start ?
10663 PL_parser->lex_shared->re_eval_start - buf : 0;
10666 sv_force_normal(PL_linestr);
10668 buf = SvPVX(PL_linestr);
10669 PL_parser->bufend = buf + bufend_pos;
10670 PL_parser->bufptr = buf + bufptr_pos;
10671 PL_parser->oldbufptr = buf + oldbufptr_pos;
10672 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10673 PL_parser->linestart = buf + linestart_pos;
10674 if (PL_parser->last_uni)
10675 PL_parser->last_uni = buf + last_uni_pos;
10676 if (PL_parser->last_lop)
10677 PL_parser->last_lop = buf + last_lop_pos;
10678 if (PL_parser->lex_shared->re_eval_start)
10679 PL_parser->lex_shared->re_eval_start =
10680 buf + re_eval_start_pos;
10683 ns = SvPVX_const(PL_linestr) + offset;
10684 svlast = SvEND(sv) - 1;
10686 for (; s < ns; s++) {
10687 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10688 COPLINE_INC_WITH_HERELINES;
10691 goto read_more_line;
10693 /* handle quoted delimiters */
10694 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10696 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10698 if ((svlast-1 - t) % 2) {
10699 if (!keep_quoted) {
10700 *(svlast-1) = term;
10702 SvCUR_set(sv, SvCUR(sv) - 1);
10707 if (PL_multi_open == PL_multi_close) {
10713 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10714 /* At here, all closes are "was quoted" one,
10715 so we don't check PL_multi_close. */
10717 if (!keep_quoted && *(t+1) == PL_multi_open)
10722 else if (*t == PL_multi_open)
10730 SvCUR_set(sv, w - SvPVX_const(sv));
10732 last_off = w - SvPVX(sv);
10733 if (--brackets <= 0)
10738 if (!keep_delims) {
10739 SvCUR_set(sv, SvCUR(sv) - 1);
10745 /* extend sv if need be */
10746 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10747 /* set 'to' to the next character in the sv's string */
10748 to = SvPVX(sv)+SvCUR(sv);
10750 /* if open delimiter is the close delimiter read unbridle */
10751 if (PL_multi_open == PL_multi_close) {
10752 for (; s < PL_bufend; s++,to++) {
10753 /* embedded newlines increment the current line number */
10754 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10755 COPLINE_INC_WITH_HERELINES;
10756 /* handle quoted delimiters */
10757 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10760 || (re_reparse && s[1] == '\\'))
10763 /* any other quotes are simply copied straight through */
10767 /* terminate when run out of buffer (the for() condition), or
10768 have found the terminator */
10769 else if (*s == term) {
10772 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10775 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10781 /* if the terminator isn't the same as the start character (e.g.,
10782 matched brackets), we have to allow more in the quoting, and
10783 be prepared for nested brackets.
10786 /* read until we run out of string, or we find the terminator */
10787 for (; s < PL_bufend; s++,to++) {
10788 /* embedded newlines increment the line count */
10789 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10790 COPLINE_INC_WITH_HERELINES;
10791 /* backslashes can escape the open or closing characters */
10792 if (*s == '\\' && s+1 < PL_bufend) {
10793 if (!keep_quoted &&
10794 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10798 /* Here, 'deprecate_escaped_meta' is true iff the
10799 * delimiters are paired metacharacters, and 's' points
10800 * to an occurrence of one of them within the string,
10801 * which was preceded by a backslash. If this is a
10802 * context where the delimiter is also a metacharacter,
10803 * the backslash is useless, and deprecated. () and []
10804 * are meta in any context. {} are meta only when
10805 * appearing in a quantifier or in things like '\p{'
10806 * (but '\\p{' isn't meta). They also aren't meta
10807 * unless there is a matching closed, escaped char
10808 * later on within the string. If 's' points to an
10809 * open, set a flag; if to a close, test that flag, and
10810 * raise a warning if it was set */
10812 if (deprecate_escaped_meta) {
10813 if (*s == PL_multi_open) {
10817 /* Look for a closing '\}' */
10818 else if (regcurly(s, TRUE)) {
10821 /* Look for e.g. '\x{' */
10822 else if (s - start > 2
10823 && _generic_isCC(*(s-2),
10824 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10825 { /* Exclude '\\x', '\\\\x', etc. */
10826 char *lookbehind = s - 4;
10827 bool is_meta = TRUE;
10828 while (lookbehind >= start
10829 && *lookbehind == '\\')
10831 is_meta = ! is_meta;
10839 else if (escaped_open) {
10840 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10841 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10842 escaped_open = NULL;
10849 /* allow nested opens and closes */
10850 else if (*s == PL_multi_close && --brackets <= 0)
10852 else if (*s == PL_multi_open)
10854 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10859 /* terminate the copied string and update the sv's end-of-string */
10861 SvCUR_set(sv, to - SvPVX_const(sv));
10864 * this next chunk reads more into the buffer if we're not done yet
10868 break; /* handle case where we are done yet :-) */
10870 #ifndef PERL_STRICT_CR
10871 if (to - SvPVX_const(sv) >= 2) {
10872 if ((to[-2] == '\r' && to[-1] == '\n') ||
10873 (to[-2] == '\n' && to[-1] == '\r'))
10877 SvCUR_set(sv, to - SvPVX_const(sv));
10879 else if (to[-1] == '\r')
10882 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10887 /* if we're out of file, or a read fails, bail and reset the current
10888 line marker so we can report where the unterminated string began
10891 if (PL_madskills) {
10892 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10894 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10896 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10899 COPLINE_INC_WITH_HERELINES;
10900 PL_bufptr = PL_bufend;
10901 if (!lex_next_chunk(0)) {
10903 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10912 /* at this point, we have successfully read the delimited string */
10914 if (!PL_encoding || UTF || re_reparse) {
10916 if (PL_madskills) {
10917 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10918 const int len = s - tstart;
10920 sv_catpvn(PL_thisstuff, tstart, len);
10922 PL_thisstuff = newSVpvn(tstart, len);
10923 if (!PL_thisclose && !keep_delims)
10924 PL_thisclose = newSVpvn(s,termlen);
10929 sv_catpvn(sv, s, termlen);
10934 if (PL_madskills) {
10935 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10936 const int len = s - tstart - termlen;
10938 sv_catpvn(PL_thisstuff, tstart, len);
10940 PL_thisstuff = newSVpvn(tstart, len);
10941 if (!PL_thisclose && !keep_delims)
10942 PL_thisclose = newSVpvn(s - termlen,termlen);
10946 if (has_utf8 || (PL_encoding && !re_reparse))
10949 PL_multi_end = CopLINE(PL_curcop);
10950 CopLINE_set(PL_curcop, PL_multi_start);
10951 PL_parser->herelines = herelines;
10953 /* if we allocated too much space, give some back */
10954 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10955 SvLEN_set(sv, SvCUR(sv) + 1);
10956 SvPV_renew(sv, SvLEN(sv));
10959 /* decide whether this is the first or second quoted string we've read
10964 PL_sublex_info.repl = sv;
10967 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10973 takes: pointer to position in buffer
10974 returns: pointer to new position in buffer
10975 side-effects: builds ops for the constant in pl_yylval.op
10977 Read a number in any of the formats that Perl accepts:
10979 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10980 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10983 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10985 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10988 If it reads a number without a decimal point or an exponent, it will
10989 try converting the number to an integer and see if it can do so
10990 without loss of precision.
10994 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10997 const char *s = start; /* current position in buffer */
10998 char *d; /* destination in temp buffer */
10999 char *e; /* end of temp buffer */
11000 NV nv; /* number read, as a double */
11001 SV *sv = NULL; /* place to put the converted number */
11002 bool floatit; /* boolean: int or float? */
11003 const char *lastub = NULL; /* position of last underbar */
11004 static const char* const number_too_long = "Number too long";
11006 PERL_ARGS_ASSERT_SCAN_NUM;
11008 /* We use the first character to decide what type of number this is */
11012 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11014 /* if it starts with a 0, it could be an octal number, a decimal in
11015 0.13 disguise, or a hexadecimal number, or a binary number. */
11019 u holds the "number so far"
11020 shift the power of 2 of the base
11021 (hex == 4, octal == 3, binary == 1)
11022 overflowed was the number more than we can hold?
11024 Shift is used when we add a digit. It also serves as an "are
11025 we in octal/hex/binary?" indicator to disallow hex characters
11026 when in octal mode.
11031 bool overflowed = FALSE;
11032 bool just_zero = TRUE; /* just plain 0 or binary number? */
11033 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11034 static const char* const bases[5] =
11035 { "", "binary", "", "octal", "hexadecimal" };
11036 static const char* const Bases[5] =
11037 { "", "Binary", "", "Octal", "Hexadecimal" };
11038 static const char* const maxima[5] =
11040 "0b11111111111111111111111111111111",
11044 const char *base, *Base, *max;
11046 /* check for hex */
11047 if (s[1] == 'x' || s[1] == 'X') {
11051 } else if (s[1] == 'b' || s[1] == 'B') {
11056 /* check for a decimal in disguise */
11057 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11059 /* so it must be octal */
11066 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11067 "Misplaced _ in number");
11071 base = bases[shift];
11072 Base = Bases[shift];
11073 max = maxima[shift];
11075 /* read the rest of the number */
11077 /* x is used in the overflow test,
11078 b is the digit we're adding on. */
11083 /* if we don't mention it, we're done */
11087 /* _ are ignored -- but warned about if consecutive */
11089 if (lastub && s == lastub + 1)
11090 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11091 "Misplaced _ in number");
11095 /* 8 and 9 are not octal */
11096 case '8': case '9':
11098 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11102 case '2': case '3': case '4':
11103 case '5': case '6': case '7':
11105 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11108 case '0': case '1':
11109 b = *s++ & 15; /* ASCII digit -> value of digit */
11113 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11114 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11115 /* make sure they said 0x */
11118 b = (*s++ & 7) + 9;
11120 /* Prepare to put the digit we have onto the end
11121 of the number so far. We check for overflows.
11127 x = u << shift; /* make room for the digit */
11129 if ((x >> shift) != u
11130 && !(PL_hints & HINT_NEW_BINARY)) {
11133 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11134 "Integer overflow in %s number",
11137 u = x | b; /* add the digit to the end */
11140 n *= nvshift[shift];
11141 /* If an NV has not enough bits in its
11142 * mantissa to represent an UV this summing of
11143 * small low-order numbers is a waste of time
11144 * (because the NV cannot preserve the
11145 * low-order bits anyway): we could just
11146 * remember when did we overflow and in the
11147 * end just multiply n by the right
11155 /* if we get here, we had success: make a scalar value from
11160 /* final misplaced underbar check */
11161 if (s[-1] == '_') {
11162 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11166 if (n > 4294967295.0)
11167 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11168 "%s number > %s non-portable",
11174 if (u > 0xffffffff)
11175 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11176 "%s number > %s non-portable",
11181 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11182 sv = new_constant(start, s - start, "integer",
11183 sv, NULL, NULL, 0);
11184 else if (PL_hints & HINT_NEW_BINARY)
11185 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11190 handle decimal numbers.
11191 we're also sent here when we read a 0 as the first digit
11193 case '1': case '2': case '3': case '4': case '5':
11194 case '6': case '7': case '8': case '9': case '.':
11197 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11200 /* read next group of digits and _ and copy into d */
11201 while (isDIGIT(*s) || *s == '_') {
11202 /* skip underscores, checking for misplaced ones
11206 if (lastub && s == lastub + 1)
11207 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11208 "Misplaced _ in number");
11212 /* check for end of fixed-length buffer */
11214 Perl_croak(aTHX_ "%s", number_too_long);
11215 /* if we're ok, copy the character */
11220 /* final misplaced underbar check */
11221 if (lastub && s == lastub + 1) {
11222 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11225 /* read a decimal portion if there is one. avoid
11226 3..5 being interpreted as the number 3. followed
11229 if (*s == '.' && s[1] != '.') {
11234 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11235 "Misplaced _ in number");
11239 /* copy, ignoring underbars, until we run out of digits.
11241 for (; isDIGIT(*s) || *s == '_'; s++) {
11242 /* fixed length buffer check */
11244 Perl_croak(aTHX_ "%s", number_too_long);
11246 if (lastub && s == lastub + 1)
11247 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11248 "Misplaced _ in number");
11254 /* fractional part ending in underbar? */
11255 if (s[-1] == '_') {
11256 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11257 "Misplaced _ in number");
11259 if (*s == '.' && isDIGIT(s[1])) {
11260 /* oops, it's really a v-string, but without the "v" */
11266 /* read exponent part, if present */
11267 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11271 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11272 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11274 /* stray preinitial _ */
11276 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11277 "Misplaced _ in number");
11281 /* allow positive or negative exponent */
11282 if (*s == '+' || *s == '-')
11285 /* stray initial _ */
11287 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11288 "Misplaced _ in number");
11292 /* read digits of exponent */
11293 while (isDIGIT(*s) || *s == '_') {
11296 Perl_croak(aTHX_ "%s", number_too_long);
11300 if (((lastub && s == lastub + 1) ||
11301 (!isDIGIT(s[1]) && s[1] != '_')))
11302 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11303 "Misplaced _ in number");
11311 We try to do an integer conversion first if no characters
11312 indicating "float" have been found.
11317 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11319 if (flags == IS_NUMBER_IN_UV) {
11321 sv = newSViv(uv); /* Prefer IVs over UVs. */
11324 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11325 if (uv <= (UV) IV_MIN)
11326 sv = newSViv(-(IV)uv);
11333 /* terminate the string */
11335 nv = Atof(PL_tokenbuf);
11340 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11341 const char *const key = floatit ? "float" : "integer";
11342 const STRLEN keylen = floatit ? 5 : 7;
11343 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11344 key, keylen, sv, NULL, NULL, 0);
11348 /* if it starts with a v, it could be a v-string */
11351 sv = newSV(5); /* preallocate storage space */
11352 ENTER_with_name("scan_vstring");
11354 s = scan_vstring(s, PL_bufend, sv);
11355 SvREFCNT_inc_simple_void_NN(sv);
11356 LEAVE_with_name("scan_vstring");
11360 /* make the op for the constant and return */
11363 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11365 lvalp->opval = NULL;
11371 S_scan_formline(pTHX_ char *s)
11376 SV * const stuff = newSVpvs("");
11377 bool needargs = FALSE;
11378 bool eofmt = FALSE;
11380 char *tokenstart = s;
11381 SV* savewhite = NULL;
11383 if (PL_madskills) {
11384 savewhite = PL_thiswhite;
11389 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11391 while (!needargs) {
11394 #ifdef PERL_STRICT_CR
11395 while (SPACE_OR_TAB(*t))
11398 while (SPACE_OR_TAB(*t) || *t == '\r')
11401 if (*t == '\n' || t == PL_bufend) {
11406 eol = (char *) memchr(s,'\n',PL_bufend-s);
11410 for (t = s; t < eol; t++) {
11411 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11413 goto enough; /* ~~ must be first line in formline */
11415 if (*t == '@' || *t == '^')
11419 sv_catpvn(stuff, s, eol-s);
11420 #ifndef PERL_STRICT_CR
11421 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11422 char *end = SvPVX(stuff) + SvCUR(stuff);
11425 SvCUR_set(stuff, SvCUR(stuff) - 1);
11433 if ((PL_rsfp || PL_parser->filtered)
11434 && PL_parser->form_lex_state == LEX_NORMAL) {
11437 if (PL_madskills) {
11439 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11441 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11444 PL_bufptr = PL_bufend;
11445 COPLINE_INC_WITH_HERELINES;
11446 got_some = lex_next_chunk(0);
11447 CopLINE_dec(PL_curcop);
11450 tokenstart = PL_bufptr;
11458 if (!SvCUR(stuff) || needargs)
11459 PL_lex_state = PL_parser->form_lex_state;
11460 if (SvCUR(stuff)) {
11461 PL_expect = XSTATE;
11463 start_force(PL_curforce);
11464 NEXTVAL_NEXTTOKE.ival = 0;
11465 force_next(FORMLBRACK);
11468 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11470 else if (PL_encoding)
11471 sv_recode_to_utf8(stuff, PL_encoding);
11473 start_force(PL_curforce);
11474 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11478 SvREFCNT_dec(stuff);
11480 PL_lex_formbrack = 0;
11483 if (PL_madskills) {
11485 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11487 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11488 PL_thiswhite = savewhite;
11495 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11498 const I32 oldsavestack_ix = PL_savestack_ix;
11499 CV* const outsidecv = PL_compcv;
11501 SAVEI32(PL_subline);
11502 save_item(PL_subname);
11503 SAVESPTR(PL_compcv);
11505 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11506 CvFLAGS(PL_compcv) |= flags;
11508 PL_subline = CopLINE(PL_curcop);
11509 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11510 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11511 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11512 if (outsidecv && CvPADLIST(outsidecv))
11513 CvPADLIST(PL_compcv)->xpadl_outid =
11514 PadlistNAMES(CvPADLIST(outsidecv));
11516 return oldsavestack_ix;
11520 S_yywarn(pTHX_ const char *const s, U32 flags)
11524 PERL_ARGS_ASSERT_YYWARN;
11526 PL_in_eval |= EVAL_WARNONLY;
11527 yyerror_pv(s, flags);
11528 PL_in_eval &= ~EVAL_WARNONLY;
11533 Perl_yyerror(pTHX_ const char *const s)
11535 PERL_ARGS_ASSERT_YYERROR;
11536 return yyerror_pvn(s, strlen(s), 0);
11540 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11542 PERL_ARGS_ASSERT_YYERROR_PV;
11543 return yyerror_pvn(s, strlen(s), flags);
11547 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11550 const char *context = NULL;
11553 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11554 int yychar = PL_parser->yychar;
11556 PERL_ARGS_ASSERT_YYERROR_PVN;
11558 if (!yychar || (yychar == ';' && !PL_rsfp))
11559 sv_catpvs(where_sv, "at EOF");
11560 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11561 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11562 PL_oldbufptr != PL_bufptr) {
11565 The code below is removed for NetWare because it abends/crashes on NetWare
11566 when the script has error such as not having the closing quotes like:
11567 if ($var eq "value)
11568 Checking of white spaces is anyway done in NetWare code.
11571 while (isSPACE(*PL_oldoldbufptr))
11574 context = PL_oldoldbufptr;
11575 contlen = PL_bufptr - PL_oldoldbufptr;
11577 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11578 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11581 The code below is removed for NetWare because it abends/crashes on NetWare
11582 when the script has error such as not having the closing quotes like:
11583 if ($var eq "value)
11584 Checking of white spaces is anyway done in NetWare code.
11587 while (isSPACE(*PL_oldbufptr))
11590 context = PL_oldbufptr;
11591 contlen = PL_bufptr - PL_oldbufptr;
11593 else if (yychar > 255)
11594 sv_catpvs(where_sv, "next token ???");
11595 else if (yychar == -2) { /* YYEMPTY */
11596 if (PL_lex_state == LEX_NORMAL ||
11597 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11598 sv_catpvs(where_sv, "at end of line");
11599 else if (PL_lex_inpat)
11600 sv_catpvs(where_sv, "within pattern");
11602 sv_catpvs(where_sv, "within string");
11605 sv_catpvs(where_sv, "next char ");
11607 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11608 else if (isPRINT_LC(yychar)) {
11609 const char string = yychar;
11610 sv_catpvn(where_sv, &string, 1);
11613 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11615 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11616 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11617 OutCopFILE(PL_curcop),
11618 (IV)(PL_parser->preambling == NOLINE
11619 ? CopLINE(PL_curcop)
11620 : PL_parser->preambling));
11622 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11623 UTF8fARG(UTF, contlen, context));
11625 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11626 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11627 Perl_sv_catpvf(aTHX_ msg,
11628 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11629 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11632 if (PL_in_eval & EVAL_WARNONLY) {
11633 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11637 if (PL_error_count >= 10) {
11639 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11640 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11641 SVfARG(errsv), OutCopFILE(PL_curcop));
11643 Perl_croak(aTHX_ "%s has too many errors.\n",
11644 OutCopFILE(PL_curcop));
11647 PL_in_my_stash = NULL;
11652 S_swallow_bom(pTHX_ U8 *s)
11655 const STRLEN slen = SvCUR(PL_linestr);
11657 PERL_ARGS_ASSERT_SWALLOW_BOM;
11661 if (s[1] == 0xFE) {
11662 /* UTF-16 little-endian? (or UTF-32LE?) */
11663 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11664 /* diag_listed_as: Unsupported script encoding %s */
11665 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11666 #ifndef PERL_NO_UTF16_FILTER
11667 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11669 if (PL_bufend > (char*)s) {
11670 s = add_utf16_textfilter(s, TRUE);
11673 /* diag_listed_as: Unsupported script encoding %s */
11674 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11679 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11680 #ifndef PERL_NO_UTF16_FILTER
11681 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11683 if (PL_bufend > (char *)s) {
11684 s = add_utf16_textfilter(s, FALSE);
11687 /* diag_listed_as: Unsupported script encoding %s */
11688 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11692 case BOM_UTF8_FIRST_BYTE: {
11693 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11694 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11695 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11696 s += len + 1; /* UTF-8 */
11703 if (s[2] == 0xFE && s[3] == 0xFF) {
11704 /* UTF-32 big-endian */
11705 /* diag_listed_as: Unsupported script encoding %s */
11706 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11709 else if (s[2] == 0 && s[3] != 0) {
11712 * are a good indicator of UTF-16BE. */
11713 #ifndef PERL_NO_UTF16_FILTER
11714 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11715 s = add_utf16_textfilter(s, FALSE);
11717 /* diag_listed_as: Unsupported script encoding %s */
11718 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11724 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11727 * are a good indicator of UTF-16LE. */
11728 #ifndef PERL_NO_UTF16_FILTER
11729 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11730 s = add_utf16_textfilter(s, TRUE);
11732 /* diag_listed_as: Unsupported script encoding %s */
11733 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11741 #ifndef PERL_NO_UTF16_FILTER
11743 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11746 SV *const filter = FILTER_DATA(idx);
11747 /* We re-use this each time round, throwing the contents away before we
11749 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11750 SV *const utf8_buffer = filter;
11751 IV status = IoPAGE(filter);
11752 const bool reverse = cBOOL(IoLINES(filter));
11755 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11757 /* As we're automatically added, at the lowest level, and hence only called
11758 from this file, we can be sure that we're not called in block mode. Hence
11759 don't bother writing code to deal with block mode. */
11761 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11764 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11766 DEBUG_P(PerlIO_printf(Perl_debug_log,
11767 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11768 FPTR2DPTR(void *, S_utf16_textfilter),
11769 reverse ? 'l' : 'b', idx, maxlen, status,
11770 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11777 /* First, look in our buffer of existing UTF-8 data: */
11778 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11782 } else if (status == 0) {
11784 IoPAGE(filter) = 0;
11785 nl = SvEND(utf8_buffer);
11788 STRLEN got = nl - SvPVX(utf8_buffer);
11789 /* Did we have anything to append? */
11791 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11792 /* Everything else in this code works just fine if SVp_POK isn't
11793 set. This, however, needs it, and we need it to work, else
11794 we loop infinitely because the buffer is never consumed. */
11795 sv_chop(utf8_buffer, nl);
11799 /* OK, not a complete line there, so need to read some more UTF-16.
11800 Read an extra octect if the buffer currently has an odd number. */
11804 if (SvCUR(utf16_buffer) >= 2) {
11805 /* Location of the high octet of the last complete code point.
11806 Gosh, UTF-16 is a pain. All the benefits of variable length,
11807 *coupled* with all the benefits of partial reads and
11809 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11810 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11812 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11816 /* We have the first half of a surrogate. Read more. */
11817 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11820 status = FILTER_READ(idx + 1, utf16_buffer,
11821 160 + (SvCUR(utf16_buffer) & 1));
11822 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11823 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11826 IoPAGE(filter) = status;
11831 chars = SvCUR(utf16_buffer) >> 1;
11832 have = SvCUR(utf8_buffer);
11833 SvGROW(utf8_buffer, have + chars * 3 + 1);
11836 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11837 (U8*)SvPVX_const(utf8_buffer) + have,
11838 chars * 2, &newlen);
11840 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11841 (U8*)SvPVX_const(utf8_buffer) + have,
11842 chars * 2, &newlen);
11844 SvCUR_set(utf8_buffer, have + newlen);
11847 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11848 it's private to us, and utf16_to_utf8{,reversed} take a
11849 (pointer,length) pair, rather than a NUL-terminated string. */
11850 if(SvCUR(utf16_buffer) & 1) {
11851 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11852 SvCUR_set(utf16_buffer, 1);
11854 SvCUR_set(utf16_buffer, 0);
11857 DEBUG_P(PerlIO_printf(Perl_debug_log,
11858 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11860 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11861 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11866 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11868 SV *filter = filter_add(S_utf16_textfilter, NULL);
11870 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11872 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11873 sv_setpvs(filter, "");
11874 IoLINES(filter) = reversed;
11875 IoPAGE(filter) = 1; /* Not EOF */
11877 /* Sadly, we have to return a valid pointer, come what may, so we have to
11878 ignore any error return from this. */
11879 SvCUR_set(PL_linestr, 0);
11880 if (FILTER_READ(0, PL_linestr, 0)) {
11881 SvUTF8_on(PL_linestr);
11883 SvUTF8_on(PL_linestr);
11885 PL_bufend = SvEND(PL_linestr);
11886 return (U8*)SvPVX(PL_linestr);
11891 Returns a pointer to the next character after the parsed
11892 vstring, as well as updating the passed in sv.
11894 Function must be called like
11896 sv = sv_2mortal(newSV(5));
11897 s = scan_vstring(s,e,sv);
11899 where s and e are the start and end of the string.
11900 The sv should already be large enough to store the vstring
11901 passed in, for performance reasons.
11903 This function may croak if fatal warnings are enabled in the
11904 calling scope, hence the sv_2mortal in the example (to prevent
11905 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11911 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11914 const char *pos = s;
11915 const char *start = s;
11917 PERL_ARGS_ASSERT_SCAN_VSTRING;
11919 if (*pos == 'v') pos++; /* get past 'v' */
11920 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11922 if ( *pos != '.') {
11923 /* this may not be a v-string if followed by => */
11924 const char *next = pos;
11925 while (next < e && isSPACE(*next))
11927 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11928 /* return string not v-string */
11929 sv_setpvn(sv,(char *)s,pos-s);
11930 return (char *)pos;
11934 if (!isALPHA(*pos)) {
11935 U8 tmpbuf[UTF8_MAXBYTES+1];
11938 s++; /* get past 'v' */
11943 /* this is atoi() that tolerates underscores */
11946 const char *end = pos;
11948 while (--end >= s) {
11950 const UV orev = rev;
11951 rev += (*end - '0') * mult;
11954 /* diag_listed_as: Integer overflow in %s number */
11955 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11956 "Integer overflow in decimal number");
11960 if (rev > 0x7FFFFFFF)
11961 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11963 /* Append native character for the rev point */
11964 tmpend = uvchr_to_utf8(tmpbuf, rev);
11965 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11966 if (!UVCHR_IS_INVARIANT(rev))
11968 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11974 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11978 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11985 Perl_keyword_plugin_standard(pTHX_
11986 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11988 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11989 PERL_UNUSED_CONTEXT;
11990 PERL_UNUSED_ARG(keyword_ptr);
11991 PERL_UNUSED_ARG(keyword_len);
11992 PERL_UNUSED_ARG(op_ptr);
11993 return KEYWORD_PLUGIN_DECLINE;
11996 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11998 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12000 SAVEI32(PL_lex_brackets);
12001 if (PL_lex_brackets > 100)
12002 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12003 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12004 SAVEI32(PL_lex_allbrackets);
12005 PL_lex_allbrackets = 0;
12006 SAVEI8(PL_lex_fakeeof);
12007 PL_lex_fakeeof = (U8)fakeeof;
12008 if(yyparse(gramtype) && !PL_parser->error_count)
12009 qerror(Perl_mess(aTHX_ "Parse error"));
12012 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12014 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12018 SAVEVPTR(PL_eval_root);
12019 PL_eval_root = NULL;
12020 parse_recdescent(gramtype, fakeeof);
12026 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12028 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12031 if (flags & ~PARSE_OPTIONAL)
12032 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12033 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12034 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12035 if (!PL_parser->error_count)
12036 qerror(Perl_mess(aTHX_ "Parse error"));
12037 exprop = newOP(OP_NULL, 0);
12043 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12045 Parse a Perl arithmetic expression. This may contain operators of precedence
12046 down to the bit shift operators. The expression must be followed (and thus
12047 terminated) either by a comparison or lower-precedence operator or by
12048 something that would normally terminate an expression such as semicolon.
12049 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12050 otherwise it is mandatory. It is up to the caller to ensure that the
12051 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12052 the source of the code to be parsed and the lexical context for the
12055 The op tree representing the expression is returned. If an optional
12056 expression is absent, a null pointer is returned, otherwise the pointer
12059 If an error occurs in parsing or compilation, in most cases a valid op
12060 tree is returned anyway. The error is reflected in the parser state,
12061 normally resulting in a single exception at the top level of parsing
12062 which covers all the compilation errors that occurred. Some compilation
12063 errors, however, will throw an exception immediately.
12069 Perl_parse_arithexpr(pTHX_ U32 flags)
12071 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12075 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12077 Parse a Perl term expression. This may contain operators of precedence
12078 down to the assignment operators. The expression must be followed (and thus
12079 terminated) either by a comma or lower-precedence operator or by
12080 something that would normally terminate an expression such as semicolon.
12081 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12082 otherwise it is mandatory. It is up to the caller to ensure that the
12083 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12084 the source of the code to be parsed and the lexical context for the
12087 The op tree representing the expression is returned. If an optional
12088 expression is absent, a null pointer is returned, otherwise the pointer
12091 If an error occurs in parsing or compilation, in most cases a valid op
12092 tree is returned anyway. The error is reflected in the parser state,
12093 normally resulting in a single exception at the top level of parsing
12094 which covers all the compilation errors that occurred. Some compilation
12095 errors, however, will throw an exception immediately.
12101 Perl_parse_termexpr(pTHX_ U32 flags)
12103 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12107 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12109 Parse a Perl list expression. This may contain operators of precedence
12110 down to the comma operator. The expression must be followed (and thus
12111 terminated) either by a low-precedence logic operator such as C<or> or by
12112 something that would normally terminate an expression such as semicolon.
12113 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12114 otherwise it is mandatory. It is up to the caller to ensure that the
12115 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12116 the source of the code to be parsed and the lexical context for the
12119 The op tree representing the expression is returned. If an optional
12120 expression is absent, a null pointer is returned, otherwise the pointer
12123 If an error occurs in parsing or compilation, in most cases a valid op
12124 tree is returned anyway. The error is reflected in the parser state,
12125 normally resulting in a single exception at the top level of parsing
12126 which covers all the compilation errors that occurred. Some compilation
12127 errors, however, will throw an exception immediately.
12133 Perl_parse_listexpr(pTHX_ U32 flags)
12135 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12139 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12141 Parse a single complete Perl expression. This allows the full
12142 expression grammar, including the lowest-precedence operators such
12143 as C<or>. The expression must be followed (and thus terminated) by a
12144 token that an expression would normally be terminated by: end-of-file,
12145 closing bracketing punctuation, semicolon, or one of the keywords that
12146 signals a postfix expression-statement modifier. If I<flags> includes
12147 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12148 mandatory. It is up to the caller to ensure that the dynamic parser
12149 state (L</PL_parser> et al) is correctly set to reflect the source of
12150 the code to be parsed and the lexical context for the expression.
12152 The op tree representing the expression is returned. If an optional
12153 expression is absent, a null pointer is returned, otherwise the pointer
12156 If an error occurs in parsing or compilation, in most cases a valid op
12157 tree is returned anyway. The error is reflected in the parser state,
12158 normally resulting in a single exception at the top level of parsing
12159 which covers all the compilation errors that occurred. Some compilation
12160 errors, however, will throw an exception immediately.
12166 Perl_parse_fullexpr(pTHX_ U32 flags)
12168 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12172 =for apidoc Amx|OP *|parse_block|U32 flags
12174 Parse a single complete Perl code block. This consists of an opening
12175 brace, a sequence of statements, and a closing brace. The block
12176 constitutes a lexical scope, so C<my> variables and various compile-time
12177 effects can be contained within it. It is up to the caller to ensure
12178 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12179 reflect the source of the code to be parsed and the lexical context for
12182 The op tree representing the code block is returned. This is always a
12183 real op, never a null pointer. It will normally be a C<lineseq> list,
12184 including C<nextstate> or equivalent ops. No ops to construct any kind
12185 of runtime scope are included by virtue of it being a block.
12187 If an error occurs in parsing or compilation, in most cases a valid op
12188 tree (most likely null) is returned anyway. The error is reflected in
12189 the parser state, normally resulting in a single exception at the top
12190 level of parsing which covers all the compilation errors that occurred.
12191 Some compilation errors, however, will throw an exception immediately.
12193 The I<flags> parameter is reserved for future use, and must always
12200 Perl_parse_block(pTHX_ U32 flags)
12203 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12204 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12208 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12210 Parse a single unadorned Perl statement. This may be a normal imperative
12211 statement or a declaration that has compile-time effect. It does not
12212 include any label or other affixture. It is up to the caller to ensure
12213 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12214 reflect the source of the code to be parsed and the lexical context for
12217 The op tree representing the statement is returned. This may be a
12218 null pointer if the statement is null, for example if it was actually
12219 a subroutine definition (which has compile-time side effects). If not
12220 null, it will be ops directly implementing the statement, suitable to
12221 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12222 equivalent op (except for those embedded in a scope contained entirely
12223 within the statement).
12225 If an error occurs in parsing or compilation, in most cases a valid op
12226 tree (most likely null) is returned anyway. The error is reflected in
12227 the parser state, normally resulting in a single exception at the top
12228 level of parsing which covers all the compilation errors that occurred.
12229 Some compilation errors, however, will throw an exception immediately.
12231 The I<flags> parameter is reserved for future use, and must always
12238 Perl_parse_barestmt(pTHX_ U32 flags)
12241 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12242 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12246 =for apidoc Amx|SV *|parse_label|U32 flags
12248 Parse a single label, possibly optional, of the type that may prefix a
12249 Perl statement. It is up to the caller to ensure that the dynamic parser
12250 state (L</PL_parser> et al) is correctly set to reflect the source of
12251 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12252 label is optional, otherwise it is mandatory.
12254 The name of the label is returned in the form of a fresh scalar. If an
12255 optional label is absent, a null pointer is returned.
12257 If an error occurs in parsing, which can only occur if the label is
12258 mandatory, a valid label is returned anyway. The error is reflected in
12259 the parser state, normally resulting in a single exception at the top
12260 level of parsing which covers all the compilation errors that occurred.
12266 Perl_parse_label(pTHX_ U32 flags)
12268 if (flags & ~PARSE_OPTIONAL)
12269 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12270 if (PL_lex_state == LEX_KNOWNEXT) {
12271 PL_parser->yychar = yylex();
12272 if (PL_parser->yychar == LABEL) {
12273 char * const lpv = pl_yylval.pval;
12274 STRLEN llen = strlen(lpv);
12275 PL_parser->yychar = YYEMPTY;
12276 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12283 STRLEN wlen, bufptr_pos;
12286 if (!isIDFIRST_lazy_if(s, UTF))
12288 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12289 if (word_takes_any_delimeter(s, wlen))
12291 bufptr_pos = s - SvPVX(PL_linestr);
12293 lex_read_space(LEX_KEEP_PREVIOUS);
12295 s = SvPVX(PL_linestr) + bufptr_pos;
12296 if (t[0] == ':' && t[1] != ':') {
12297 PL_oldoldbufptr = PL_oldbufptr;
12300 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12304 if (flags & PARSE_OPTIONAL) {
12307 qerror(Perl_mess(aTHX_ "Parse error"));
12308 return newSVpvs("x");
12315 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12317 Parse a single complete Perl statement. This may be a normal imperative
12318 statement or a declaration that has compile-time effect, and may include
12319 optional labels. It is up to the caller to ensure that the dynamic
12320 parser state (L</PL_parser> et al) is correctly set to reflect the source
12321 of the code to be parsed and the lexical context for the statement.
12323 The op tree representing the statement is returned. This may be a
12324 null pointer if the statement is null, for example if it was actually
12325 a subroutine definition (which has compile-time side effects). If not
12326 null, it will be the result of a L</newSTATEOP> call, normally including
12327 a C<nextstate> or equivalent op.
12329 If an error occurs in parsing or compilation, in most cases a valid op
12330 tree (most likely null) is returned anyway. The error is reflected in
12331 the parser state, normally resulting in a single exception at the top
12332 level of parsing which covers all the compilation errors that occurred.
12333 Some compilation errors, however, will throw an exception immediately.
12335 The I<flags> parameter is reserved for future use, and must always
12342 Perl_parse_fullstmt(pTHX_ U32 flags)
12345 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12346 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12350 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12352 Parse a sequence of zero or more Perl statements. These may be normal
12353 imperative statements, including optional labels, or declarations
12354 that have compile-time effect, or any mixture thereof. The statement
12355 sequence ends when a closing brace or end-of-file is encountered in a
12356 place where a new statement could have validly started. It is up to
12357 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12358 is correctly set to reflect the source of the code to be parsed and the
12359 lexical context for the statements.
12361 The op tree representing the statement sequence is returned. This may
12362 be a null pointer if the statements were all null, for example if there
12363 were no statements or if there were only subroutine definitions (which
12364 have compile-time side effects). If not null, it will be a C<lineseq>
12365 list, normally including C<nextstate> or equivalent ops.
12367 If an error occurs in parsing or compilation, in most cases a valid op
12368 tree is returned anyway. The error is reflected in the parser state,
12369 normally resulting in a single exception at the top level of parsing
12370 which covers all the compilation errors that occurred. Some compilation
12371 errors, however, will throw an exception immediately.
12373 The I<flags> parameter is reserved for future use, and must always
12380 Perl_parse_stmtseq(pTHX_ U32 flags)
12385 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12386 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12387 c = lex_peek_unichar(0);
12388 if (c != -1 && c != /*{*/'}')
12389 qerror(Perl_mess(aTHX_ "Parse error"));
12395 * c-indentation-style: bsd
12396 * c-basic-offset: 4
12397 * indent-tabs-mode: nil
12400 * ex: set ts=8 sts=4 sw=4 et: