3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN0OP : zero-argument function, with its op created in this file
225 * FUN1 : not used, except for not, which isn't a UNIOP
226 * BOop : bitwise or or xor
228 * SHop : shift operator
229 * PWop : power operator
230 * PMop : pattern-matching operator
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
234 * Rop : relational operator <= != gt
236 * Also see LOP and lop() below.
239 #ifdef DEBUGGING /* Serve -DT. */
240 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
242 # define REPORT(retval) (retval)
245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
267 /* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
272 #define UNI3(f,x,have_x) { \
273 pl_yylval.ival = f; \
274 if (have_x) PL_expect = x; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 #define UNI(f) UNI3(f,XTERM,1)
284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285 #define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
290 #define UNIBRACK(f) UNI3(f,0,0)
292 /* grandfather return to old style */
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
303 #define COPLINE_INC_WITH_HERELINES \
305 CopLINE_inc(PL_curcop); \
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
314 /* how to interpret the pl_yylval associated with the token */
318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
323 static struct debug_tokens {
325 enum token_type type;
327 } const debug_tokens[] =
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
375 { PEG, TOKENTYPE_NONE, "PEG" },
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
402 { 0, TOKENTYPE_NONE, NULL }
405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
412 PERL_ARGS_ASSERT_TOKEREPORT;
415 const char *name = NULL;
416 enum token_type type = TOKENTYPE_NONE;
417 const struct debug_tokens *p;
418 SV* const report = newSVpvs("<== ");
420 for (p = debug_tokens; p->token; p++) {
421 if (p->token == (int)rv) {
428 Perl_sv_catpv(aTHX_ report, name);
429 else if ((char)rv > ' ' && (char)rv <= '~')
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
432 sv_catpvs(report, "EOF");
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
443 PL_op_name[lvalp->ival]);
446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
448 case TOKENTYPE_OPVAL:
450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
453 Perl_sv_catpvf(aTHX_ report, " %s",
454 SvPEEK(cSVOPx_sv(lvalp->opval)));
459 sv_catpvs(report, "(opval=null)");
462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
468 /* print the buffer with suitable escapes */
471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
473 SV* const tmp = newSVpvs("");
475 PERL_ARGS_ASSERT_PRINTBUF;
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
484 S_deprecate_commaless_var_list(pTHX) {
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
498 S_ao(pTHX_ int toketype)
501 if (*PL_bufptr == '=') {
503 if (toketype == ANDAND)
504 pl_yylval.ival = OP_ANDASSIGN;
505 else if (toketype == OROR)
506 pl_yylval.ival = OP_ORASSIGN;
507 else if (toketype == DORDOR)
508 pl_yylval.ival = OP_DORASSIGN;
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
528 S_no_op(pTHX_ const char *const what, char *s)
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
534 PERL_ARGS_ASSERT_NO_OP;
540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
541 if (ckWARN_d(WARN_SYNTAX)) {
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
547 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
548 t += UTF ? UTF8SKIP(t) : 1)
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
569 * Complain about missing quote/regexp/heredoc terminator.
570 * If it's called with NULL then it cauterizes the line buffer.
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
577 S_missingterm(pTHX_ char *s)
583 char * const nl = strrchr(s,'\n');
587 else if (isCNTRL(PL_multi_close)) {
589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
594 *tmpbuf = (char)PL_multi_close;
598 q = strchr(s,'"') ? '\'' : '"';
599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
605 * Check whether the named feature is enabled.
608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
617 if (namelen > MAX_FEATURE_LEN)
619 memcpy(&he_name[8], name, namelen);
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
630 #ifdef PERL_CR_FILTER
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
637 PERL_ARGS_ASSERT_STRIP_RETURN;
639 /* outer loop optimized to do nothing if there are no CR-LFs */
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
646 if (*s == '\r' && s[1] == '\n')
657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
660 if (count > 0 && !maxlen)
667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
669 Creates and initialises a new lexer/parser state object, supplying
670 a context in which to lex and parse from a new source of Perl code.
671 A pointer to the new state object is placed in L</PL_parser>. An entry
672 is made on the save stack so that upon unwinding the new state object
673 will be destroyed and the former value of L</PL_parser> will be restored.
674 Nothing else need be done to clean up the parsing context.
676 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677 non-null, provides a string (in SV form) containing code to be parsed.
678 A copy of the string is made, so subsequent modification of I<line>
679 does not affect parsing. I<rsfp>, if non-null, provides an input stream
680 from which code will be read to be parsed. If both are non-null, the
681 code in I<line> comes first and must consist of complete lines of input,
682 and I<rsfp> supplies the remainder of the source.
684 The I<flags> parameter is reserved for future use. Currently it is only
685 used by perl internally, so extensions should always pass zero.
690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
703 const char *s = NULL;
704 yy_parser *parser, *oparser;
705 if (flags && flags & ~LEX_START_FLAGS)
706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
708 /* create and initialise a parser */
710 Newxz(parser, 1, yy_parser);
711 parser->old_parser = oparser = PL_parser;
714 parser->stack = NULL;
716 parser->stack_size = 0;
718 /* on scope exit, free this parser and restore any outer one */
720 parser->saved_curcop = PL_curcop;
722 /* initialise lexer state */
725 parser->curforce = -1;
727 parser->nexttoke = 0;
729 parser->error_count = oparser ? oparser->error_count : 0;
730 parser->copline = NOLINE;
731 parser->lex_state = LEX_NORMAL;
732 parser->expect = XSTATE;
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
746 Newxz(parser->lex_shared, 1, LEXSHARED);
750 s = SvPV_const(line, len);
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
754 sv_catpvs(parser->linestr, "\n;");
756 parser->linestr = newSVpvs("\n;");
758 parser->oldoldbufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
767 parser->in_pod = parser->filtered = 0;
771 /* delete a parser object */
774 Perl_parser_free(pTHX_ const yy_parser *parser)
776 PERL_ARGS_ASSERT_PARSER_FREE;
778 PL_curcop = parser->saved_curcop;
779 SvREFCNT_dec(parser->linestr);
781 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
782 PerlIO_clearerr(parser->rsfp);
783 else if (parser->rsfp && (!parser->old_parser ||
784 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
785 PerlIO_close(parser->rsfp);
786 SvREFCNT_dec(parser->rsfp_filters);
787 SvREFCNT_dec(parser->lex_stuff);
788 SvREFCNT_dec(parser->sublex_info.repl);
790 Safefree(parser->lex_brackstack);
791 Safefree(parser->lex_casestack);
792 Safefree(parser->lex_shared);
793 PL_parser = parser->old_parser;
798 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
801 I32 nexttoke = parser->lasttoke;
803 I32 nexttoke = parser->nexttoke;
805 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
808 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
810 && parser->nexttoke[nexttoke].next_val.opval
811 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
812 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
813 op_free(parser->nexttoke[nexttoke].next_val.opval);
814 parser->nexttoke[nexttoke].next_val.opval = NULL;
817 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
818 && parser->nextval[nexttoke].opval
819 && parser->nextval[nexttoke].opval->op_slabbed
820 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
821 op_free(parser->nextval[nexttoke].opval);
822 parser->nextval[nexttoke].opval = NULL;
830 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
832 Buffer scalar containing the chunk currently under consideration of the
833 text currently being lexed. This is always a plain string scalar (for
834 which C<SvPOK> is true). It is not intended to be used as a scalar by
835 normal scalar means; instead refer to the buffer directly by the pointer
836 variables described below.
838 The lexer maintains various C<char*> pointers to things in the
839 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
840 reallocated, all of these pointers must be updated. Don't attempt to
841 do this manually, but rather use L</lex_grow_linestr> if you need to
842 reallocate the buffer.
844 The content of the text chunk in the buffer is commonly exactly one
845 complete line of input, up to and including a newline terminator,
846 but there are situations where it is otherwise. The octets of the
847 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
848 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
849 flag on this scalar, which may disagree with it.
851 For direct examination of the buffer, the variable
852 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
853 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
854 of these pointers is usually preferable to examination of the scalar
855 through normal scalar means.
857 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
859 Direct pointer to the end of the chunk of text currently being lexed, the
860 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
861 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
862 always located at the end of the buffer, and does not count as part of
863 the buffer's contents.
865 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
867 Points to the current position of lexing inside the lexer buffer.
868 Characters around this point may be freely examined, within
869 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
870 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
871 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
873 Lexing code (whether in the Perl core or not) moves this pointer past
874 the characters that it consumes. It is also expected to perform some
875 bookkeeping whenever a newline character is consumed. This movement
876 can be more conveniently performed by the function L</lex_read_to>,
877 which handles newlines appropriately.
879 Interpretation of the buffer's octets can be abstracted out by
880 using the slightly higher-level functions L</lex_peek_unichar> and
881 L</lex_read_unichar>.
883 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
885 Points to the start of the current line inside the lexer buffer.
886 This is useful for indicating at which column an error occurred, and
887 not much else. This must be updated by any lexing code that consumes
888 a newline; the function L</lex_read_to> handles this detail.
894 =for apidoc Amx|bool|lex_bufutf8
896 Indicates whether the octets in the lexer buffer
897 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
898 of Unicode characters. If not, they should be interpreted as Latin-1
899 characters. This is analogous to the C<SvUTF8> flag for scalars.
901 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
902 contains valid UTF-8. Lexing code must be robust in the face of invalid
905 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
906 is significant, but not the whole story regarding the input character
907 encoding. Normally, when a file is being read, the scalar contains octets
908 and its C<SvUTF8> flag is off, but the octets should be interpreted as
909 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
910 however, the scalar may have the C<SvUTF8> flag on, and in this case its
911 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
912 is in effect. This logic may change in the future; use this function
913 instead of implementing the logic yourself.
919 Perl_lex_bufutf8(pTHX)
925 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
927 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
928 at least I<len> octets (including terminating NUL). Returns a
929 pointer to the reallocated buffer. This is necessary before making
930 any direct modification of the buffer that would increase its length.
931 L</lex_stuff_pvn> provides a more convenient way to insert text into
934 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
935 this function updates all of the lexer's variables that point directly
942 Perl_lex_grow_linestr(pTHX_ STRLEN len)
946 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
947 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
948 linestr = PL_parser->linestr;
949 buf = SvPVX(linestr);
950 if (len <= SvLEN(linestr))
952 bufend_pos = PL_parser->bufend - buf;
953 bufptr_pos = PL_parser->bufptr - buf;
954 oldbufptr_pos = PL_parser->oldbufptr - buf;
955 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
956 linestart_pos = PL_parser->linestart - buf;
957 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
958 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
959 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
960 PL_parser->lex_shared->re_eval_start - buf : 0;
962 buf = sv_grow(linestr, len);
964 PL_parser->bufend = buf + bufend_pos;
965 PL_parser->bufptr = buf + bufptr_pos;
966 PL_parser->oldbufptr = buf + oldbufptr_pos;
967 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
968 PL_parser->linestart = buf + linestart_pos;
969 if (PL_parser->last_uni)
970 PL_parser->last_uni = buf + last_uni_pos;
971 if (PL_parser->last_lop)
972 PL_parser->last_lop = buf + last_lop_pos;
973 if (PL_parser->lex_shared->re_eval_start)
974 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
979 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
981 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
982 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
983 reallocating the buffer if necessary. This means that lexing code that
984 runs later will see the characters as if they had appeared in the input.
985 It is not recommended to do this as part of normal parsing, and most
986 uses of this facility run the risk of the inserted characters being
987 interpreted in an unintended manner.
989 The string to be inserted is represented by I<len> octets starting
990 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
991 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
992 The characters are recoded for the lexer buffer, according to how the
993 buffer is currently being interpreted (L</lex_bufutf8>). If a string
994 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
995 function is more convenient.
1001 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1005 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1006 if (flags & ~(LEX_STUFF_UTF8))
1007 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1009 if (flags & LEX_STUFF_UTF8) {
1012 STRLEN highhalf = 0; /* Count of variants */
1013 const char *p, *e = pv+len;
1014 for (p = pv; p != e; p++) {
1015 if (! UTF8_IS_INVARIANT(*p)) {
1021 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1022 bufptr = PL_parser->bufptr;
1023 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1024 SvCUR_set(PL_parser->linestr,
1025 SvCUR(PL_parser->linestr) + len+highhalf);
1026 PL_parser->bufend += len+highhalf;
1027 for (p = pv; p != e; p++) {
1029 if (! UTF8_IS_INVARIANT(c)) {
1030 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1031 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1033 *bufptr++ = (char)c;
1038 if (flags & LEX_STUFF_UTF8) {
1039 STRLEN highhalf = 0;
1040 const char *p, *e = pv+len;
1041 for (p = pv; p != e; p++) {
1043 if (UTF8_IS_ABOVE_LATIN1(c)) {
1044 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1045 "non-Latin-1 character into Latin-1 input");
1046 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1049 } else if (! UTF8_IS_INVARIANT(c)) {
1050 /* malformed UTF-8 */
1052 SAVESPTR(PL_warnhook);
1053 PL_warnhook = PERL_WARNHOOK_FATAL;
1054 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1060 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1061 bufptr = PL_parser->bufptr;
1062 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1063 SvCUR_set(PL_parser->linestr,
1064 SvCUR(PL_parser->linestr) + len-highhalf);
1065 PL_parser->bufend += len-highhalf;
1068 if (UTF8_IS_INVARIANT(*p)) {
1074 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1080 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1081 bufptr = PL_parser->bufptr;
1082 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1083 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1084 PL_parser->bufend += len;
1085 Copy(pv, bufptr, len, char);
1091 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1093 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1094 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1095 reallocating the buffer if necessary. This means that lexing code that
1096 runs later will see the characters as if they had appeared in the input.
1097 It is not recommended to do this as part of normal parsing, and most
1098 uses of this facility run the risk of the inserted characters being
1099 interpreted in an unintended manner.
1101 The string to be inserted is represented by octets starting at I<pv>
1102 and continuing to the first nul. These octets are interpreted as either
1103 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1104 in I<flags>. The characters are recoded for the lexer buffer, according
1105 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1106 If it is not convenient to nul-terminate a string to be inserted, the
1107 L</lex_stuff_pvn> function is more appropriate.
1113 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1115 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1116 lex_stuff_pvn(pv, strlen(pv), flags);
1120 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1122 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1123 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1124 reallocating the buffer if necessary. This means that lexing code that
1125 runs later will see the characters as if they had appeared in the input.
1126 It is not recommended to do this as part of normal parsing, and most
1127 uses of this facility run the risk of the inserted characters being
1128 interpreted in an unintended manner.
1130 The string to be inserted is the string value of I<sv>. The characters
1131 are recoded for the lexer buffer, according to how the buffer is currently
1132 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1133 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1134 need to construct a scalar.
1140 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1144 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1148 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1152 =for apidoc Amx|void|lex_unstuff|char *ptr
1154 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1155 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1156 This hides the discarded text from any lexing code that runs later,
1157 as if the text had never appeared.
1159 This is not the normal way to consume lexed text. For that, use
1166 Perl_lex_unstuff(pTHX_ char *ptr)
1170 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1171 buf = PL_parser->bufptr;
1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176 bufend = PL_parser->bufend;
1178 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1179 unstuff_len = ptr - buf;
1180 Move(ptr, buf, bufend+1-ptr, char);
1181 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1182 PL_parser->bufend = bufend - unstuff_len;
1186 =for apidoc Amx|void|lex_read_to|char *ptr
1188 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1189 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1190 performing the correct bookkeeping whenever a newline character is passed.
1191 This is the normal way to consume lexed text.
1193 Interpretation of the buffer's octets can be abstracted out by
1194 using the slightly higher-level functions L</lex_peek_unichar> and
1195 L</lex_read_unichar>.
1201 Perl_lex_read_to(pTHX_ char *ptr)
1204 PERL_ARGS_ASSERT_LEX_READ_TO;
1205 s = PL_parser->bufptr;
1206 if (ptr < s || ptr > PL_parser->bufend)
1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1208 for (; s != ptr; s++)
1210 COPLINE_INC_WITH_HERELINES;
1211 PL_parser->linestart = s+1;
1213 PL_parser->bufptr = ptr;
1217 =for apidoc Amx|void|lex_discard_to|char *ptr
1219 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1220 up to I<ptr>. The remaining content of the buffer will be moved, and
1221 all pointers into the buffer updated appropriately. I<ptr> must not
1222 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1223 it is not permitted to discard text that has yet to be lexed.
1225 Normally it is not necessarily to do this directly, because it suffices to
1226 use the implicit discarding behaviour of L</lex_next_chunk> and things
1227 based on it. However, if a token stretches across multiple lines,
1228 and the lexing code has kept multiple lines of text in the buffer for
1229 that purpose, then after completion of the token it would be wise to
1230 explicitly discard the now-unneeded earlier lines, to avoid future
1231 multi-line tokens growing the buffer without bound.
1237 Perl_lex_discard_to(pTHX_ char *ptr)
1241 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1242 buf = SvPVX(PL_parser->linestr);
1244 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1247 if (ptr > PL_parser->bufptr)
1248 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1249 discard_len = ptr - buf;
1250 if (PL_parser->oldbufptr < ptr)
1251 PL_parser->oldbufptr = ptr;
1252 if (PL_parser->oldoldbufptr < ptr)
1253 PL_parser->oldoldbufptr = ptr;
1254 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1255 PL_parser->last_uni = NULL;
1256 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1257 PL_parser->last_lop = NULL;
1258 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1259 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1260 PL_parser->bufend -= discard_len;
1261 PL_parser->bufptr -= discard_len;
1262 PL_parser->oldbufptr -= discard_len;
1263 PL_parser->oldoldbufptr -= discard_len;
1264 if (PL_parser->last_uni)
1265 PL_parser->last_uni -= discard_len;
1266 if (PL_parser->last_lop)
1267 PL_parser->last_lop -= discard_len;
1271 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1273 Reads in the next chunk of text to be lexed, appending it to
1274 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1275 looked to the end of the current chunk and wants to know more. It is
1276 usual, but not necessary, for lexing to have consumed the entirety of
1277 the current chunk at this time.
1279 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1280 chunk (i.e., the current chunk has been entirely consumed), normally the
1281 current chunk will be discarded at the same time that the new chunk is
1282 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1283 will not be discarded. If the current chunk has not been entirely
1284 consumed, then it will not be discarded regardless of the flag.
1286 Returns true if some new text was added to the buffer, or false if the
1287 buffer has reached the end of the input text.
1292 #define LEX_FAKE_EOF 0x80000000
1293 #define LEX_NO_TERM 0x40000000
1296 Perl_lex_next_chunk(pTHX_ U32 flags)
1300 STRLEN old_bufend_pos, new_bufend_pos;
1301 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1302 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1303 bool got_some_for_debugger = 0;
1305 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1306 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1307 linestr = PL_parser->linestr;
1308 buf = SvPVX(linestr);
1309 if (!(flags & LEX_KEEP_PREVIOUS) &&
1310 PL_parser->bufptr == PL_parser->bufend) {
1311 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1313 if (PL_parser->last_uni != PL_parser->bufend)
1314 PL_parser->last_uni = NULL;
1315 if (PL_parser->last_lop != PL_parser->bufend)
1316 PL_parser->last_lop = NULL;
1317 last_uni_pos = last_lop_pos = 0;
1321 old_bufend_pos = PL_parser->bufend - buf;
1322 bufptr_pos = PL_parser->bufptr - buf;
1323 oldbufptr_pos = PL_parser->oldbufptr - buf;
1324 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1325 linestart_pos = PL_parser->linestart - buf;
1326 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1327 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1329 if (flags & LEX_FAKE_EOF) {
1331 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1333 } else if (filter_gets(linestr, old_bufend_pos)) {
1335 got_some_for_debugger = 1;
1336 } else if (flags & LEX_NO_TERM) {
1339 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1340 sv_setpvs(linestr, "");
1342 /* End of real input. Close filehandle (unless it was STDIN),
1343 * then add implicit termination.
1345 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1346 PerlIO_clearerr(PL_parser->rsfp);
1347 else if (PL_parser->rsfp)
1348 (void)PerlIO_close(PL_parser->rsfp);
1349 PL_parser->rsfp = NULL;
1350 PL_parser->in_pod = PL_parser->filtered = 0;
1352 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1355 if (!PL_in_eval && PL_minus_p) {
1357 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1358 PL_minus_n = PL_minus_p = 0;
1359 } else if (!PL_in_eval && PL_minus_n) {
1360 sv_catpvs(linestr, /*{*/";}");
1363 sv_catpvs(linestr, ";");
1366 buf = SvPVX(linestr);
1367 new_bufend_pos = SvCUR(linestr);
1368 PL_parser->bufend = buf + new_bufend_pos;
1369 PL_parser->bufptr = buf + bufptr_pos;
1370 PL_parser->oldbufptr = buf + oldbufptr_pos;
1371 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1372 PL_parser->linestart = buf + linestart_pos;
1373 if (PL_parser->last_uni)
1374 PL_parser->last_uni = buf + last_uni_pos;
1375 if (PL_parser->last_lop)
1376 PL_parser->last_lop = buf + last_lop_pos;
1377 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1378 PL_curstash != PL_debstash) {
1379 /* debugger active and we're not compiling the debugger code,
1380 * so store the line into the debugger's array of lines
1382 update_debugger_info(NULL, buf+old_bufend_pos,
1383 new_bufend_pos-old_bufend_pos);
1389 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1391 Looks ahead one (Unicode) character in the text currently being lexed.
1392 Returns the codepoint (unsigned integer value) of the next character,
1393 or -1 if lexing has reached the end of the input text. To consume the
1394 peeked character, use L</lex_read_unichar>.
1396 If the next character is in (or extends into) the next chunk of input
1397 text, the next chunk will be read in. Normally the current chunk will be
1398 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1399 then the current chunk will not be discarded.
1401 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1402 is encountered, an exception is generated.
1408 Perl_lex_peek_unichar(pTHX_ U32 flags)
1412 if (flags & ~(LEX_KEEP_PREVIOUS))
1413 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1414 s = PL_parser->bufptr;
1415 bufend = PL_parser->bufend;
1421 if (!lex_next_chunk(flags))
1423 s = PL_parser->bufptr;
1424 bufend = PL_parser->bufend;
1427 if (UTF8_IS_INVARIANT(head))
1429 if (UTF8_IS_START(head)) {
1430 len = UTF8SKIP(&head);
1431 while ((STRLEN)(bufend-s) < len) {
1432 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1434 s = PL_parser->bufptr;
1435 bufend = PL_parser->bufend;
1438 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1439 if (retlen == (STRLEN)-1) {
1440 /* malformed UTF-8 */
1442 SAVESPTR(PL_warnhook);
1443 PL_warnhook = PERL_WARNHOOK_FATAL;
1444 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1450 if (!lex_next_chunk(flags))
1452 s = PL_parser->bufptr;
1459 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1461 Reads the next (Unicode) character in the text currently being lexed.
1462 Returns the codepoint (unsigned integer value) of the character read,
1463 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1464 if lexing has reached the end of the input text. To non-destructively
1465 examine the next character, use L</lex_peek_unichar> instead.
1467 If the next character is in (or extends into) the next chunk of input
1468 text, the next chunk will be read in. Normally the current chunk will be
1469 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1470 then the current chunk will not be discarded.
1472 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1473 is encountered, an exception is generated.
1479 Perl_lex_read_unichar(pTHX_ U32 flags)
1482 if (flags & ~(LEX_KEEP_PREVIOUS))
1483 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1484 c = lex_peek_unichar(flags);
1487 COPLINE_INC_WITH_HERELINES;
1489 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1491 ++(PL_parser->bufptr);
1497 =for apidoc Amx|void|lex_read_space|U32 flags
1499 Reads optional spaces, in Perl style, in the text currently being
1500 lexed. The spaces may include ordinary whitespace characters and
1501 Perl-style comments. C<#line> directives are processed if encountered.
1502 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1503 at a non-space character (or the end of the input text).
1505 If spaces extend into the next chunk of input text, the next chunk will
1506 be read in. Normally the current chunk will be discarded at the same
1507 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1508 chunk will not be discarded.
1513 #define LEX_NO_NEXT_CHUNK 0x80000000
1516 Perl_lex_read_space(pTHX_ U32 flags)
1519 bool need_incline = 0;
1520 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1521 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1524 sv_free(PL_skipwhite);
1525 PL_skipwhite = NULL;
1528 PL_skipwhite = newSVpvs("");
1529 #endif /* PERL_MAD */
1530 s = PL_parser->bufptr;
1531 bufend = PL_parser->bufend;
1537 } while (!(c == '\n' || (c == 0 && s == bufend)));
1538 } else if (c == '\n') {
1540 PL_parser->linestart = s;
1545 } else if (isSPACE(c)) {
1547 } else if (c == 0 && s == bufend) {
1551 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1552 #endif /* PERL_MAD */
1553 if (flags & LEX_NO_NEXT_CHUNK)
1555 PL_parser->bufptr = s;
1556 COPLINE_INC_WITH_HERELINES;
1557 got_more = lex_next_chunk(flags);
1558 CopLINE_dec(PL_curcop);
1559 s = PL_parser->bufptr;
1560 bufend = PL_parser->bufend;
1563 if (need_incline && PL_parser->rsfp) {
1573 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1574 #endif /* PERL_MAD */
1575 PL_parser->bufptr = s;
1580 * This subroutine has nothing to do with tilting, whether at windmills
1581 * or pinball tables. Its name is short for "increment line". It
1582 * increments the current line number in CopLINE(PL_curcop) and checks
1583 * to see whether the line starts with a comment of the form
1584 * # line 500 "foo.pm"
1585 * If so, it sets the current line number and file to the values in the comment.
1589 S_incline(pTHX_ const char *s)
1597 PERL_ARGS_ASSERT_INCLINE;
1599 COPLINE_INC_WITH_HERELINES;
1600 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1601 && s+1 == PL_bufend && *s == ';') {
1602 /* fake newline in string eval */
1603 CopLINE_dec(PL_curcop);
1608 while (SPACE_OR_TAB(*s))
1610 if (strnEQ(s, "line", 4))
1614 if (SPACE_OR_TAB(*s))
1618 while (SPACE_OR_TAB(*s))
1626 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1628 while (SPACE_OR_TAB(*s))
1630 if (*s == '"' && (t = strchr(s+1, '"'))) {
1636 while (!isSPACE(*t))
1640 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1642 if (*e != '\n' && *e != '\0')
1643 return; /* false alarm */
1645 line_num = atoi(n)-1;
1648 const STRLEN len = t - s;
1649 SV *const temp_sv = CopFILESV(PL_curcop);
1654 cf = SvPVX(temp_sv);
1655 tmplen = SvCUR(temp_sv);
1661 if (!PL_rsfp && !PL_parser->filtered) {
1662 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1663 * to *{"::_<newfilename"} */
1664 /* However, the long form of evals is only turned on by the
1665 debugger - usually they're "(eval %lu)" */
1669 STRLEN tmplen2 = len;
1670 if (tmplen + 2 <= sizeof smallbuf)
1673 Newx(tmpbuf, tmplen + 2, char);
1676 memcpy(tmpbuf + 2, cf, tmplen);
1678 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1683 if (tmplen2 + 2 <= sizeof smallbuf)
1686 Newx(tmpbuf2, tmplen2 + 2, char);
1688 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1689 /* Either they malloc'd it, or we malloc'd it,
1690 so no prefix is present in ours. */
1695 memcpy(tmpbuf2 + 2, s, tmplen2);
1698 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1700 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1701 /* adjust ${"::_<newfilename"} to store the new file name */
1702 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1703 /* The line number may differ. If that is the case,
1704 alias the saved lines that are in the array.
1705 Otherwise alias the whole array. */
1706 if (CopLINE(PL_curcop) == line_num) {
1707 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1708 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1710 else if (GvAV(*gvp)) {
1711 AV * const av = GvAV(*gvp);
1712 const I32 start = CopLINE(PL_curcop)+1;
1713 I32 items = AvFILLp(av) - start;
1715 AV * const av2 = GvAVn(gv2);
1716 SV **svp = AvARRAY(av) + start;
1717 I32 l = (I32)line_num+1;
1719 av_store(av2, l++, SvREFCNT_inc(*svp++));
1724 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1726 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1728 CopFILE_free(PL_curcop);
1729 CopFILE_setn(PL_curcop, s, len);
1731 CopLINE_set(PL_curcop, line_num);
1735 /* skip space before PL_thistoken */
1738 S_skipspace0(pTHX_ char *s)
1740 PERL_ARGS_ASSERT_SKIPSPACE0;
1747 PL_thiswhite = newSVpvs("");
1748 sv_catsv(PL_thiswhite, PL_skipwhite);
1749 sv_free(PL_skipwhite);
1752 PL_realtokenstart = s - SvPVX(PL_linestr);
1756 /* skip space after PL_thistoken */
1759 S_skipspace1(pTHX_ char *s)
1761 const char *start = s;
1762 I32 startoff = start - SvPVX(PL_linestr);
1764 PERL_ARGS_ASSERT_SKIPSPACE1;
1769 start = SvPVX(PL_linestr) + startoff;
1770 if (!PL_thistoken && PL_realtokenstart >= 0) {
1771 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1772 PL_thistoken = newSVpvn(tstart, start - tstart);
1774 PL_realtokenstart = -1;
1777 PL_nextwhite = newSVpvs("");
1778 sv_catsv(PL_nextwhite, PL_skipwhite);
1779 sv_free(PL_skipwhite);
1786 S_skipspace2(pTHX_ char *s, SV **svp)
1789 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1790 const I32 startoff = s - SvPVX(PL_linestr);
1792 PERL_ARGS_ASSERT_SKIPSPACE2;
1795 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1796 if (!PL_madskills || !svp)
1798 start = SvPVX(PL_linestr) + startoff;
1799 if (!PL_thistoken && PL_realtokenstart >= 0) {
1800 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1801 PL_thistoken = newSVpvn(tstart, start - tstart);
1802 PL_realtokenstart = -1;
1806 *svp = newSVpvs("");
1807 sv_setsv(*svp, PL_skipwhite);
1808 sv_free(PL_skipwhite);
1817 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1819 AV *av = CopFILEAVx(PL_curcop);
1821 SV * const sv = newSV_type(SVt_PVMG);
1823 sv_setsv(sv, orig_sv);
1825 sv_setpvn(sv, buf, len);
1828 av_store(av, (I32)CopLINE(PL_curcop), sv);
1834 * Called to gobble the appropriate amount and type of whitespace.
1835 * Skips comments as well.
1839 S_skipspace(pTHX_ char *s)
1843 #endif /* PERL_MAD */
1844 PERL_ARGS_ASSERT_SKIPSPACE;
1847 sv_free(PL_skipwhite);
1848 PL_skipwhite = NULL;
1850 #endif /* PERL_MAD */
1851 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1852 while (s < PL_bufend && SPACE_OR_TAB(*s))
1855 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1857 lex_read_space(LEX_KEEP_PREVIOUS |
1858 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1859 LEX_NO_NEXT_CHUNK : 0));
1861 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1862 if (PL_linestart > PL_bufptr)
1863 PL_bufptr = PL_linestart;
1868 PL_skipwhite = newSVpvn(start, s-start);
1869 #endif /* PERL_MAD */
1875 * Check the unary operators to ensure there's no ambiguity in how they're
1876 * used. An ambiguous piece of code would be:
1878 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1879 * the +5 is its argument.
1889 if (PL_oldoldbufptr != PL_last_uni)
1891 while (isSPACE(*PL_last_uni))
1894 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1896 if ((t = strchr(s, '(')) && t < PL_bufptr)
1899 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1900 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1901 (int)(s - PL_last_uni), PL_last_uni);
1905 * LOP : macro to build a list operator. Its behaviour has been replaced
1906 * with a subroutine, S_lop() for which LOP is just another name.
1909 #define LOP(f,x) return lop(f,x,s)
1913 * Build a list operator (or something that might be one). The rules:
1914 * - if we have a next token, then it's a list operator [why?]
1915 * - if the next thing is an opening paren, then it's a function
1916 * - else it's a list operator
1920 S_lop(pTHX_ I32 f, int x, char *s)
1924 PERL_ARGS_ASSERT_LOP;
1930 PL_last_lop = PL_oldbufptr;
1931 PL_last_lop_op = (OPCODE)f;
1940 return REPORT(FUNC);
1943 return REPORT(FUNC);
1946 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1947 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1948 return REPORT(LSTOP);
1955 * Sets up for an eventual force_next(). start_force(0) basically does
1956 * an unshift, while start_force(-1) does a push. yylex removes items
1961 S_start_force(pTHX_ int where)
1965 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1966 where = PL_lasttoke;
1967 assert(PL_curforce < 0 || PL_curforce == where);
1968 if (PL_curforce != where) {
1969 for (i = PL_lasttoke; i > where; --i) {
1970 PL_nexttoke[i] = PL_nexttoke[i-1];
1974 if (PL_curforce < 0) /* in case of duplicate start_force() */
1975 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1976 PL_curforce = where;
1979 curmad('^', newSVpvs(""));
1980 CURMAD('_', PL_nextwhite);
1985 S_curmad(pTHX_ char slot, SV *sv)
1991 if (PL_curforce < 0)
1992 where = &PL_thismad;
1994 where = &PL_nexttoke[PL_curforce].next_mad;
2000 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2002 else if (PL_encoding) {
2003 sv_recode_to_utf8(sv, PL_encoding);
2008 /* keep a slot open for the head of the list? */
2009 if (slot != '_' && *where && (*where)->mad_key == '^') {
2010 (*where)->mad_key = slot;
2011 sv_free(MUTABLE_SV(((*where)->mad_val)));
2012 (*where)->mad_val = (void*)sv;
2015 addmad(newMADsv(slot, sv), where, 0);
2018 # define start_force(where) NOOP
2019 # define curmad(slot, sv) NOOP
2024 * When the lexer realizes it knows the next token (for instance,
2025 * it is reordering tokens for the parser) then it can call S_force_next
2026 * to know what token to return the next time the lexer is called. Caller
2027 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2028 * and possibly PL_expect to ensure the lexer handles the token correctly.
2032 S_force_next(pTHX_ I32 type)
2037 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2038 tokereport(type, &NEXTVAL_NEXTTOKE);
2042 if (PL_curforce < 0)
2043 start_force(PL_lasttoke);
2044 PL_nexttoke[PL_curforce].next_type = type;
2045 if (PL_lex_state != LEX_KNOWNEXT)
2046 PL_lex_defer = PL_lex_state;
2047 PL_lex_state = LEX_KNOWNEXT;
2048 PL_lex_expect = PL_expect;
2051 PL_nexttype[PL_nexttoke] = type;
2053 if (PL_lex_state != LEX_KNOWNEXT) {
2054 PL_lex_defer = PL_lex_state;
2055 PL_lex_expect = PL_expect;
2056 PL_lex_state = LEX_KNOWNEXT;
2064 int yyc = PL_parser->yychar;
2065 if (yyc != YYEMPTY) {
2068 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2069 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2070 PL_lex_allbrackets--;
2072 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2073 } else if (yyc == '('/*)*/) {
2074 PL_lex_allbrackets--;
2079 PL_parser->yychar = YYEMPTY;
2084 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2087 SV * const sv = newSVpvn_utf8(start, len,
2090 && !is_ascii_string((const U8*)start, len)
2091 && is_utf8_string((const U8*)start, len));
2097 * When the lexer knows the next thing is a word (for instance, it has
2098 * just seen -> and it knows that the next char is a word char, then
2099 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2103 * char *start : buffer position (must be within PL_linestr)
2104 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2105 * int check_keyword : if true, Perl checks to make sure the word isn't
2106 * a keyword (do this if the word is a label, e.g. goto FOO)
2107 * int allow_pack : if true, : characters will also be allowed (require,
2108 * use, etc. do this)
2109 * int allow_initial_tick : used by the "sub" lexer only.
2113 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2119 PERL_ARGS_ASSERT_FORCE_WORD;
2121 start = SKIPSPACE1(start);
2123 if (isIDFIRST_lazy_if(s,UTF) ||
2124 (allow_pack && *s == ':') ||
2125 (allow_initial_tick && *s == '\'') )
2127 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2128 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2130 start_force(PL_curforce);
2132 curmad('X', newSVpvn(start,s-start));
2133 if (token == METHOD) {
2138 PL_expect = XOPERATOR;
2142 curmad('g', newSVpvs( "forced" ));
2143 NEXTVAL_NEXTTOKE.opval
2144 = (OP*)newSVOP(OP_CONST,0,
2145 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2146 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2154 * Called when the lexer wants $foo *foo &foo etc, but the program
2155 * text only contains the "foo" portion. The first argument is a pointer
2156 * to the "foo", and the second argument is the type symbol to prefix.
2157 * Forces the next token to be a "WORD".
2158 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2162 S_force_ident(pTHX_ const char *s, int kind)
2166 PERL_ARGS_ASSERT_FORCE_IDENT;
2169 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2170 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2171 UTF ? SVf_UTF8 : 0));
2172 start_force(PL_curforce);
2173 NEXTVAL_NEXTTOKE.opval = o;
2176 o->op_private = OPpCONST_ENTERED;
2177 /* XXX see note in pp_entereval() for why we forgo typo
2178 warnings if the symbol must be introduced in an eval.
2180 gv_fetchpvn_flags(s, len,
2181 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2182 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2183 kind == '$' ? SVt_PV :
2184 kind == '@' ? SVt_PVAV :
2185 kind == '%' ? SVt_PVHV :
2193 S_force_ident_maybe_lex(pTHX_ char pit)
2195 start_force(PL_curforce);
2196 NEXTVAL_NEXTTOKE.ival = pit;
2201 Perl_str_to_version(pTHX_ SV *sv)
2206 const char *start = SvPV_const(sv,len);
2207 const char * const end = start + len;
2208 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2210 PERL_ARGS_ASSERT_STR_TO_VERSION;
2212 while (start < end) {
2216 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2221 retval += ((NV)n)/nshift;
2230 * Forces the next token to be a version number.
2231 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2232 * and if "guessing" is TRUE, then no new token is created (and the caller
2233 * must use an alternative parsing method).
2237 S_force_version(pTHX_ char *s, int guessing)
2243 I32 startoff = s - SvPVX(PL_linestr);
2246 PERL_ARGS_ASSERT_FORCE_VERSION;
2254 while (isDIGIT(*d) || *d == '_' || *d == '.')
2258 start_force(PL_curforce);
2259 curmad('X', newSVpvn(s,d-s));
2262 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2264 #ifdef USE_LOCALE_NUMERIC
2265 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2266 setlocale(LC_NUMERIC, "C");
2268 s = scan_num(s, &pl_yylval);
2269 #ifdef USE_LOCALE_NUMERIC
2270 setlocale(LC_NUMERIC, loc);
2273 version = pl_yylval.opval;
2274 ver = cSVOPx(version)->op_sv;
2275 if (SvPOK(ver) && !SvNIOK(ver)) {
2276 SvUPGRADE(ver, SVt_PVNV);
2277 SvNV_set(ver, str_to_version(ver));
2278 SvNOK_on(ver); /* hint that it is a version */
2281 else if (guessing) {
2284 sv_free(PL_nextwhite); /* let next token collect whitespace */
2286 s = SvPVX(PL_linestr) + startoff;
2294 if (PL_madskills && !version) {
2295 sv_free(PL_nextwhite); /* let next token collect whitespace */
2297 s = SvPVX(PL_linestr) + startoff;
2300 /* NOTE: The parser sees the package name and the VERSION swapped */
2301 start_force(PL_curforce);
2302 NEXTVAL_NEXTTOKE.opval = version;
2309 * S_force_strict_version
2310 * Forces the next token to be a version number using strict syntax rules.
2314 S_force_strict_version(pTHX_ char *s)
2319 I32 startoff = s - SvPVX(PL_linestr);
2321 const char *errstr = NULL;
2323 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2325 while (isSPACE(*s)) /* leading whitespace */
2328 if (is_STRICT_VERSION(s,&errstr)) {
2330 s = (char *)scan_version(s, ver, 0);
2331 version = newSVOP(OP_CONST, 0, ver);
2333 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2334 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2338 yyerror(errstr); /* version required */
2343 if (PL_madskills && !version) {
2344 sv_free(PL_nextwhite); /* let next token collect whitespace */
2346 s = SvPVX(PL_linestr) + startoff;
2349 /* NOTE: The parser sees the package name and the VERSION swapped */
2350 start_force(PL_curforce);
2351 NEXTVAL_NEXTTOKE.opval = version;
2359 * Tokenize a quoted string passed in as an SV. It finds the next
2360 * chunk, up to end of string or a backslash. It may make a new
2361 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2366 S_tokeq(pTHX_ SV *sv)
2375 PERL_ARGS_ASSERT_TOKEQ;
2380 s = SvPV_force(sv, len);
2381 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2384 /* This is relying on the SV being "well formed" with a trailing '\0' */
2385 while (s < send && !(*s == '\\' && s[1] == '\\'))
2390 if ( PL_hints & HINT_NEW_STRING ) {
2391 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2395 if (s + 1 < send && (s[1] == '\\'))
2396 s++; /* all that, just for this */
2401 SvCUR_set(sv, d - SvPVX_const(sv));
2403 if ( PL_hints & HINT_NEW_STRING )
2404 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2409 * Now come three functions related to double-quote context,
2410 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2411 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2412 * interact with PL_lex_state, and create fake ( ... ) argument lists
2413 * to handle functions and concatenation.
2417 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2422 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2424 * Pattern matching will set PL_lex_op to the pattern-matching op to
2425 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2427 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2429 * Everything else becomes a FUNC.
2431 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2432 * had an OP_CONST or OP_READLINE). This just sets us up for a
2433 * call to S_sublex_push().
2437 S_sublex_start(pTHX)
2440 const I32 op_type = pl_yylval.ival;
2442 if (op_type == OP_NULL) {
2443 pl_yylval.opval = PL_lex_op;
2447 if (op_type == OP_CONST || op_type == OP_READLINE) {
2448 SV *sv = tokeq(PL_lex_stuff);
2450 if (SvTYPE(sv) == SVt_PVIV) {
2451 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2453 const char * const p = SvPV_const(sv, len);
2454 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2458 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2459 PL_lex_stuff = NULL;
2460 /* Allow <FH> // "foo" */
2461 if (op_type == OP_READLINE)
2462 PL_expect = XTERMORDORDOR;
2465 else if (op_type == OP_BACKTICK && PL_lex_op) {
2466 /* readpipe() vas overriden */
2467 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2468 pl_yylval.opval = PL_lex_op;
2470 PL_lex_stuff = NULL;
2474 PL_sublex_info.super_state = PL_lex_state;
2475 PL_sublex_info.sub_inwhat = (U16)op_type;
2476 PL_sublex_info.sub_op = PL_lex_op;
2477 PL_lex_state = LEX_INTERPPUSH;
2481 pl_yylval.opval = PL_lex_op;
2491 * Create a new scope to save the lexing state. The scope will be
2492 * ended in S_sublex_done. Returns a '(', starting the function arguments
2493 * to the uc, lc, etc. found before.
2494 * Sets PL_lex_state to LEX_INTERPCONCAT.
2504 PL_lex_state = PL_sublex_info.super_state;
2505 SAVEBOOL(PL_lex_dojoin);
2506 SAVEI32(PL_lex_brackets);
2507 SAVEI32(PL_lex_allbrackets);
2508 SAVEI32(PL_lex_formbrack);
2509 SAVEI8(PL_lex_fakeeof);
2510 SAVEI32(PL_lex_casemods);
2511 SAVEI32(PL_lex_starts);
2512 SAVEI8(PL_lex_state);
2513 SAVESPTR(PL_lex_repl);
2514 SAVEVPTR(PL_lex_inpat);
2515 SAVEI16(PL_lex_inwhat);
2516 SAVECOPLINE(PL_curcop);
2517 SAVEPPTR(PL_bufptr);
2518 SAVEPPTR(PL_bufend);
2519 SAVEPPTR(PL_oldbufptr);
2520 SAVEPPTR(PL_oldoldbufptr);
2521 SAVEPPTR(PL_last_lop);
2522 SAVEPPTR(PL_last_uni);
2523 SAVEPPTR(PL_linestart);
2524 SAVESPTR(PL_linestr);
2525 SAVEGENERICPV(PL_lex_brackstack);
2526 SAVEGENERICPV(PL_lex_casestack);
2527 SAVEGENERICPV(PL_parser->lex_shared);
2529 /* The here-doc parser needs to be able to peek into outer lexing
2530 scopes to find the body of the here-doc. So we put PL_linestr and
2531 PL_bufptr into lex_shared, to ‘share’ those values.
2533 PL_parser->lex_shared->ls_linestr = PL_linestr;
2534 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2536 PL_linestr = PL_lex_stuff;
2537 PL_lex_repl = PL_sublex_info.repl;
2538 PL_lex_stuff = NULL;
2539 PL_sublex_info.repl = NULL;
2541 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2542 = SvPVX(PL_linestr);
2543 PL_bufend += SvCUR(PL_linestr);
2544 PL_last_lop = PL_last_uni = NULL;
2545 SAVEFREESV(PL_linestr);
2546 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2548 PL_lex_dojoin = FALSE;
2549 PL_lex_brackets = PL_lex_formbrack = 0;
2550 PL_lex_allbrackets = 0;
2551 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2552 Newx(PL_lex_brackstack, 120, char);
2553 Newx(PL_lex_casestack, 12, char);
2554 PL_lex_casemods = 0;
2555 *PL_lex_casestack = '\0';
2557 PL_lex_state = LEX_INTERPCONCAT;
2558 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2560 Newxz(shared, 1, LEXSHARED);
2561 shared->ls_prev = PL_parser->lex_shared;
2562 PL_parser->lex_shared = shared;
2564 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2565 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2566 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2567 PL_lex_inpat = PL_sublex_info.sub_op;
2569 PL_lex_inpat = NULL;
2576 * Restores lexer state after a S_sublex_push.
2583 if (!PL_lex_starts++) {
2584 SV * const sv = newSVpvs("");
2585 if (SvUTF8(PL_linestr))
2587 PL_expect = XOPERATOR;
2588 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2592 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2593 PL_lex_state = LEX_INTERPCASEMOD;
2597 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2598 assert(PL_lex_inwhat != OP_TRANSR);
2599 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2600 PL_linestr = PL_lex_repl;
2602 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2603 PL_bufend += SvCUR(PL_linestr);
2604 PL_last_lop = PL_last_uni = NULL;
2605 PL_lex_dojoin = FALSE;
2606 PL_lex_brackets = 0;
2607 PL_lex_allbrackets = 0;
2608 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2609 PL_lex_casemods = 0;
2610 *PL_lex_casestack = '\0';
2612 if (SvEVALED(PL_lex_repl)) {
2613 PL_lex_state = LEX_INTERPNORMAL;
2615 /* we don't clear PL_lex_repl here, so that we can check later
2616 whether this is an evalled subst; that means we rely on the
2617 logic to ensure sublex_done() is called again only via the
2618 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2621 PL_lex_state = LEX_INTERPCONCAT;
2631 PL_endwhite = newSVpvs("");
2632 sv_catsv(PL_endwhite, PL_thiswhite);
2636 sv_setpvs(PL_thistoken,"");
2638 PL_realtokenstart = -1;
2642 PL_bufend = SvPVX(PL_linestr);
2643 PL_bufend += SvCUR(PL_linestr);
2644 PL_expect = XOPERATOR;
2645 PL_sublex_info.sub_inwhat = 0;
2650 PERL_STATIC_INLINE SV*
2651 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2653 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2654 * interior, hence to the "}". Finds what the name resolves to, returning
2655 * an SV* containing it; NULL if no valid one found */
2657 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2664 const U8* first_bad_char_loc;
2665 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2667 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2669 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2671 &first_bad_char_loc))
2673 /* If warnings are on, this will print a more detailed analysis of what
2674 * is wrong than the error message below */
2675 utf8n_to_uvuni(first_bad_char_loc,
2676 e - ((char *) first_bad_char_loc),
2679 /* We deliberately don't try to print the malformed character, which
2680 * might not print very well; it also may be just the first of many
2681 * malformations, so don't print what comes after it */
2682 yyerror(Perl_form(aTHX_
2683 "Malformed UTF-8 character immediately after '%.*s'",
2684 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2688 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2689 /* include the <}> */
2690 e - backslash_ptr + 1);
2692 SvREFCNT_dec_NN(res);
2696 /* See if the charnames handler is the Perl core's, and if so, we can skip
2697 * the validation needed for a user-supplied one, as Perl's does its own
2699 table = GvHV(PL_hintgv); /* ^H */
2700 cvp = hv_fetchs(table, "charnames", FALSE);
2701 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2702 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2704 const char * const name = HvNAME(stash);
2705 if strEQ(name, "_charnames") {
2710 /* Here, it isn't Perl's charname handler. We can't rely on a
2711 * user-supplied handler to validate the input name. For non-ut8 input,
2712 * look to see that the first character is legal. Then loop through the
2713 * rest checking that each is a continuation */
2715 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2719 if (! isALPHAU(*s)) {
2724 if (! isCHARNAME_CONT(*s)) {
2727 if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2728 Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
2732 if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2733 Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
2737 /* Similarly for utf8. For invariants can check directly; for other
2738 * Latin1, can calculate their code point and check; otherwise use a
2740 if (UTF8_IS_INVARIANT(*s)) {
2741 if (! isALPHAU(*s)) {
2745 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2746 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2752 if (! PL_utf8_charname_begin) {
2753 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2754 PL_utf8_charname_begin = _core_swash_init("utf8",
2755 "_Perl_Charname_Begin",
2757 1, 0, NULL, &flags);
2759 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2766 if (UTF8_IS_INVARIANT(*s)) {
2767 if (! isCHARNAME_CONT(*s)) {
2770 if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2771 Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
2775 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2776 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2784 if (! PL_utf8_charname_continue) {
2785 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2786 PL_utf8_charname_continue = _core_swash_init("utf8",
2787 "_Perl_Charname_Continue",
2789 1, 0, NULL, &flags);
2791 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2797 if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2798 Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
2802 if (SvUTF8(res)) { /* Don't accept malformed input */
2803 const U8* first_bad_char_loc;
2805 const char* const str = SvPV_const(res, len);
2806 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2807 /* If warnings are on, this will print a more detailed analysis of
2808 * what is wrong than the error message below */
2809 utf8n_to_uvuni(first_bad_char_loc,
2810 (char *) first_bad_char_loc - str,
2813 /* We deliberately don't try to print the malformed character,
2814 * which might not print very well; it also may be just the first
2815 * of many malformations, so don't print what comes after it */
2818 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2819 (int) (e - backslash_ptr + 1), backslash_ptr,
2820 (int) ((char *) first_bad_char_loc - str), str
2830 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2832 /* The final %.*s makes sure that should the trailing NUL be missing
2833 * that this print won't run off the end of the string */
2836 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2837 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2838 (int)(e - s + bad_char_size), s + bad_char_size
2840 UTF ? SVf_UTF8 : 0);
2848 Extracts the next constant part of a pattern, double-quoted string,
2849 or transliteration. This is terrifying code.
2851 For example, in parsing the double-quoted string "ab\x63$d", it would
2852 stop at the '$' and return an OP_CONST containing 'abc'.
2854 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2855 processing a pattern (PL_lex_inpat is true), a transliteration
2856 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2858 Returns a pointer to the character scanned up to. If this is
2859 advanced from the start pointer supplied (i.e. if anything was
2860 successfully parsed), will leave an OP_CONST for the substring scanned
2861 in pl_yylval. Caller must intuit reason for not parsing further
2862 by looking at the next characters herself.
2866 \N{ABC} => \N{U+41.42.43}
2869 all other \-char, including \N and \N{ apart from \N{ABC}
2872 @ and $ where it appears to be a var, but not for $ as tail anchor
2877 In transliterations:
2878 characters are VERY literal, except for - not at the start or end
2879 of the string, which indicates a range. If the range is in bytes,
2880 scan_const expands the range to the full set of intermediate
2881 characters. If the range is in utf8, the hyphen is replaced with
2882 a certain range mark which will be handled by pmtrans() in op.c.
2884 In double-quoted strings:
2886 double-quoted style: \r and \n
2887 constants: \x31, etc.
2888 deprecated backrefs: \1 (in substitution replacements)
2889 case and quoting: \U \Q \E
2892 scan_const does *not* construct ops to handle interpolated strings.
2893 It stops processing as soon as it finds an embedded $ or @ variable
2894 and leaves it to the caller to work out what's going on.
2896 embedded arrays (whether in pattern or not) could be:
2897 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2899 $ in double-quoted strings must be the symbol of an embedded scalar.
2901 $ in pattern could be $foo or could be tail anchor. Assumption:
2902 it's a tail anchor if $ is the last thing in the string, or if it's
2903 followed by one of "()| \r\n\t"
2905 \1 (backreferences) are turned into $1 in substitutions
2907 The structure of the code is
2908 while (there's a character to process) {
2909 handle transliteration ranges
2910 skip regexp comments /(?#comment)/ and codes /(?{code})/
2911 skip #-initiated comments in //x patterns
2912 check for embedded arrays
2913 check for embedded scalars
2915 deprecate \1 in substitution replacements
2916 handle string-changing backslashes \l \U \Q \E, etc.
2917 switch (what was escaped) {
2918 handle \- in a transliteration (becomes a literal -)
2919 if a pattern and not \N{, go treat as regular character
2920 handle \132 (octal characters)
2921 handle \x15 and \x{1234} (hex characters)
2922 handle \N{name} (named characters, also \N{3,5} in a pattern)
2923 handle \cV (control characters)
2924 handle printf-style backslashes (\f, \r, \n, etc)
2927 } (end if backslash)
2928 handle regular character
2929 } (end while character to read)
2934 S_scan_const(pTHX_ char *start)
2937 char *send = PL_bufend; /* end of the constant */
2938 SV *sv = newSV(send - start); /* sv for the constant. See
2939 note below on sizing. */
2940 char *s = start; /* start of the constant */
2941 char *d = SvPVX(sv); /* destination for copies */
2942 bool dorange = FALSE; /* are we in a translit range? */
2943 bool didrange = FALSE; /* did we just finish a range? */
2944 bool in_charclass = FALSE; /* within /[...]/ */
2945 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2946 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2947 to be UTF8? But, this can
2948 show as true when the source
2949 isn't utf8, as for example
2950 when it is entirely composed
2952 SV *res; /* result from charnames */
2954 /* Note on sizing: The scanned constant is placed into sv, which is
2955 * initialized by newSV() assuming one byte of output for every byte of
2956 * input. This routine expects newSV() to allocate an extra byte for a
2957 * trailing NUL, which this routine will append if it gets to the end of
2958 * the input. There may be more bytes of input than output (eg., \N{LATIN
2959 * CAPITAL LETTER A}), or more output than input if the constant ends up
2960 * recoded to utf8, but each time a construct is found that might increase
2961 * the needed size, SvGROW() is called. Its size parameter each time is
2962 * based on the best guess estimate at the time, namely the length used so
2963 * far, plus the length the current construct will occupy, plus room for
2964 * the trailing NUL, plus one byte for every input byte still unscanned */
2966 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2969 UV literal_endpoint = 0;
2970 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2973 PERL_ARGS_ASSERT_SCAN_CONST;
2975 assert(PL_lex_inwhat != OP_TRANSR);
2976 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2977 /* If we are doing a trans and we know we want UTF8 set expectation */
2978 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2979 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2982 /* Protect sv from errors and fatal warnings. */
2983 ENTER_with_name("scan_const");
2986 while (s < send || dorange) {
2988 /* get transliterations out of the way (they're most literal) */
2989 if (PL_lex_inwhat == OP_TRANS) {
2990 /* expand a range A-Z to the full set of characters. AIE! */
2992 I32 i; /* current expanded character */
2993 I32 min; /* first character in range */
2994 I32 max; /* last character in range */
3005 char * const c = (char*)utf8_hop((U8*)d, -1);
3009 *c = (char)UTF_TO_NATIVE(0xff);
3010 /* mark the range as done, and continue */
3016 i = d - SvPVX_const(sv); /* remember current offset */
3019 SvLEN(sv) + (has_utf8 ?
3020 (512 - UTF_CONTINUATION_MARK +
3023 /* How many two-byte within 0..255: 128 in UTF-8,
3024 * 96 in UTF-8-mod. */
3026 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3028 d = SvPVX(sv) + i; /* refresh d after realloc */
3032 for (j = 0; j <= 1; j++) {
3033 char * const c = (char*)utf8_hop((U8*)d, -1);
3034 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3040 max = (U8)0xff; /* only to \xff */
3041 uvmax = uv; /* \x{100} to uvmax */
3043 d = c; /* eat endpoint chars */
3048 d -= 2; /* eat the first char and the - */
3049 min = (U8)*d; /* first char in range */
3050 max = (U8)d[1]; /* last char in range */
3057 "Invalid range \"%c-%c\" in transliteration operator",
3058 (char)min, (char)max);
3062 if (literal_endpoint == 2 &&
3063 ((isLOWER(min) && isLOWER(max)) ||
3064 (isUPPER(min) && isUPPER(max)))) {
3066 for (i = min; i <= max; i++)
3068 *d++ = NATIVE_TO_NEED(has_utf8,i);
3070 for (i = min; i <= max; i++)
3072 *d++ = NATIVE_TO_NEED(has_utf8,i);
3077 for (i = min; i <= max; i++)
3080 const U8 ch = (U8)NATIVE_TO_UTF(i);
3081 if (UNI_IS_INVARIANT(ch))
3084 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3085 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3094 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3096 *d++ = (char)UTF_TO_NATIVE(0xff);
3098 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3102 /* mark the range as done, and continue */
3106 literal_endpoint = 0;
3111 /* range begins (ignore - as first or last char) */
3112 else if (*s == '-' && s+1 < send && s != start) {
3114 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3121 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
3131 literal_endpoint = 0;
3132 native_range = TRUE;
3137 /* if we get here, we're not doing a transliteration */
3139 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3142 while (s1 >= start && *s1-- == '\\')
3145 in_charclass = TRUE;
3148 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3151 while (s1 >= start && *s1-- == '\\')
3154 in_charclass = FALSE;
3157 /* skip for regexp comments /(?#comment)/, except for the last
3158 * char, which will be done separately.
3159 * Stop on (?{..}) and friends */
3161 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
3163 while (s+1 < send && *s != ')')
3164 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3166 else if (!PL_lex_casemods && !in_charclass &&
3167 ( s[2] == '{' /* This should match regcomp.c */
3168 || (s[2] == '?' && s[3] == '{')))
3174 /* likewise skip #-initiated comments in //x patterns */
3175 else if (*s == '#' && PL_lex_inpat &&
3176 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3177 while (s+1 < send && *s != '\n')
3178 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3181 /* no further processing of single-quoted regex */
3182 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3183 goto default_action;
3185 /* check for embedded arrays
3186 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3188 else if (*s == '@' && s[1]) {
3189 if (isWORDCHAR_lazy_if(s+1,UTF))
3191 if (strchr(":'{$", s[1]))
3193 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3194 break; /* in regexp, neither @+ nor @- are interpolated */
3197 /* check for embedded scalars. only stop if we're sure it's a
3200 else if (*s == '$') {
3201 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3203 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3205 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3206 "Possible unintended interpolation of $\\ in regex");
3208 break; /* in regexp, $ might be tail anchor */
3212 /* End of else if chain - OP_TRANS rejoin rest */
3215 if (*s == '\\' && s+1 < send) {
3216 char* e; /* Can be used for ending '}', etc. */
3220 /* warn on \1 - \9 in substitution replacements, but note that \11
3221 * is an octal; and \19 is \1 followed by '9' */
3222 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3223 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3225 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3230 /* string-change backslash escapes */
3231 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3235 /* In a pattern, process \N, but skip any other backslash escapes.
3236 * This is because we don't want to translate an escape sequence
3237 * into a meta symbol and have the regex compiler use the meta
3238 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3239 * in spite of this, we do have to process \N here while the proper
3240 * charnames handler is in scope. See bugs #56444 and #62056.
3241 * There is a complication because \N in a pattern may also stand
3242 * for 'match a non-nl', and not mean a charname, in which case its
3243 * processing should be deferred to the regex compiler. To be a
3244 * charname it must be followed immediately by a '{', and not look
3245 * like \N followed by a curly quantifier, i.e., not something like
3246 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3248 else if (PL_lex_inpat
3251 || regcurly(s + 1, FALSE)))
3253 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3254 goto default_action;
3259 /* quoted - in transliterations */
3261 if (PL_lex_inwhat == OP_TRANS) {
3268 if ((isALPHANUMERIC(*s)))
3269 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3270 "Unrecognized escape \\%c passed through",
3272 /* default action is to copy the quoted character */
3273 goto default_action;
3276 /* eg. \132 indicates the octal constant 0132 */
3277 case '0': case '1': case '2': case '3':
3278 case '4': case '5': case '6': case '7':
3280 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3282 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3284 if (len < 3 && s < send && isDIGIT(*s)
3285 && ckWARN(WARN_MISC))
3287 Perl_warner(aTHX_ packWARN(WARN_MISC),
3288 "%s", form_short_octal_warning(s, len));
3291 goto NUM_ESCAPE_INSERT;
3293 /* eg. \o{24} indicates the octal constant \024 */
3298 bool valid = grok_bslash_o(&s, &uv, &error,
3299 TRUE, /* Output warning */
3300 FALSE, /* Not strict */
3301 TRUE, /* Output warnings for
3308 goto NUM_ESCAPE_INSERT;
3311 /* eg. \x24 indicates the hex constant 0x24 */
3316 bool valid = grok_bslash_x(&s, &uv, &error,
3317 TRUE, /* Output warning */
3318 FALSE, /* Not strict */
3319 TRUE, /* Output warnings for
3329 /* Insert oct or hex escaped character. There will always be
3330 * enough room in sv since such escapes will be longer than any
3331 * UTF-8 sequence they can end up as, except if they force us
3332 * to recode the rest of the string into utf8 */
3334 /* Here uv is the ordinal of the next character being added in
3335 * unicode (converted from native). */
3336 if (!UNI_IS_INVARIANT(uv)) {
3337 if (!has_utf8 && uv > 255) {
3338 /* Might need to recode whatever we have accumulated so
3339 * far if it contains any chars variant in utf8 or
3342 SvCUR_set(sv, d - SvPVX_const(sv));
3345 /* See Note on sizing above. */
3346 sv_utf8_upgrade_flags_grow(sv,
3347 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3348 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3349 d = SvPVX(sv) + SvCUR(sv);
3354 d = (char*)uvuni_to_utf8((U8*)d, uv);
3355 if (PL_lex_inwhat == OP_TRANS &&
3356 PL_sublex_info.sub_op) {
3357 PL_sublex_info.sub_op->op_private |=
3358 (PL_lex_repl ? OPpTRANS_FROM_UTF
3362 if (uv > 255 && !dorange)
3363 native_range = FALSE;
3376 /* In a non-pattern \N must be a named character, like \N{LATIN
3377 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3378 * mean to match a non-newline. For non-patterns, named
3379 * characters are converted to their string equivalents. In
3380 * patterns, named characters are not converted to their
3381 * ultimate forms for the same reasons that other escapes
3382 * aren't. Instead, they are converted to the \N{U+...} form
3383 * to get the value from the charnames that is in effect right
3384 * now, while preserving the fact that it was a named character
3385 * so that the regex compiler knows this */
3387 /* This section of code doesn't generally use the
3388 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3389 * a close examination of this macro and determined it is a
3390 * no-op except on utfebcdic variant characters. Every
3391 * character generated by this that would normally need to be
3392 * enclosed by this macro is invariant, so the macro is not
3393 * needed, and would complicate use of copy(). XXX There are
3394 * other parts of this file where the macro is used
3395 * inconsistently, but are saved by it being a no-op */
3397 /* The structure of this section of code (besides checking for
3398 * errors and upgrading to utf8) is:
3399 * Further disambiguate between the two meanings of \N, and if
3400 * not a charname, go process it elsewhere
3401 * If of form \N{U+...}, pass it through if a pattern;
3402 * otherwise convert to utf8
3403 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3404 * pattern; otherwise convert to utf8 */
3406 /* Here, s points to the 'N'; the test below is guaranteed to
3407 * succeed if we are being called on a pattern as we already
3408 * know from a test above that the next character is a '{'.
3409 * On a non-pattern \N must mean 'named sequence, which
3410 * requires braces */
3413 yyerror("Missing braces on \\N{}");
3418 /* If there is no matching '}', it is an error. */
3419 if (! (e = strchr(s, '}'))) {
3420 if (! PL_lex_inpat) {
3421 yyerror("Missing right brace on \\N{}");
3423 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3428 /* Here it looks like a named character */
3430 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3431 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3432 | PERL_SCAN_DISALLOW_PREFIX;
3435 /* For \N{U+...}, the '...' is a unicode value even on
3436 * EBCDIC machines */
3437 s += 2; /* Skip to next char after the 'U+' */
3439 uv = grok_hex(s, &len, &flags, NULL);
3440 if (len == 0 || len != (STRLEN)(e - s)) {
3441 yyerror("Invalid hexadecimal number in \\N{U+...}");
3448 /* On non-EBCDIC platforms, pass through to the regex
3449 * compiler unchanged. The reason we evaluated the
3450 * number above is to make sure there wasn't a syntax
3451 * error. But on EBCDIC we convert to native so
3452 * downstream code can continue to assume it's native
3454 s -= 5; /* Include the '\N{U+' */
3456 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3459 (unsigned int) UNI_TO_NATIVE(uv));
3461 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3465 else { /* Not a pattern: convert the hex to string */
3467 /* If destination is not in utf8, unconditionally
3468 * recode it to be so. This is because \N{} implies
3469 * Unicode semantics, and scalars have to be in utf8
3470 * to guarantee those semantics */
3472 SvCUR_set(sv, d - SvPVX_const(sv));
3475 /* See Note on sizing above. */
3476 sv_utf8_upgrade_flags_grow(
3478 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3479 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3480 d = SvPVX(sv) + SvCUR(sv);
3484 /* Add the string to the output */
3485 if (UNI_IS_INVARIANT(uv)) {
3488 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3491 else /* Here is \N{NAME} but not \N{U+...}. */
3492 if ((res = get_and_check_backslash_N_name(s, e)))
3495 const char *str = SvPV_const(res, len);
3498 if (! len) { /* The name resolved to an empty string */
3499 Copy("\\N{}", d, 4, char);
3503 /* In order to not lose information for the regex
3504 * compiler, pass the result in the specially made
3505 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3506 * the code points in hex of each character
3507 * returned by charnames */
3509 const char *str_end = str + len;
3510 const STRLEN off = d - SvPVX_const(sv);
3512 if (! SvUTF8(res)) {
3513 /* For the non-UTF-8 case, we can determine the
3514 * exact length needed without having to parse
3515 * through the string. Each character takes up
3516 * 2 hex digits plus either a trailing dot or
3518 d = off + SvGROW(sv, off
3520 + 6 /* For the "\N{U+", and
3522 + (STRLEN)(send - e));
3523 Copy("\\N{U+", d, 5, char);
3525 while (str < str_end) {
3527 my_snprintf(hex_string, sizeof(hex_string),
3528 "%02X.", (U8) *str);
3529 Copy(hex_string, d, 3, char);
3533 d--; /* We will overwrite below the final
3534 dot with a right brace */
3537 STRLEN char_length; /* cur char's byte length */
3539 /* and the number of bytes after this is
3540 * translated into hex digits */
3541 STRLEN output_length;
3543 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3544 * for max('U+', '.'); and 1 for NUL */
3545 char hex_string[2 * UTF8_MAXBYTES + 5];
3547 /* Get the first character of the result. */
3548 U32 uv = utf8n_to_uvuni((U8 *) str,
3552 /* Convert first code point to hex, including
3553 * the boiler plate before it. For all these,
3554 * we convert to native format so that
3555 * downstream code can continue to assume the
3556 * input is native */
3558 my_snprintf(hex_string, sizeof(hex_string),
3560 (unsigned int) UNI_TO_NATIVE(uv));
3562 /* Make sure there is enough space to hold it */
3563 d = off + SvGROW(sv, off
3565 + (STRLEN)(send - e)
3566 + 2); /* '}' + NUL */
3568 Copy(hex_string, d, output_length, char);
3571 /* For each subsequent character, append dot and
3572 * its ordinal in hex */
3573 while ((str += char_length) < str_end) {
3574 const STRLEN off = d - SvPVX_const(sv);
3575 U32 uv = utf8n_to_uvuni((U8 *) str,
3580 my_snprintf(hex_string,
3583 (unsigned int) UNI_TO_NATIVE(uv));
3585 d = off + SvGROW(sv, off
3587 + (STRLEN)(send - e)
3588 + 2); /* '}' + NUL */
3589 Copy(hex_string, d, output_length, char);
3594 *d++ = '}'; /* Done. Add the trailing brace */
3597 else { /* Here, not in a pattern. Convert the name to a
3600 /* If destination is not in utf8, unconditionally
3601 * recode it to be so. This is because \N{} implies
3602 * Unicode semantics, and scalars have to be in utf8
3603 * to guarantee those semantics */
3605 SvCUR_set(sv, d - SvPVX_const(sv));
3608 /* See Note on sizing above. */
3609 sv_utf8_upgrade_flags_grow(sv,
3610 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3611 len + (STRLEN)(send - s) + 1);
3612 d = SvPVX(sv) + SvCUR(sv);
3614 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3616 /* See Note on sizing above. (NOTE: SvCUR() is not
3617 * set correctly here). */
3618 const STRLEN off = d - SvPVX_const(sv);
3619 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3621 Copy(str, d, len, char);
3627 } /* End \N{NAME} */
3630 native_range = FALSE; /* \N{} is defined to be Unicode */
3632 s = e + 1; /* Point to just after the '}' */
3635 /* \c is a control character */
3639 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3642 yyerror("Missing control char name in \\c");
3646 /* printf-style backslashes, formfeeds, newlines, etc */
3648 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3651 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3654 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3657 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3660 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3663 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3666 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3672 } /* end if (backslash) */
3679 /* If we started with encoded form, or already know we want it,
3680 then encode the next character */
3681 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3685 /* One might think that it is wasted effort in the case of the
3686 * source being utf8 (this_utf8 == TRUE) to take the next character
3687 * in the source, convert it to an unsigned value, and then convert
3688 * it back again. But the source has not been validated here. The
3689 * routine that does the conversion checks for errors like
3692 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3693 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3695 SvCUR_set(sv, d - SvPVX_const(sv));
3698 /* See Note on sizing above. */
3699 sv_utf8_upgrade_flags_grow(sv,
3700 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3701 need + (STRLEN)(send - s) + 1);
3702 d = SvPVX(sv) + SvCUR(sv);
3704 } else if (need > len) {
3705 /* encoded value larger than old, may need extra space (NOTE:
3706 * SvCUR() is not set correctly here). See Note on sizing
3708 const STRLEN off = d - SvPVX_const(sv);
3709 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3713 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3715 if (uv > 255 && !dorange)
3716 native_range = FALSE;
3720 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3722 } /* while loop to process each character */
3724 /* terminate the string and set up the sv */
3726 SvCUR_set(sv, d - SvPVX_const(sv));
3727 if (SvCUR(sv) >= SvLEN(sv))
3728 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3729 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3732 if (PL_encoding && !has_utf8) {
3733 sv_recode_to_utf8(sv, PL_encoding);
3739 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3740 PL_sublex_info.sub_op->op_private |=
3741 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3745 /* shrink the sv if we allocated more than we used */
3746 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3747 SvPV_shrink_to_cur(sv);
3750 /* return the substring (via pl_yylval) only if we parsed anything */
3751 if (s > PL_bufptr) {
3752 SvREFCNT_inc_simple_void_NN(sv);
3753 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3754 const char *const key = PL_lex_inpat ? "qr" : "q";
3755 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3759 if (PL_lex_inwhat == OP_TRANS) {
3762 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3765 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3773 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3776 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3778 LEAVE_with_name("scan_const");
3783 * Returns TRUE if there's more to the expression (e.g., a subscript),
3786 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3788 * ->[ and ->{ return TRUE
3789 * { and [ outside a pattern are always subscripts, so return TRUE
3790 * if we're outside a pattern and it's not { or [, then return FALSE
3791 * if we're in a pattern and the first char is a {
3792 * {4,5} (any digits around the comma) returns FALSE
3793 * if we're in a pattern and the first char is a [
3795 * [SOMETHING] has a funky algorithm to decide whether it's a
3796 * character class or not. It has to deal with things like
3797 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3798 * anything else returns TRUE
3801 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3804 S_intuit_more(pTHX_ char *s)
3808 PERL_ARGS_ASSERT_INTUIT_MORE;
3810 if (PL_lex_brackets)
3812 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3814 if (*s != '{' && *s != '[')
3819 /* In a pattern, so maybe we have {n,m}. */
3821 if (regcurly(s, FALSE)) {
3827 /* On the other hand, maybe we have a character class */
3830 if (*s == ']' || *s == '^')
3833 /* this is terrifying, and it works */
3836 const char * const send = strchr(s,']');
3837 unsigned char un_char, last_un_char;
3838 char tmpbuf[sizeof PL_tokenbuf * 4];
3840 if (!send) /* has to be an expression */
3842 weight = 2; /* let's weigh the evidence */
3846 else if (isDIGIT(*s)) {
3848 if (isDIGIT(s[1]) && s[2] == ']')
3854 Zero(seen,256,char);
3856 for (; s < send; s++) {
3857 last_un_char = un_char;
3858 un_char = (unsigned char)*s;
3863 weight -= seen[un_char] * 10;
3864 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3866 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3867 len = (int)strlen(tmpbuf);
3868 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3869 UTF ? SVf_UTF8 : 0, SVt_PV))
3874 else if (*s == '$' && s[1] &&
3875 strchr("[#!%*<>()-=",s[1])) {
3876 if (/*{*/ strchr("])} =",s[2]))
3885 if (strchr("wds]",s[1]))
3887 else if (seen[(U8)'\''] || seen[(U8)'"'])
3889 else if (strchr("rnftbxcav",s[1]))
3891 else if (isDIGIT(s[1])) {
3893 while (s[1] && isDIGIT(s[1]))
3903 if (strchr("aA01! ",last_un_char))
3905 if (strchr("zZ79~",s[1]))
3907 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3908 weight -= 5; /* cope with negative subscript */
3911 if (!isWORDCHAR(last_un_char)
3912 && !(last_un_char == '$' || last_un_char == '@'
3913 || last_un_char == '&')
3914 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3919 if (keyword(tmpbuf, d - tmpbuf, 0))
3922 if (un_char == last_un_char + 1)
3924 weight -= seen[un_char];
3929 if (weight >= 0) /* probably a character class */
3939 * Does all the checking to disambiguate
3941 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3942 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3944 * First argument is the stuff after the first token, e.g. "bar".
3946 * Not a method if foo is a filehandle.
3947 * Not a method if foo is a subroutine prototyped to take a filehandle.
3948 * Not a method if it's really "Foo $bar"
3949 * Method if it's "foo $bar"
3950 * Not a method if it's really "print foo $bar"
3951 * Method if it's really "foo package::" (interpreted as package->foo)
3952 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3953 * Not a method if bar is a filehandle or package, but is quoted with
3958 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3961 char *s = start + (*start == '$');
3962 char tmpbuf[sizeof PL_tokenbuf];
3969 PERL_ARGS_ASSERT_INTUIT_METHOD;
3971 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3973 if (cv && SvPOK(cv)) {
3974 const char *proto = CvPROTO(cv);
3982 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3983 /* start is the beginning of the possible filehandle/object,
3984 * and s is the end of it
3985 * tmpbuf is a copy of it
3988 if (*start == '$') {
3989 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3990 isUPPER(*PL_tokenbuf))
3993 len = start - SvPVX(PL_linestr);
3997 start = SvPVX(PL_linestr) + len;
4001 return *s == '(' ? FUNCMETH : METHOD;
4003 if (!keyword(tmpbuf, len, 0)) {
4004 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4008 soff = s - SvPVX(PL_linestr);
4012 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4013 if (indirgv && GvCVu(indirgv))
4015 /* filehandle or package name makes it a method */
4016 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4018 soff = s - SvPVX(PL_linestr);
4021 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4022 return 0; /* no assumptions -- "=>" quotes bareword */
4024 start_force(PL_curforce);
4025 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4026 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4027 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4029 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4030 ( UTF ? SVf_UTF8 : 0 )));
4035 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4037 return *s == '(' ? FUNCMETH : METHOD;
4043 /* Encoded script support. filter_add() effectively inserts a
4044 * 'pre-processing' function into the current source input stream.
4045 * Note that the filter function only applies to the current source file
4046 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4048 * The datasv parameter (which may be NULL) can be used to pass
4049 * private data to this instance of the filter. The filter function
4050 * can recover the SV using the FILTER_DATA macro and use it to
4051 * store private buffers and state information.
4053 * The supplied datasv parameter is upgraded to a PVIO type
4054 * and the IoDIRP/IoANY field is used to store the function pointer,
4055 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4056 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4057 * private use must be set using malloc'd pointers.
4061 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4070 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4071 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4073 if (!PL_rsfp_filters)
4074 PL_rsfp_filters = newAV();
4077 SvUPGRADE(datasv, SVt_PVIO);
4078 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4079 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4080 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4081 FPTR2DPTR(void *, IoANY(datasv)),
4082 SvPV_nolen(datasv)));
4083 av_unshift(PL_rsfp_filters, 1);
4084 av_store(PL_rsfp_filters, 0, datasv) ;
4086 !PL_parser->filtered
4087 && PL_parser->lex_flags & LEX_EVALBYTES
4088 && PL_bufptr < PL_bufend
4090 const char *s = PL_bufptr;
4091 while (s < PL_bufend) {
4093 SV *linestr = PL_parser->linestr;
4094 char *buf = SvPVX(linestr);
4095 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4096 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4097 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4098 STRLEN const linestart_pos = PL_parser->linestart - buf;
4099 STRLEN const last_uni_pos =
4100 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4101 STRLEN const last_lop_pos =
4102 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4103 av_push(PL_rsfp_filters, linestr);
4104 PL_parser->linestr =
4105 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4106 buf = SvPVX(PL_parser->linestr);
4107 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4108 PL_parser->bufptr = buf + bufptr_pos;
4109 PL_parser->oldbufptr = buf + oldbufptr_pos;
4110 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4111 PL_parser->linestart = buf + linestart_pos;
4112 if (PL_parser->last_uni)
4113 PL_parser->last_uni = buf + last_uni_pos;
4114 if (PL_parser->last_lop)
4115 PL_parser->last_lop = buf + last_lop_pos;
4116 SvLEN(linestr) = SvCUR(linestr);
4117 SvCUR(linestr) = s-SvPVX(linestr);
4118 PL_parser->filtered = 1;
4128 /* Delete most recently added instance of this filter function. */
4130 Perl_filter_del(pTHX_ filter_t funcp)
4135 PERL_ARGS_ASSERT_FILTER_DEL;
4138 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4139 FPTR2DPTR(void*, funcp)));
4141 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4143 /* if filter is on top of stack (usual case) just pop it off */
4144 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4145 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4146 sv_free(av_pop(PL_rsfp_filters));
4150 /* we need to search for the correct entry and clear it */
4151 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4155 /* Invoke the idxth filter function for the current rsfp. */
4156 /* maxlen 0 = read one text line */
4158 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4163 /* This API is bad. It should have been using unsigned int for maxlen.
4164 Not sure if we want to change the API, but if not we should sanity
4165 check the value here. */
4166 unsigned int correct_length
4175 PERL_ARGS_ASSERT_FILTER_READ;
4177 if (!PL_parser || !PL_rsfp_filters)
4179 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4180 /* Provide a default input filter to make life easy. */
4181 /* Note that we append to the line. This is handy. */
4182 DEBUG_P(PerlIO_printf(Perl_debug_log,
4183 "filter_read %d: from rsfp\n", idx));
4184 if (correct_length) {
4187 const int old_len = SvCUR(buf_sv);
4189 /* ensure buf_sv is large enough */
4190 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4191 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4192 correct_length)) <= 0) {
4193 if (PerlIO_error(PL_rsfp))
4194 return -1; /* error */
4196 return 0 ; /* end of file */
4198 SvCUR_set(buf_sv, old_len + len) ;
4199 SvPVX(buf_sv)[old_len + len] = '\0';
4202 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4203 if (PerlIO_error(PL_rsfp))
4204 return -1; /* error */
4206 return 0 ; /* end of file */
4209 return SvCUR(buf_sv);
4211 /* Skip this filter slot if filter has been deleted */
4212 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4213 DEBUG_P(PerlIO_printf(Perl_debug_log,
4214 "filter_read %d: skipped (filter deleted)\n",
4216 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4218 if (SvTYPE(datasv) != SVt_PVIO) {
4219 if (correct_length) {
4221 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4222 if (!remainder) return 0; /* eof */
4223 if (correct_length > remainder) correct_length = remainder;
4224 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4225 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4228 const char *s = SvEND(datasv);
4229 const char *send = SvPVX(datasv) + SvLEN(datasv);
4237 if (s == send) return 0; /* eof */
4238 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4239 SvCUR_set(datasv, s-SvPVX(datasv));
4241 return SvCUR(buf_sv);
4243 /* Get function pointer hidden within datasv */
4244 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4245 DEBUG_P(PerlIO_printf(Perl_debug_log,
4246 "filter_read %d: via function %p (%s)\n",
4247 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4248 /* Call function. The function is expected to */
4249 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4250 /* Return: <0:error, =0:eof, >0:not eof */
4251 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4255 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4259 PERL_ARGS_ASSERT_FILTER_GETS;
4261 #ifdef PERL_CR_FILTER
4262 if (!PL_rsfp_filters) {
4263 filter_add(S_cr_textfilter,NULL);
4266 if (PL_rsfp_filters) {
4268 SvCUR_set(sv, 0); /* start with empty line */
4269 if (FILTER_READ(0, sv, 0) > 0)
4270 return ( SvPVX(sv) ) ;
4275 return (sv_gets(sv, PL_rsfp, append));
4279 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4284 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4286 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4290 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4291 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4293 return GvHV(gv); /* Foo:: */
4296 /* use constant CLASS => 'MyClass' */
4297 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4298 if (gv && GvCV(gv)) {
4299 SV * const sv = cv_const_sv(GvCV(gv));
4301 pkgname = SvPV_const(sv, len);
4304 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4308 * S_readpipe_override
4309 * Check whether readpipe() is overridden, and generates the appropriate
4310 * optree, provided sublex_start() is called afterwards.
4313 S_readpipe_override(pTHX)
4316 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4317 pl_yylval.ival = OP_BACKTICK;
4319 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4321 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4322 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4323 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4325 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4326 op_append_elem(OP_LIST,
4327 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4328 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4335 * The intent of this yylex wrapper is to minimize the changes to the
4336 * tokener when we aren't interested in collecting madprops. It remains
4337 * to be seen how successful this strategy will be...
4344 char *s = PL_bufptr;
4346 /* make sure PL_thiswhite is initialized */
4350 /* previous token ate up our whitespace? */
4351 if (!PL_lasttoke && PL_nextwhite) {
4352 PL_thiswhite = PL_nextwhite;
4356 /* isolate the token, and figure out where it is without whitespace */
4357 PL_realtokenstart = -1;
4361 assert(PL_curforce < 0);
4363 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4364 if (!PL_thistoken) {
4365 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4366 PL_thistoken = newSVpvs("");
4368 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4369 PL_thistoken = newSVpvn(tstart, s - tstart);
4372 if (PL_thismad) /* install head */
4373 CURMAD('X', PL_thistoken);
4376 /* last whitespace of a sublex? */
4377 if (optype == ')' && PL_endwhite) {
4378 CURMAD('X', PL_endwhite);
4383 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4384 if (!PL_thiswhite && !PL_endwhite && !optype) {
4385 sv_free(PL_thistoken);
4390 /* put off final whitespace till peg */
4391 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4392 PL_nextwhite = PL_thiswhite;
4395 else if (PL_thisopen) {
4396 CURMAD('q', PL_thisopen);
4398 sv_free(PL_thistoken);
4402 /* Store actual token text as madprop X */
4403 CURMAD('X', PL_thistoken);
4407 /* add preceding whitespace as madprop _ */
4408 CURMAD('_', PL_thiswhite);
4412 /* add quoted material as madprop = */
4413 CURMAD('=', PL_thisstuff);
4417 /* add terminating quote as madprop Q */
4418 CURMAD('Q', PL_thisclose);
4422 /* special processing based on optype */
4426 /* opval doesn't need a TOKEN since it can already store mp */
4436 if (pl_yylval.opval)
4437 append_madprops(PL_thismad, pl_yylval.opval, 0);
4445 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4458 /* remember any fake bracket that lexer is about to discard */
4459 if (PL_lex_brackets == 1 &&
4460 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4463 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4466 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4467 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4470 break; /* don't bother looking for trailing comment */
4479 /* attach a trailing comment to its statement instead of next token */
4483 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4485 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4487 if (*s == '\n' || *s == '#') {
4488 while (s < PL_bufend && *s != '\n')
4492 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4493 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4506 /* Create new token struct. Note: opvals return early above. */
4507 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4514 S_tokenize_use(pTHX_ int is_use, char *s) {
4517 PERL_ARGS_ASSERT_TOKENIZE_USE;
4519 if (PL_expect != XSTATE)
4520 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4521 is_use ? "use" : "no"));
4524 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4525 s = force_version(s, TRUE);
4526 if (*s == ';' || *s == '}'
4527 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4528 start_force(PL_curforce);
4529 NEXTVAL_NEXTTOKE.opval = NULL;
4532 else if (*s == 'v') {
4533 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4534 s = force_version(s, FALSE);
4538 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4539 s = force_version(s, FALSE);
4541 pl_yylval.ival = is_use;
4545 static const char* const exp_name[] =
4546 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4547 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4551 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4553 S_word_takes_any_delimeter(char *p, STRLEN len)
4555 return (len == 1 && strchr("msyq", p[0])) ||
4557 (p[0] == 't' && p[1] == 'r') ||
4558 (p[0] == 'q' && strchr("qwxr", p[1]))));
4564 Works out what to call the token just pulled out of the input
4565 stream. The yacc parser takes care of taking the ops we return and
4566 stitching them into a tree.
4569 The type of the next token
4572 Switch based on the current state:
4573 - if we already built the token before, use it
4574 - if we have a case modifier in a string, deal with that
4575 - handle other cases of interpolation inside a string
4576 - scan the next line if we are inside a format
4577 In the normal state switch on the next character:
4579 if alphabetic, go to key lookup
4580 unrecoginized character - croak
4581 - 0/4/26: handle end-of-line or EOF
4582 - cases for whitespace
4583 - \n and #: handle comments and line numbers
4584 - various operators, brackets and sigils
4587 - 'v': vstrings (or go to key lookup)
4588 - 'x' repetition operator (or go to key lookup)
4589 - other ASCII alphanumerics (key lookup begins here):
4592 scan built-in keyword (but do nothing with it yet)
4593 check for statement label
4594 check for lexical subs
4595 goto just_a_word if there is one
4596 see whether built-in keyword is overridden
4597 switch on keyword number:
4598 - default: just_a_word:
4599 not a built-in keyword; handle bareword lookup
4600 disambiguate between method and sub call
4601 fall back to bareword
4602 - cases for built-in keywords
4607 #pragma segment Perl_yylex
4613 char *s = PL_bufptr;
4620 /* orig_keyword, gvp, and gv are initialized here because
4621 * jump to the label just_a_word_zero can bypass their
4622 * initialization later. */
4623 I32 orig_keyword = 0;
4628 SV* tmp = newSVpvs("");
4629 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4630 (IV)CopLINE(PL_curcop),
4631 lex_state_names[PL_lex_state],
4632 exp_name[PL_expect],
4633 pv_display(tmp, s, strlen(s), 0, 60));
4637 switch (PL_lex_state) {
4639 case LEX_NORMAL: /* Some compilers will produce faster */
4640 case LEX_INTERPNORMAL: /* code if we comment these out. */
4644 /* when we've already built the next token, just pull it out of the queue */
4648 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4650 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4651 PL_nexttoke[PL_lasttoke].next_mad = 0;
4652 if (PL_thismad && PL_thismad->mad_key == '_') {
4653 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4654 PL_thismad->mad_val = 0;
4655 mad_free(PL_thismad);
4660 PL_lex_state = PL_lex_defer;
4661 PL_expect = PL_lex_expect;
4662 PL_lex_defer = LEX_NORMAL;
4663 if (!PL_nexttoke[PL_lasttoke].next_type)
4668 pl_yylval = PL_nextval[PL_nexttoke];
4670 PL_lex_state = PL_lex_defer;
4671 PL_expect = PL_lex_expect;
4672 PL_lex_defer = LEX_NORMAL;
4678 next_type = PL_nexttoke[PL_lasttoke].next_type;
4680 next_type = PL_nexttype[PL_nexttoke];
4682 if (next_type & (7<<24)) {
4683 if (next_type & (1<<24)) {
4684 if (PL_lex_brackets > 100)
4685 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4686 PL_lex_brackstack[PL_lex_brackets++] =
4687 (char) ((next_type >> 16) & 0xff);
4689 if (next_type & (2<<24))
4690 PL_lex_allbrackets++;
4691 if (next_type & (4<<24))
4692 PL_lex_allbrackets--;
4693 next_type &= 0xffff;
4695 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4698 /* interpolated case modifiers like \L \U, including \Q and \E.
4699 when we get here, PL_bufptr is at the \
4701 case LEX_INTERPCASEMOD:
4703 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4705 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4706 PL_bufptr, PL_bufend, *PL_bufptr);
4708 /* handle \E or end of string */
4709 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4711 if (PL_lex_casemods) {
4712 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4713 PL_lex_casestack[PL_lex_casemods] = '\0';
4715 if (PL_bufptr != PL_bufend
4716 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4717 || oldmod == 'F')) {
4719 PL_lex_state = LEX_INTERPCONCAT;
4722 PL_thistoken = newSVpvs("\\E");
4725 PL_lex_allbrackets--;
4728 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4729 /* Got an unpaired \E */
4730 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4731 "Useless use of \\E");
4734 while (PL_bufptr != PL_bufend &&
4735 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4738 PL_thiswhite = newSVpvs("");
4739 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4744 if (PL_bufptr != PL_bufend)
4747 PL_lex_state = LEX_INTERPCONCAT;
4751 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4752 "### Saw case modifier\n"); });
4754 if (s[1] == '\\' && s[2] == 'E') {
4758 PL_thiswhite = newSVpvs("");
4759 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4763 PL_lex_state = LEX_INTERPCONCAT;
4768 if (!PL_madskills) /* when just compiling don't need correct */
4769 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4770 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4771 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4772 (strchr(PL_lex_casestack, 'L')
4773 || strchr(PL_lex_casestack, 'U')
4774 || strchr(PL_lex_casestack, 'F'))) {
4775 PL_lex_casestack[--PL_lex_casemods] = '\0';
4776 PL_lex_allbrackets--;
4779 if (PL_lex_casemods > 10)
4780 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4781 PL_lex_casestack[PL_lex_casemods++] = *s;
4782 PL_lex_casestack[PL_lex_casemods] = '\0';
4783 PL_lex_state = LEX_INTERPCONCAT;
4784 start_force(PL_curforce);
4785 NEXTVAL_NEXTTOKE.ival = 0;
4786 force_next((2<<24)|'(');
4787 start_force(PL_curforce);
4789 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4791 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4793 NEXTVAL_NEXTTOKE.ival = OP_LC;
4795 NEXTVAL_NEXTTOKE.ival = OP_UC;
4797 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4799 NEXTVAL_NEXTTOKE.ival = OP_FC;
4801 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4803 SV* const tmpsv = newSVpvs("\\ ");
4804 /* replace the space with the character we want to escape
4806 SvPVX(tmpsv)[1] = *s;
4812 if (PL_lex_starts) {
4818 sv_free(PL_thistoken);
4819 PL_thistoken = newSVpvs("");
4822 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4823 if (PL_lex_casemods == 1 && PL_lex_inpat)
4832 case LEX_INTERPPUSH:
4833 return REPORT(sublex_push());
4835 case LEX_INTERPSTART:
4836 if (PL_bufptr == PL_bufend)
4837 return REPORT(sublex_done());
4838 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4839 "### Interpolated variable\n"); });
4841 PL_lex_dojoin = (*PL_bufptr == '@');
4842 PL_lex_state = LEX_INTERPNORMAL;
4843 if (PL_lex_dojoin) {
4844 start_force(PL_curforce);
4845 NEXTVAL_NEXTTOKE.ival = 0;
4847 start_force(PL_curforce);
4848 force_ident("\"", '$');
4849 start_force(PL_curforce);
4850 NEXTVAL_NEXTTOKE.ival = 0;
4852 start_force(PL_curforce);
4853 NEXTVAL_NEXTTOKE.ival = 0;
4854 force_next((2<<24)|'(');
4855 start_force(PL_curforce);
4856 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4859 /* Convert (?{...}) and friends to 'do {...}' */
4860 if (PL_lex_inpat && *PL_bufptr == '(') {
4861 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4863 if (*PL_bufptr != '{')
4865 start_force(PL_curforce);
4866 /* XXX probably need a CURMAD(something) here */
4867 PL_expect = XTERMBLOCK;
4871 if (PL_lex_starts++) {
4876 sv_free(PL_thistoken);
4877 PL_thistoken = newSVpvs("");
4880 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4881 if (!PL_lex_casemods && PL_lex_inpat)
4888 case LEX_INTERPENDMAYBE:
4889 if (intuit_more(PL_bufptr)) {
4890 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4896 if (PL_lex_dojoin) {
4897 PL_lex_dojoin = FALSE;
4898 PL_lex_state = LEX_INTERPCONCAT;
4902 sv_free(PL_thistoken);
4903 PL_thistoken = newSVpvs("");
4906 PL_lex_allbrackets--;
4909 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4910 && SvEVALED(PL_lex_repl))
4912 if (PL_bufptr != PL_bufend)
4913 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4916 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4917 re_eval_str. If the here-doc body’s length equals the previous
4918 value of re_eval_start, re_eval_start will now be null. So
4919 check re_eval_str as well. */
4920 if (PL_parser->lex_shared->re_eval_start
4921 || PL_parser->lex_shared->re_eval_str) {
4923 if (*PL_bufptr != ')')
4924 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4926 /* having compiled a (?{..}) expression, return the original
4927 * text too, as a const */
4928 if (PL_parser->lex_shared->re_eval_str) {
4929 sv = PL_parser->lex_shared->re_eval_str;
4930 PL_parser->lex_shared->re_eval_str = NULL;
4932 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4933 SvPV_shrink_to_cur(sv);
4935 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4936 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4937 start_force(PL_curforce);
4938 /* XXX probably need a CURMAD(something) here */
4939 NEXTVAL_NEXTTOKE.opval =
4940 (OP*)newSVOP(OP_CONST, 0,
4943 PL_parser->lex_shared->re_eval_start = NULL;
4949 case LEX_INTERPCONCAT:
4951 if (PL_lex_brackets)
4952 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4953 (long) PL_lex_brackets);
4955 if (PL_bufptr == PL_bufend)
4956 return REPORT(sublex_done());
4958 /* m'foo' still needs to be parsed for possible (?{...}) */
4959 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4960 SV *sv = newSVsv(PL_linestr);
4962 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4966 s = scan_const(PL_bufptr);
4968 PL_lex_state = LEX_INTERPCASEMOD;
4970 PL_lex_state = LEX_INTERPSTART;
4973 if (s != PL_bufptr) {
4974 start_force(PL_curforce);
4976 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4978 NEXTVAL_NEXTTOKE = pl_yylval;
4981 if (PL_lex_starts++) {
4985 sv_free(PL_thistoken);
4986 PL_thistoken = newSVpvs("");
4989 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4990 if (!PL_lex_casemods && PL_lex_inpat)
5003 s = scan_formline(PL_bufptr);
5004 if (!PL_lex_formbrack)
5014 PL_oldoldbufptr = PL_oldbufptr;
5020 sv_free(PL_thistoken);
5023 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5027 if (isIDFIRST_lazy_if(s,UTF))
5030 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5031 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5033 SVs_TEMP | SVf_UTF8),
5034 10, UNI_DISPLAY_ISPRINT))
5035 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5036 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5037 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5038 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5046 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5050 goto fake_eof; /* emulate EOF on ^D or ^Z */
5056 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5059 if (PL_lex_brackets &&
5060 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5061 yyerror((const char *)
5063 ? "Format not terminated"
5064 : "Missing right curly or square bracket"));
5066 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5067 "### Tokener got EOF\n");
5071 if (s++ < PL_bufend)
5072 goto retry; /* ignore stray nulls */
5075 if (!PL_in_eval && !PL_preambled) {
5076 PL_preambled = TRUE;
5082 /* Generate a string of Perl code to load the debugger.
5083 * If PERL5DB is set, it will return the contents of that,
5084 * otherwise a compile-time require of perl5db.pl. */
5086 const char * const pdb = PerlEnv_getenv("PERL5DB");
5089 sv_setpv(PL_linestr, pdb);
5090 sv_catpvs(PL_linestr,";");
5092 SETERRNO(0,SS_NORMAL);
5093 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5096 sv_setpvs(PL_linestr,"");
5097 if (PL_preambleav) {
5098 SV **svp = AvARRAY(PL_preambleav);
5099 SV **const end = svp + AvFILLp(PL_preambleav);
5101 sv_catsv(PL_linestr, *svp);
5103 sv_catpvs(PL_linestr, ";");
5105 sv_free(MUTABLE_SV(PL_preambleav));
5106 PL_preambleav = NULL;
5109 sv_catpvs(PL_linestr,
5110 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5111 if (PL_minus_n || PL_minus_p) {
5112 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5114 sv_catpvs(PL_linestr,"chomp;");
5117 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5118 || *PL_splitstr == '"')
5119 && strchr(PL_splitstr + 1, *PL_splitstr))
5120 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5122 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5123 bytes can be used as quoting characters. :-) */
5124 const char *splits = PL_splitstr;
5125 sv_catpvs(PL_linestr, "our @F=split(q\0");
5128 if (*splits == '\\')
5129 sv_catpvn(PL_linestr, splits, 1);
5130 sv_catpvn(PL_linestr, splits, 1);
5131 } while (*splits++);
5132 /* This loop will embed the trailing NUL of
5133 PL_linestr as the last thing it does before
5135 sv_catpvs(PL_linestr, ");");
5139 sv_catpvs(PL_linestr,"our @F=split(' ');");
5142 sv_catpvs(PL_linestr, "\n");
5143 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5144 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5145 PL_last_lop = PL_last_uni = NULL;
5146 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5147 update_debugger_info(PL_linestr, NULL, 0);
5152 bof = PL_rsfp ? TRUE : FALSE;
5155 fake_eof = LEX_FAKE_EOF;
5157 PL_bufptr = PL_bufend;
5158 COPLINE_INC_WITH_HERELINES;
5159 if (!lex_next_chunk(fake_eof)) {
5160 CopLINE_dec(PL_curcop);
5162 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5164 CopLINE_dec(PL_curcop);
5167 PL_realtokenstart = -1;
5170 /* If it looks like the start of a BOM or raw UTF-16,
5171 * check if it in fact is. */
5172 if (bof && PL_rsfp &&
5177 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5178 bof = (offset == (Off_t)SvCUR(PL_linestr));
5179 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5180 /* offset may include swallowed CR */
5182 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5185 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5186 s = swallow_bom((U8*)s);
5189 if (PL_parser->in_pod) {
5190 /* Incest with pod. */
5193 sv_catsv(PL_thiswhite, PL_linestr);
5195 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5196 sv_setpvs(PL_linestr, "");
5197 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5198 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5199 PL_last_lop = PL_last_uni = NULL;
5200 PL_parser->in_pod = 0;
5203 if (PL_rsfp || PL_parser->filtered)
5205 } while (PL_parser->in_pod);
5206 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5207 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5208 PL_last_lop = PL_last_uni = NULL;
5209 if (CopLINE(PL_curcop) == 1) {
5210 while (s < PL_bufend && isSPACE(*s))
5212 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5216 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5220 if (*s == '#' && *(s+1) == '!')
5222 #ifdef ALTERNATE_SHEBANG
5224 static char const as[] = ALTERNATE_SHEBANG;
5225 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5226 d = s + (sizeof(as) - 1);
5228 #endif /* ALTERNATE_SHEBANG */
5237 while (*d && !isSPACE(*d))
5241 #ifdef ARG_ZERO_IS_SCRIPT
5242 if (ipathend > ipath) {
5244 * HP-UX (at least) sets argv[0] to the script name,
5245 * which makes $^X incorrect. And Digital UNIX and Linux,
5246 * at least, set argv[0] to the basename of the Perl
5247 * interpreter. So, having found "#!", we'll set it right.
5249 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5251 assert(SvPOK(x) || SvGMAGICAL(x));
5252 if (sv_eq(x, CopFILESV(PL_curcop))) {
5253 sv_setpvn(x, ipath, ipathend - ipath);
5259 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5260 const char * const lstart = SvPV_const(x,llen);
5262 bstart += blen - llen;
5263 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5264 sv_setpvn(x, ipath, ipathend - ipath);
5269 TAINT_NOT; /* $^X is always tainted, but that's OK */
5271 #endif /* ARG_ZERO_IS_SCRIPT */
5276 d = instr(s,"perl -");
5278 d = instr(s,"perl");
5280 /* avoid getting into infinite loops when shebang
5281 * line contains "Perl" rather than "perl" */
5283 for (d = ipathend-4; d >= ipath; --d) {
5284 if ((*d == 'p' || *d == 'P')
5285 && !ibcmp(d, "perl", 4))
5295 #ifdef ALTERNATE_SHEBANG
5297 * If the ALTERNATE_SHEBANG on this system starts with a
5298 * character that can be part of a Perl expression, then if
5299 * we see it but not "perl", we're probably looking at the
5300 * start of Perl code, not a request to hand off to some
5301 * other interpreter. Similarly, if "perl" is there, but
5302 * not in the first 'word' of the line, we assume the line
5303 * contains the start of the Perl program.
5305 if (d && *s != '#') {
5306 const char *c = ipath;
5307 while (*c && !strchr("; \t\r\n\f\v#", *c))
5310 d = NULL; /* "perl" not in first word; ignore */
5312 *s = '#'; /* Don't try to parse shebang line */
5314 #endif /* ALTERNATE_SHEBANG */
5319 !instr(s,"indir") &&
5320 instr(PL_origargv[0],"perl"))
5327 while (s < PL_bufend && isSPACE(*s))
5329 if (s < PL_bufend) {
5330 Newx(newargv,PL_origargc+3,char*);
5332 while (s < PL_bufend && !isSPACE(*s))
5335 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5338 newargv = PL_origargv;
5341 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5343 Perl_croak(aTHX_ "Can't exec %s", ipath);
5346 while (*d && !isSPACE(*d))
5348 while (SPACE_OR_TAB(*d))
5352 const bool switches_done = PL_doswitches;
5353 const U32 oldpdb = PL_perldb;
5354 const bool oldn = PL_minus_n;
5355 const bool oldp = PL_minus_p;
5359 bool baduni = FALSE;
5361 const char *d2 = d1 + 1;
5362 if (parse_unicode_opts((const char **)&d2)
5366 if (baduni || *d1 == 'M' || *d1 == 'm') {
5367 const char * const m = d1;
5368 while (*d1 && !isSPACE(*d1))
5370 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5373 d1 = moreswitches(d1);
5375 if (PL_doswitches && !switches_done) {
5376 int argc = PL_origargc;
5377 char **argv = PL_origargv;
5380 } while (argc && argv[0][0] == '-' && argv[0][1]);
5381 init_argv_symbols(argc,argv);
5383 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5384 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5385 /* if we have already added "LINE: while (<>) {",
5386 we must not do it again */
5388 sv_setpvs(PL_linestr, "");
5389 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5390 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5391 PL_last_lop = PL_last_uni = NULL;
5392 PL_preambled = FALSE;
5393 if (PERLDB_LINE || PERLDB_SAVESRC)
5394 (void)gv_fetchfile(PL_origfilename);
5401 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5402 PL_lex_state = LEX_FORMLINE;
5403 start_force(PL_curforce);
5404 NEXTVAL_NEXTTOKE.ival = 0;
5405 force_next(FORMRBRACK);
5410 #ifdef PERL_STRICT_CR
5411 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5413 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5415 case ' ': case '\t': case '\f': case 013:
5417 PL_realtokenstart = -1;
5420 PL_thiswhite = newSVpvs("");
5421 sv_catpvn(PL_thiswhite, s, 1);
5429 PL_realtokenstart = -1;
5433 if (PL_lex_state != LEX_NORMAL ||
5434 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5435 if (*s == '#' && s == PL_linestart && PL_in_eval
5436 && !PL_rsfp && !PL_parser->filtered) {
5437 /* handle eval qq[#line 1 "foo"\n ...] */
5438 CopLINE_dec(PL_curcop);
5441 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5443 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5447 const bool in_comment = *s == '#';
5449 while (d < PL_bufend && *d != '\n')
5453 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5454 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5458 PL_thiswhite = newSVpvn(s, d - s);
5461 if (in_comment && d == PL_bufend
5462 && PL_lex_state == LEX_INTERPNORMAL
5463 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5464 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5467 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5468 PL_lex_state = LEX_FORMLINE;
5469 start_force(PL_curforce);
5470 NEXTVAL_NEXTTOKE.ival = 0;
5471 force_next(FORMRBRACK);
5477 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5478 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5481 TOKEN(PEG); /* make sure any #! line is accessible */
5486 /* if (PL_madskills && PL_lex_formbrack) { */
5488 while (d < PL_bufend && *d != '\n')
5492 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5493 Perl_croak(aTHX_ "panic: input overflow");
5494 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5496 PL_thiswhite = newSVpvs("");
5497 if (CopLINE(PL_curcop) == 1) {
5498 sv_setpvs(PL_thiswhite, "");
5501 sv_catpvn(PL_thiswhite, s, d - s);
5515 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5523 while (s < PL_bufend && SPACE_OR_TAB(*s))
5526 if (strnEQ(s,"=>",2)) {
5527 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5528 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5529 OPERATOR('-'); /* unary minus */
5531 PL_last_uni = PL_oldbufptr;
5533 case 'r': ftst = OP_FTEREAD; break;
5534 case 'w': ftst = OP_FTEWRITE; break;
5535 case 'x': ftst = OP_FTEEXEC; break;
5536 case 'o': ftst = OP_FTEOWNED; break;
5537 case 'R': ftst = OP_FTRREAD; break;
5538 case 'W': ftst = OP_FTRWRITE; break;
5539 case 'X': ftst = OP_FTREXEC; break;
5540 case 'O': ftst = OP_FTROWNED; break;
5541 case 'e': ftst = OP_FTIS; break;
5542 case 'z': ftst = OP_FTZERO; break;
5543 case 's': ftst = OP_FTSIZE; break;
5544 case 'f': ftst = OP_FTFILE; break;
5545 case 'd': ftst = OP_FTDIR; break;
5546 case 'l': ftst = OP_FTLINK; break;
5547 case 'p': ftst = OP_FTPIPE; break;
5548 case 'S': ftst = OP_FTSOCK; break;
5549 case 'u': ftst = OP_FTSUID; break;
5550 case 'g': ftst = OP_FTSGID; break;
5551 case 'k': ftst = OP_FTSVTX; break;
5552 case 'b': ftst = OP_FTBLK; break;
5553 case 'c': ftst = OP_FTCHR; break;
5554 case 't': ftst = OP_FTTTY; break;
5555 case 'T': ftst = OP_FTTEXT; break;
5556 case 'B': ftst = OP_FTBINARY; break;
5557 case 'M': case 'A': case 'C':
5558 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5560 case 'M': ftst = OP_FTMTIME; break;
5561 case 'A': ftst = OP_FTATIME; break;
5562 case 'C': ftst = OP_FTCTIME; break;
5570 PL_last_lop_op = (OPCODE)ftst;
5571 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5572 "### Saw file test %c\n", (int)tmp);
5577 /* Assume it was a minus followed by a one-letter named
5578 * subroutine call (or a -bareword), then. */
5579 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5580 "### '-%c' looked like a file test but was not\n",
5587 const char tmp = *s++;
5590 if (PL_expect == XOPERATOR)
5595 else if (*s == '>') {
5598 if (isIDFIRST_lazy_if(s,UTF)) {
5599 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5607 if (PL_expect == XOPERATOR) {
5608 if (*s == '=' && !PL_lex_allbrackets &&
5609 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5616 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5618 OPERATOR('-'); /* unary minus */
5624 const char tmp = *s++;
5627 if (PL_expect == XOPERATOR)
5632 if (PL_expect == XOPERATOR) {
5633 if (*s == '=' && !PL_lex_allbrackets &&
5634 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5641 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5648 if (PL_expect != XOPERATOR) {
5649 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5650 PL_expect = XOPERATOR;
5651 force_ident(PL_tokenbuf, '*');
5659 if (*s == '=' && !PL_lex_allbrackets &&
5660 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5666 if (*s == '=' && !PL_lex_allbrackets &&
5667 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5674 if (PL_expect == XOPERATOR) {
5675 if (s[1] == '=' && !PL_lex_allbrackets &&
5676 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5681 PL_tokenbuf[0] = '%';
5682 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5683 sizeof PL_tokenbuf - 1, FALSE);
5684 if (!PL_tokenbuf[1]) {
5687 PL_expect = XOPERATOR;
5688 force_ident_maybe_lex('%');
5692 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5693 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5698 if (PL_lex_brackets > 100)
5699 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5700 PL_lex_brackstack[PL_lex_brackets++] = 0;
5701 PL_lex_allbrackets++;
5703 const char tmp = *s++;
5708 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5710 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5718 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5725 goto just_a_word_zero_gv;
5728 switch (PL_expect) {
5734 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5736 PL_bufptr = s; /* update in case we back off */
5739 "Use of := for an empty attribute list is not allowed");
5746 PL_expect = XTERMBLOCK;
5749 stuffstart = s - SvPVX(PL_linestr) - 1;
5753 while (isIDFIRST_lazy_if(s,UTF)) {
5756 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5757 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5758 if (tmp < 0) tmp = -tmp;
5773 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5775 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5777 /* MUST advance bufptr here to avoid bogus
5778 "at end of line" context messages from yyerror().
5780 PL_bufptr = s + len;
5781 yyerror("Unterminated attribute parameter in attribute list");
5785 return REPORT(0); /* EOF indicator */
5789 sv_catsv(sv, PL_lex_stuff);
5790 attrs = op_append_elem(OP_LIST, attrs,
5791 newSVOP(OP_CONST, 0, sv));
5792 SvREFCNT_dec(PL_lex_stuff);
5793 PL_lex_stuff = NULL;
5796 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5798 if (PL_in_my == KEY_our) {
5799 deprecate(":unique");
5802 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5805 /* NOTE: any CV attrs applied here need to be part of
5806 the CVf_BUILTIN_ATTRS define in cv.h! */
5807 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5809 CvLVALUE_on(PL_compcv);
5811 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5813 deprecate(":locked");
5815 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5817 CvMETHOD_on(PL_compcv);
5819 /* After we've set the flags, it could be argued that
5820 we don't need to do the attributes.pm-based setting
5821 process, and shouldn't bother appending recognized
5822 flags. To experiment with that, uncomment the
5823 following "else". (Note that's already been
5824 uncommented. That keeps the above-applied built-in
5825 attributes from being intercepted (and possibly
5826 rejected) by a package's attribute routines, but is
5827 justified by the performance win for the common case
5828 of applying only built-in attributes.) */
5830 attrs = op_append_elem(OP_LIST, attrs,
5831 newSVOP(OP_CONST, 0,
5835 if (*s == ':' && s[1] != ':')
5838 break; /* require real whitespace or :'s */
5839 /* XXX losing whitespace on sequential attributes here */
5843 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5844 if (*s != ';' && *s != '}' && *s != tmp
5845 && (tmp != '=' || *s != ')')) {
5846 const char q = ((*s == '\'') ? '"' : '\'');
5847 /* If here for an expression, and parsed no attrs, back
5849 if (tmp == '=' && !attrs) {
5853 /* MUST advance bufptr here to avoid bogus "at end of line"
5854 context messages from yyerror().
5857 yyerror( (const char *)
5859 ? Perl_form(aTHX_ "Invalid separator character "
5860 "%c%c%c in attribute list", q, *s, q)
5861 : "Unterminated attribute list" ) );
5869 start_force(PL_curforce);
5870 NEXTVAL_NEXTTOKE.opval = attrs;
5871 CURMAD('_', PL_nextwhite);
5876 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5877 (s - SvPVX(PL_linestr)) - stuffstart);
5882 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5886 PL_lex_allbrackets--;
5890 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5891 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5895 PL_lex_allbrackets++;
5898 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5904 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5907 PL_lex_allbrackets--;
5913 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5916 if (PL_lex_brackets <= 0)
5917 yyerror("Unmatched right square bracket");
5920 PL_lex_allbrackets--;
5921 if (PL_lex_state == LEX_INTERPNORMAL) {
5922 if (PL_lex_brackets == 0) {
5923 if (*s == '-' && s[1] == '>')
5924 PL_lex_state = LEX_INTERPENDMAYBE;
5925 else if (*s != '[' && *s != '{')
5926 PL_lex_state = LEX_INTERPEND;
5933 if (PL_lex_brackets > 100) {
5934 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5936 switch (PL_expect) {
5938 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5939 PL_lex_allbrackets++;
5940 OPERATOR(HASHBRACK);
5942 while (s < PL_bufend && SPACE_OR_TAB(*s))
5945 PL_tokenbuf[0] = '\0';
5946 if (d < PL_bufend && *d == '-') {
5947 PL_tokenbuf[0] = '-';
5949 while (d < PL_bufend && SPACE_OR_TAB(*d))
5952 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5953 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5955 while (d < PL_bufend && SPACE_OR_TAB(*d))
5958 const char minus = (PL_tokenbuf[0] == '-');
5959 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5967 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5968 PL_lex_allbrackets++;
5973 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5974 PL_lex_allbrackets++;
5979 if (PL_oldoldbufptr == PL_last_lop)
5980 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5982 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5983 PL_lex_allbrackets++;
5986 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5988 /* This hack is to get the ${} in the message. */
5990 yyerror("syntax error");
5993 OPERATOR(HASHBRACK);
5995 /* This hack serves to disambiguate a pair of curlies
5996 * as being a block or an anon hash. Normally, expectation
5997 * determines that, but in cases where we're not in a
5998 * position to expect anything in particular (like inside
5999 * eval"") we have to resolve the ambiguity. This code
6000 * covers the case where the first term in the curlies is a
6001 * quoted string. Most other cases need to be explicitly
6002 * disambiguated by prepending a "+" before the opening
6003 * curly in order to force resolution as an anon hash.
6005 * XXX should probably propagate the outer expectation
6006 * into eval"" to rely less on this hack, but that could
6007 * potentially break current behavior of eval"".
6011 if (*s == '\'' || *s == '"' || *s == '`') {
6012 /* common case: get past first string, handling escapes */
6013 for (t++; t < PL_bufend && *t != *s;)
6014 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6018 else if (*s == 'q') {
6021 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6022 && !isWORDCHAR(*t))))
6024 /* skip q//-like construct */
6026 char open, close, term;
6029 while (t < PL_bufend && isSPACE(*t))
6031 /* check for q => */
6032 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6033 OPERATOR(HASHBRACK);
6037 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6041 for (t++; t < PL_bufend; t++) {
6042 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6044 else if (*t == open)
6048 for (t++; t < PL_bufend; t++) {
6049 if (*t == '\\' && t+1 < PL_bufend)
6051 else if (*t == close && --brackets <= 0)
6053 else if (*t == open)
6060 /* skip plain q word */
6061 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6064 else if (isWORDCHAR_lazy_if(t,UTF)) {
6066 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6069 while (t < PL_bufend && isSPACE(*t))
6071 /* if comma follows first term, call it an anon hash */
6072 /* XXX it could be a comma expression with loop modifiers */
6073 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6074 || (*t == '=' && t[1] == '>')))
6075 OPERATOR(HASHBRACK);
6076 if (PL_expect == XREF)
6079 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6085 pl_yylval.ival = CopLINE(PL_curcop);
6086 if (isSPACE(*s) || *s == '#')
6087 PL_copline = NOLINE; /* invalidate current command line number */
6088 TOKEN(formbrack ? '=' : '{');
6090 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6094 if (PL_lex_brackets <= 0)
6095 yyerror("Unmatched right curly bracket");
6097 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6098 PL_lex_allbrackets--;
6099 if (PL_lex_state == LEX_INTERPNORMAL) {
6100 if (PL_lex_brackets == 0) {
6101 if (PL_expect & XFAKEBRACK) {
6102 PL_expect &= XENUMMASK;
6103 PL_lex_state = LEX_INTERPEND;
6108 PL_thiswhite = newSVpvs("");
6109 sv_catpvs(PL_thiswhite,"}");
6112 return yylex(); /* ignore fake brackets */
6114 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6115 && SvEVALED(PL_lex_repl))
6116 PL_lex_state = LEX_INTERPEND;
6117 else if (*s == '-' && s[1] == '>')
6118 PL_lex_state = LEX_INTERPENDMAYBE;
6119 else if (*s != '[' && *s != '{')
6120 PL_lex_state = LEX_INTERPEND;
6123 if (PL_expect & XFAKEBRACK) {
6124 PL_expect &= XENUMMASK;
6126 return yylex(); /* ignore fake brackets */
6128 start_force(PL_curforce);
6130 curmad('X', newSVpvn(s-1,1));
6131 CURMAD('_', PL_thiswhite);
6133 force_next(formbrack ? '.' : '}');
6134 if (formbrack) LEAVE;
6136 if (PL_madskills && !PL_thistoken)
6137 PL_thistoken = newSVpvs("");
6139 if (formbrack == 2) { /* means . where arguments were expected */
6140 start_force(PL_curforce);
6148 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6149 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6156 if (PL_expect == XOPERATOR) {
6157 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6158 && isIDFIRST_lazy_if(s,UTF))
6160 CopLINE_dec(PL_curcop);
6161 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6162 CopLINE_inc(PL_curcop);
6164 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6165 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6172 PL_tokenbuf[0] = '&';
6173 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6174 sizeof PL_tokenbuf - 1, TRUE);
6175 if (PL_tokenbuf[1]) {
6176 PL_expect = XOPERATOR;
6177 force_ident_maybe_lex('&');
6181 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6187 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6188 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6195 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6196 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6204 const char tmp = *s++;
6206 if (!PL_lex_allbrackets &&
6207 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6214 if (!PL_lex_allbrackets &&
6215 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6223 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6224 && strchr("+-*/%.^&|<",tmp))
6225 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6226 "Reversed %c= operator",(int)tmp);
6228 if (PL_expect == XSTATE && isALPHA(tmp) &&
6229 (s == PL_linestart+1 || s[-2] == '\n') )
6231 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6232 || PL_lex_state != LEX_NORMAL) {
6237 if (strnEQ(s,"=cut",4)) {
6253 PL_thiswhite = newSVpvs("");
6254 sv_catpvn(PL_thiswhite, PL_linestart,
6255 PL_bufend - PL_linestart);
6259 PL_parser->in_pod = 1;
6263 if (PL_expect == XBLOCK) {
6265 #ifdef PERL_STRICT_CR
6266 while (SPACE_OR_TAB(*t))
6268 while (SPACE_OR_TAB(*t) || *t == '\r')
6271 if (*t == '\n' || *t == '#') {
6274 SAVEI8(PL_parser->form_lex_state);
6275 SAVEI32(PL_lex_formbrack);
6276 PL_parser->form_lex_state = PL_lex_state;
6277 PL_lex_formbrack = PL_lex_brackets + 1;
6281 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6290 const char tmp = *s++;
6292 /* was this !=~ where !~ was meant?
6293 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6295 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6296 const char *t = s+1;
6298 while (t < PL_bufend && isSPACE(*t))
6301 if (*t == '/' || *t == '?' ||
6302 ((*t == 'm' || *t == 's' || *t == 'y')
6303 && !isWORDCHAR(t[1])) ||
6304 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6305 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6306 "!=~ should be !~");
6308 if (!PL_lex_allbrackets &&
6309 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6321 if (PL_expect != XOPERATOR) {
6322 if (s[1] != '<' && !strchr(s,'>'))
6325 s = scan_heredoc(s);
6327 s = scan_inputsymbol(s);
6328 PL_expect = XOPERATOR;
6329 TOKEN(sublex_start());
6335 if (*s == '=' && !PL_lex_allbrackets &&
6336 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6340 SHop(OP_LEFT_SHIFT);
6345 if (!PL_lex_allbrackets &&
6346 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6353 if (!PL_lex_allbrackets &&
6354 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6362 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6370 const char tmp = *s++;
6372 if (*s == '=' && !PL_lex_allbrackets &&
6373 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6377 SHop(OP_RIGHT_SHIFT);
6379 else if (tmp == '=') {
6380 if (!PL_lex_allbrackets &&
6381 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6389 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6398 if (PL_expect == XOPERATOR) {
6399 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6400 return deprecate_commaless_var_list();
6404 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6405 PL_tokenbuf[0] = '@';
6406 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6407 sizeof PL_tokenbuf - 1, FALSE);
6408 if (PL_expect == XOPERATOR)
6409 no_op("Array length", s);
6410 if (!PL_tokenbuf[1])
6412 PL_expect = XOPERATOR;
6413 force_ident_maybe_lex('#');
6417 PL_tokenbuf[0] = '$';
6418 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6419 sizeof PL_tokenbuf - 1, FALSE);
6420 if (PL_expect == XOPERATOR)
6422 if (!PL_tokenbuf[1]) {
6424 yyerror("Final $ should be \\$ or $name");
6430 const char tmp = *s;
6431 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6434 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6435 && intuit_more(s)) {
6437 PL_tokenbuf[0] = '@';
6438 if (ckWARN(WARN_SYNTAX)) {
6441 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6444 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6445 while (t < PL_bufend && *t != ']')
6447 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6448 "Multidimensional syntax %.*s not supported",
6449 (int)((t - PL_bufptr) + 1), PL_bufptr);
6453 else if (*s == '{') {
6455 PL_tokenbuf[0] = '%';
6456 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6457 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6459 char tmpbuf[sizeof PL_tokenbuf];
6462 } while (isSPACE(*t));
6463 if (isIDFIRST_lazy_if(t,UTF)) {
6465 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6470 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6471 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6472 "You need to quote \"%"SVf"\"",
6473 SVfARG(newSVpvn_flags(tmpbuf, len,
6474 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6480 PL_expect = XOPERATOR;
6481 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6482 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6483 if (!islop || PL_last_lop_op == OP_GREPSTART)
6484 PL_expect = XOPERATOR;
6485 else if (strchr("$@\"'`q", *s))
6486 PL_expect = XTERM; /* e.g. print $fh "foo" */
6487 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6488 PL_expect = XTERM; /* e.g. print $fh &sub */
6489 else if (isIDFIRST_lazy_if(s,UTF)) {
6490 char tmpbuf[sizeof PL_tokenbuf];
6492 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6493 if ((t2 = keyword(tmpbuf, len, 0))) {
6494 /* binary operators exclude handle interpretations */
6506 PL_expect = XTERM; /* e.g. print $fh length() */
6511 PL_expect = XTERM; /* e.g. print $fh subr() */
6514 else if (isDIGIT(*s))
6515 PL_expect = XTERM; /* e.g. print $fh 3 */
6516 else if (*s == '.' && isDIGIT(s[1]))
6517 PL_expect = XTERM; /* e.g. print $fh .3 */
6518 else if ((*s == '?' || *s == '-' || *s == '+')
6519 && !isSPACE(s[1]) && s[1] != '=')
6520 PL_expect = XTERM; /* e.g. print $fh -1 */
6521 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6523 PL_expect = XTERM; /* e.g. print $fh /.../
6524 XXX except DORDOR operator
6526 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6528 PL_expect = XTERM; /* print $fh <<"EOF" */
6531 force_ident_maybe_lex('$');
6535 if (PL_expect == XOPERATOR)
6537 PL_tokenbuf[0] = '@';
6538 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6539 if (!PL_tokenbuf[1]) {
6542 if (PL_lex_state == LEX_NORMAL)
6544 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6546 PL_tokenbuf[0] = '%';
6548 /* Warn about @ where they meant $. */
6549 if (*s == '[' || *s == '{') {
6550 if (ckWARN(WARN_SYNTAX)) {
6551 const char *t = s + 1;
6552 while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6553 t += UTF ? UTF8SKIP(t) : 1;
6554 if (*t == '}' || *t == ']') {
6556 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6557 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6559 "Scalar value %"SVf" better written as $%"SVf,
6560 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6561 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6562 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6563 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6568 PL_expect = XOPERATOR;
6569 force_ident_maybe_lex('@');
6572 case '/': /* may be division, defined-or, or pattern */
6573 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6574 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6575 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6580 case '?': /* may either be conditional or pattern */
6581 if (PL_expect == XOPERATOR) {
6584 if (!PL_lex_allbrackets &&
6585 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6589 PL_lex_allbrackets++;
6595 /* A // operator. */
6596 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6597 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6598 LEX_FAKEEOF_LOGIC)) {
6606 if (*s == '=' && !PL_lex_allbrackets &&
6607 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6616 /* Disable warning on "study /blah/" */
6617 if (PL_oldoldbufptr == PL_last_uni
6618 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6619 || memNE(PL_last_uni, "study", 5)
6620 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6624 deprecate("?PATTERN? without explicit operator");
6625 s = scan_pat(s,OP_MATCH);
6626 TERM(sublex_start());
6630 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6631 #ifdef PERL_STRICT_CR
6634 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6636 && (s == PL_linestart || s[-1] == '\n') )
6639 formbrack = 2; /* dot seen where arguments expected */
6642 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6646 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6649 if (!PL_lex_allbrackets &&
6650 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6657 pl_yylval.ival = OPf_SPECIAL;
6663 if (*s == '=' && !PL_lex_allbrackets &&
6664 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6671 case '0': case '1': case '2': case '3': case '4':
6672 case '5': case '6': case '7': case '8': case '9':
6673 s = scan_num(s, &pl_yylval);
6674 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6675 if (PL_expect == XOPERATOR)
6680 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6681 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6682 if (PL_expect == XOPERATOR) {
6683 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6684 return deprecate_commaless_var_list();
6691 pl_yylval.ival = OP_CONST;
6692 TERM(sublex_start());
6695 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6696 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6697 if (PL_expect == XOPERATOR) {
6698 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6699 return deprecate_commaless_var_list();
6706 pl_yylval.ival = OP_CONST;
6707 /* FIXME. I think that this can be const if char *d is replaced by
6708 more localised variables. */
6709 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6710 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6711 pl_yylval.ival = OP_STRINGIFY;
6715 TERM(sublex_start());
6718 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6719 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6720 if (PL_expect == XOPERATOR)
6721 no_op("Backticks",s);
6724 readpipe_override();
6725 TERM(sublex_start());
6729 if (PL_lex_inwhat && isDIGIT(*s))
6730 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6732 if (PL_expect == XOPERATOR)
6733 no_op("Backslash",s);
6737 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6738 char *start = s + 2;
6739 while (isDIGIT(*start) || *start == '_')
6741 if (*start == '.' && isDIGIT(start[1])) {
6742 s = scan_num(s, &pl_yylval);
6745 else if ((*start == ':' && start[1] == ':')
6746 || (PL_expect == XSTATE && *start == ':'))
6748 else if (PL_expect == XSTATE) {
6750 while (d < PL_bufend && isSPACE(*d)) d++;
6751 if (*d == ':') goto keylookup;
6753 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6754 if (!isALPHA(*start) && (PL_expect == XTERM
6755 || PL_expect == XREF || PL_expect == XSTATE
6756 || PL_expect == XTERMORDORDOR)) {
6757 GV *const gv = gv_fetchpvn_flags(s, start - s,
6758 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6760 s = scan_num(s, &pl_yylval);
6767 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6820 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6822 /* Some keywords can be followed by any delimiter, including ':' */
6823 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6825 /* x::* is just a word, unless x is "CORE" */
6826 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6830 while (d < PL_bufend && isSPACE(*d))
6831 d++; /* no comments skipped here, or s### is misparsed */
6833 /* Is this a word before a => operator? */
6834 if (*d == '=' && d[1] == '>') {
6837 = (OP*)newSVOP(OP_CONST, 0,
6838 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6839 pl_yylval.opval->op_private = OPpCONST_BARE;
6843 /* Check for plugged-in keyword */
6847 char *saved_bufptr = PL_bufptr;
6849 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6851 if (result == KEYWORD_PLUGIN_DECLINE) {
6852 /* not a plugged-in keyword */
6853 PL_bufptr = saved_bufptr;
6854 } else if (result == KEYWORD_PLUGIN_STMT) {
6855 pl_yylval.opval = o;
6858 return REPORT(PLUGSTMT);
6859 } else if (result == KEYWORD_PLUGIN_EXPR) {
6860 pl_yylval.opval = o;
6862 PL_expect = XOPERATOR;
6863 return REPORT(PLUGEXPR);
6865 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6870 /* Check for built-in keyword */
6871 tmp = keyword(PL_tokenbuf, len, 0);
6873 /* Is this a label? */
6874 if (!anydelim && PL_expect == XSTATE
6875 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6877 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6878 pl_yylval.pval[len] = '\0';
6879 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6884 /* Check for lexical sub */
6885 if (PL_expect != XOPERATOR) {
6886 char tmpbuf[sizeof PL_tokenbuf + 1];
6888 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6889 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6890 if (off != NOT_IN_PAD) {
6891 assert(off); /* we assume this is boolean-true below */
6892 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6893 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6894 HEK * const stashname = HvNAME_HEK(stash);
6895 sv = newSVhek(stashname);
6896 sv_catpvs(sv, "::");
6897 sv_catpvn_flags(sv, PL_tokenbuf, len,
6898 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6899 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6904 rv2cv_op = newOP(OP_PADANY, 0);
6905 rv2cv_op->op_targ = off;
6906 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6907 cv = (CV *)PAD_SV(off);
6915 if (tmp < 0) { /* second-class keyword? */
6916 GV *ogv = NULL; /* override (winner) */
6917 GV *hgv = NULL; /* hidden (loser) */
6918 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6920 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6921 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6924 if (GvIMPORTED_CV(gv))
6926 else if (! CvMETHOD(cv))
6930 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6931 UTF ? -(I32)len : (I32)len, FALSE)) &&
6932 (gv = *gvp) && isGV_with_GP(gv) &&
6933 GvCVu(gv) && GvIMPORTED_CV(gv))
6940 tmp = 0; /* overridden by import or by GLOBAL */
6943 && -tmp==KEY_lock /* XXX generalizable kludge */
6946 tmp = 0; /* any sub overrides "weak" keyword */
6948 else { /* no override */
6950 if (tmp == KEY_dump) {
6951 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6952 "dump() better written as CORE::dump()");
6956 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6957 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6958 "Ambiguous call resolved as CORE::%s(), "
6959 "qualify as such or use &",
6967 default: /* not a keyword */
6968 /* Trade off - by using this evil construction we can pull the
6969 variable gv into the block labelled keylookup. If not, then
6970 we have to give it function scope so that the goto from the
6971 earlier ':' case doesn't bypass the initialisation. */
6973 just_a_word_zero_gv:
6985 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6986 const char penultchar =
6987 lastchar && PL_bufptr - 2 >= PL_linestart
6991 SV *nextPL_nextwhite = 0;
6995 /* Get the rest if it looks like a package qualifier */
6997 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6999 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7002 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
7003 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7004 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
7005 *s == '\'' ? "'" : "::");
7010 if (PL_expect == XOPERATOR) {
7011 if (PL_bufptr == PL_linestart) {
7012 CopLINE_dec(PL_curcop);
7013 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7014 CopLINE_inc(PL_curcop);
7017 no_op("Bareword",s);
7020 /* Look for a subroutine with this name in current package,
7021 unless this is a lexical sub, or name is "Foo::",
7022 in which case Foo is a bareword
7023 (and a package name). */
7025 if (len > 2 && !PL_madskills &&
7026 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7028 if (ckWARN(WARN_BAREWORD)
7029 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7030 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7031 "Bareword \"%"SVf"\" refers to nonexistent package",
7032 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7033 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7035 PL_tokenbuf[len] = '\0';
7041 /* Mustn't actually add anything to a symbol table.
7042 But also don't want to "initialise" any placeholder
7043 constants that might already be there into full
7044 blown PVGVs with attached PVCV. */
7045 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7046 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7052 /* if we saw a global override before, get the right name */
7055 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7056 len ? len : strlen(PL_tokenbuf));
7058 SV * const tmp_sv = sv;
7059 sv = newSVpvs("CORE::GLOBAL::");
7060 sv_catsv(sv, tmp_sv);
7061 SvREFCNT_dec(tmp_sv);
7065 if (PL_madskills && !PL_thistoken) {
7066 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7067 PL_thistoken = newSVpvn(start,s - start);
7068 PL_realtokenstart = s - SvPVX(PL_linestr);
7072 /* Presume this is going to be a bareword of some sort. */
7074 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7075 pl_yylval.opval->op_private = OPpCONST_BARE;
7077 /* And if "Foo::", then that's what it certainly is. */
7083 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7084 const_op->op_private = OPpCONST_BARE;
7085 rv2cv_op = newCVREF(0, const_op);
7086 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7089 /* See if it's the indirect object for a list operator. */
7091 if (PL_oldoldbufptr &&
7092 PL_oldoldbufptr < PL_bufptr &&
7093 (PL_oldoldbufptr == PL_last_lop
7094 || PL_oldoldbufptr == PL_last_uni) &&
7095 /* NO SKIPSPACE BEFORE HERE! */
7096 (PL_expect == XREF ||
7097 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7099 bool immediate_paren = *s == '(';
7101 /* (Now we can afford to cross potential line boundary.) */
7102 s = SKIPSPACE2(s,nextPL_nextwhite);
7104 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7107 /* Two barewords in a row may indicate method call. */
7109 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7110 (tmp = intuit_method(s, gv, cv))) {
7112 if (tmp == METHOD && !PL_lex_allbrackets &&
7113 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7114 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7118 /* If not a declared subroutine, it's an indirect object. */
7119 /* (But it's an indir obj regardless for sort.) */
7120 /* Also, if "_" follows a filetest operator, it's a bareword */
7123 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7125 (PL_last_lop_op != OP_MAPSTART &&
7126 PL_last_lop_op != OP_GREPSTART))))
7127 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7128 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7131 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7136 PL_expect = XOPERATOR;
7139 s = SKIPSPACE2(s,nextPL_nextwhite);
7140 PL_nextwhite = nextPL_nextwhite;
7145 /* Is this a word before a => operator? */
7146 if (*s == '=' && s[1] == '>' && !pkgname) {
7149 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7150 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7151 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7155 /* If followed by a paren, it's certainly a subroutine. */
7160 while (SPACE_OR_TAB(*d))
7162 if (*d == ')' && (sv = cv_const_sv(cv))) {
7169 PL_nextwhite = PL_thiswhite;
7172 start_force(PL_curforce);
7174 NEXTVAL_NEXTTOKE.opval =
7175 off ? rv2cv_op : pl_yylval.opval;
7176 PL_expect = XOPERATOR;
7179 PL_nextwhite = nextPL_nextwhite;
7180 curmad('X', PL_thistoken);
7181 PL_thistoken = newSVpvs("");
7185 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7186 else op_free(rv2cv_op), force_next(WORD);
7191 /* If followed by var or block, call it a method (unless sub) */
7193 if ((*s == '$' || *s == '{') && !cv) {
7195 PL_last_lop = PL_oldbufptr;
7196 PL_last_lop_op = OP_METHOD;
7197 if (!PL_lex_allbrackets &&
7198 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7199 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7203 /* If followed by a bareword, see if it looks like indir obj. */
7206 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7207 && (tmp = intuit_method(s, gv, cv))) {
7209 if (tmp == METHOD && !PL_lex_allbrackets &&
7210 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7211 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7215 /* Not a method, so call it a subroutine (if defined) */
7218 if (lastchar == '-' && penultchar != '-') {
7219 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7220 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7221 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7222 SVfARG(tmpsv), SVfARG(tmpsv));
7224 /* Check for a constant sub */
7225 if ((sv = cv_const_sv(cv))) {
7228 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7229 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7230 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7231 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7235 op_free(pl_yylval.opval);
7236 pl_yylval.opval = rv2cv_op;
7237 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7238 PL_last_lop = PL_oldbufptr;
7239 PL_last_lop_op = OP_ENTERSUB;
7240 /* Is there a prototype? */
7247 STRLEN protolen = CvPROTOLEN(cv);
7248 const char *proto = CvPROTO(cv);
7252 if ((optional = *proto == ';'))
7255 while (*proto == ';');
7259 *proto == '$' || *proto == '_'
7260 || *proto == '*' || *proto == '+'
7265 *proto == '\\' && proto[1] && proto[2] == '\0'
7268 UNIPROTO(UNIOPSUB,optional);
7269 if (*proto == '\\' && proto[1] == '[') {
7270 const char *p = proto + 2;
7271 while(*p && *p != ']')
7273 if(*p == ']' && !p[1])
7274 UNIPROTO(UNIOPSUB,optional);
7276 if (*proto == '&' && *s == '{') {
7278 sv_setpvs(PL_subname, "__ANON__");
7280 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7281 if (!PL_lex_allbrackets &&
7282 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7283 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7290 PL_nextwhite = PL_thiswhite;
7293 start_force(PL_curforce);
7294 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7297 PL_nextwhite = nextPL_nextwhite;
7298 curmad('X', PL_thistoken);
7299 PL_thistoken = newSVpvs("");
7301 force_next(off ? PRIVATEREF : WORD);
7302 if (!PL_lex_allbrackets &&
7303 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7304 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7309 /* Guess harder when madskills require "best effort". */
7310 if (PL_madskills && (!gv || !GvCVu(gv))) {
7311 int probable_sub = 0;
7312 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7314 else if (isALPHA(*s)) {
7318 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7319 if (!keyword(tmpbuf, tmplen, 0))
7322 while (d < PL_bufend && isSPACE(*d))
7324 if (*d == '=' && d[1] == '>')
7329 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7331 op_free(pl_yylval.opval);
7332 pl_yylval.opval = rv2cv_op;
7333 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7334 PL_last_lop = PL_oldbufptr;
7335 PL_last_lop_op = OP_ENTERSUB;
7336 PL_nextwhite = PL_thiswhite;
7338 start_force(PL_curforce);
7339 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7341 PL_nextwhite = nextPL_nextwhite;
7342 curmad('X', PL_thistoken);
7343 PL_thistoken = newSVpvs("");
7344 force_next(off ? PRIVATEREF : WORD);
7345 if (!PL_lex_allbrackets &&
7346 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7347 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7351 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7353 force_next(off ? PRIVATEREF : WORD);
7354 if (!PL_lex_allbrackets &&
7355 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7356 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7361 /* Call it a bare word */
7363 if (PL_hints & HINT_STRICT_SUBS)
7364 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7367 /* after "print" and similar functions (corresponding to
7368 * "F? L" in opcode.pl), whatever wasn't already parsed as
7369 * a filehandle should be subject to "strict subs".
7370 * Likewise for the optional indirect-object argument to system
7371 * or exec, which can't be a bareword */
7372 if ((PL_last_lop_op == OP_PRINT
7373 || PL_last_lop_op == OP_PRTF
7374 || PL_last_lop_op == OP_SAY
7375 || PL_last_lop_op == OP_SYSTEM
7376 || PL_last_lop_op == OP_EXEC)
7377 && (PL_hints & HINT_STRICT_SUBS))
7378 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7379 if (lastchar != '-') {
7380 if (ckWARN(WARN_RESERVED)) {
7384 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7385 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7393 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7394 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7395 "Operator or semicolon missing before %c%"SVf,
7396 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7397 strlen(PL_tokenbuf),
7398 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7399 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7400 "Ambiguous use of %c resolved as operator %c",
7401 lastchar, lastchar);
7408 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7413 (OP*)newSVOP(OP_CONST, 0,
7414 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7417 case KEY___PACKAGE__:
7419 (OP*)newSVOP(OP_CONST, 0,
7421 ? newSVhek(HvNAME_HEK(PL_curstash))
7428 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7429 const char *pname = "main";
7432 if (PL_tokenbuf[2] == 'D')
7435 PL_curstash ? PL_curstash : PL_defstash;
7436 pname = HvNAME_get(stash);
7437 plen = HvNAMELEN (stash);
7438 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7440 gv = gv_fetchpvn_flags(
7441 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7442 plen+6, GV_ADD|putf8, SVt_PVIO
7446 GvIOp(gv) = newIO();
7447 IoIFP(GvIOp(gv)) = PL_rsfp;
7448 #if defined(HAS_FCNTL) && defined(F_SETFD)
7450 const int fd = PerlIO_fileno(PL_rsfp);
7451 fcntl(fd,F_SETFD,fd >= 3);
7454 /* Mark this internal pseudo-handle as clean */
7455 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7456 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7457 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7459 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7460 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7461 /* if the script was opened in binmode, we need to revert
7462 * it to text mode for compatibility; but only iff it has CRs
7463 * XXX this is a questionable hack at best. */
7464 if (PL_bufend-PL_bufptr > 2
7465 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7468 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7469 loc = PerlIO_tell(PL_rsfp);
7470 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7473 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7475 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7476 #endif /* NETWARE */
7478 PerlIO_seek(PL_rsfp, loc, 0);
7482 #ifdef PERLIO_LAYERS
7485 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7486 else if (PL_encoding) {
7493 XPUSHs(PL_encoding);
7495 call_method("name", G_SCALAR);
7499 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7500 Perl_form(aTHX_ ":encoding(%"SVf")",
7509 if (PL_realtokenstart >= 0) {
7510 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7512 PL_endwhite = newSVpvs("");
7513 sv_catsv(PL_endwhite, PL_thiswhite);
7515 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7516 PL_realtokenstart = -1;
7518 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7528 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7537 if (PL_expect == XSTATE) {
7544 if (*s == ':' && s[1] == ':') {
7548 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7549 if ((*s == ':' && s[1] == ':')
7550 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7554 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7558 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7559 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7560 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7563 else if (tmp == KEY_require || tmp == KEY_do
7565 /* that's a way to remember we saw "CORE::" */
7578 LOP(OP_ACCEPT,XTERM);
7581 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7586 LOP(OP_ATAN2,XTERM);
7592 LOP(OP_BINMODE,XTERM);
7595 LOP(OP_BLESS,XTERM);
7604 /* We have to disambiguate the two senses of
7605 "continue". If the next token is a '{' then
7606 treat it as the start of a continue block;
7607 otherwise treat it as a control operator.
7617 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7627 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7636 if (!PL_cryptseen) {
7637 PL_cryptseen = TRUE;
7641 LOP(OP_CRYPT,XTERM);
7644 LOP(OP_CHMOD,XTERM);
7647 LOP(OP_CHOWN,XTERM);
7650 LOP(OP_CONNECT,XTERM);
7670 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7672 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7675 force_ident_maybe_lex('&');
7680 if (orig_keyword == KEY_do) {
7689 PL_hints |= HINT_BLOCK_SCOPE;
7699 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7700 STR_WITH_LEN("NDBM_File::"),
7701 STR_WITH_LEN("DB_File::"),
7702 STR_WITH_LEN("GDBM_File::"),
7703 STR_WITH_LEN("SDBM_File::"),
7704 STR_WITH_LEN("ODBM_File::"),
7706 LOP(OP_DBMOPEN,XTERM);
7712 PL_expect = XOPERATOR;
7713 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7720 pl_yylval.ival = CopLINE(PL_curcop);
7724 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7738 if (*s == '{') { /* block eval */
7739 PL_expect = XTERMBLOCK;
7740 UNIBRACK(OP_ENTERTRY);
7742 else { /* string eval */
7744 UNIBRACK(OP_ENTEREVAL);
7749 UNIBRACK(-OP_ENTEREVAL);
7763 case KEY_endhostent:
7769 case KEY_endservent:
7772 case KEY_endprotoent:
7783 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7785 pl_yylval.ival = CopLINE(PL_curcop);
7787 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7790 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7793 if ((PL_bufend - p) >= 3 &&
7794 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7796 else if ((PL_bufend - p) >= 4 &&
7797 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7800 if (isIDFIRST_lazy_if(p,UTF)) {
7801 p = scan_ident(p, PL_bufend,
7802 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7806 Perl_croak(aTHX_ "Missing $ on loop variable");
7808 s = SvPVX(PL_linestr) + soff;
7814 LOP(OP_FORMLINE,XTERM);
7823 LOP(OP_FCNTL,XTERM);
7829 LOP(OP_FLOCK,XTERM);
7832 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7837 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7842 LOP(OP_GREPSTART, XREF);
7845 PL_expect = XOPERATOR;
7846 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7861 case KEY_getpriority:
7862 LOP(OP_GETPRIORITY,XTERM);
7864 case KEY_getprotobyname:
7867 case KEY_getprotobynumber:
7868 LOP(OP_GPBYNUMBER,XTERM);
7870 case KEY_getprotoent:
7882 case KEY_getpeername:
7883 UNI(OP_GETPEERNAME);
7885 case KEY_gethostbyname:
7888 case KEY_gethostbyaddr:
7889 LOP(OP_GHBYADDR,XTERM);
7891 case KEY_gethostent:
7894 case KEY_getnetbyname:
7897 case KEY_getnetbyaddr:
7898 LOP(OP_GNBYADDR,XTERM);
7903 case KEY_getservbyname:
7904 LOP(OP_GSBYNAME,XTERM);
7906 case KEY_getservbyport:
7907 LOP(OP_GSBYPORT,XTERM);
7909 case KEY_getservent:
7912 case KEY_getsockname:
7913 UNI(OP_GETSOCKNAME);
7915 case KEY_getsockopt:
7916 LOP(OP_GSOCKOPT,XTERM);
7931 pl_yylval.ival = CopLINE(PL_curcop);
7936 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7944 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7946 pl_yylval.ival = CopLINE(PL_curcop);
7950 LOP(OP_INDEX,XTERM);
7956 LOP(OP_IOCTL,XTERM);
7968 PL_expect = XOPERATOR;
7969 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7986 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7991 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8005 LOP(OP_LISTEN,XTERM);
8014 s = scan_pat(s,OP_MATCH);
8015 TERM(sublex_start());
8018 LOP(OP_MAPSTART, XREF);
8021 LOP(OP_MKDIR,XTERM);
8024 LOP(OP_MSGCTL,XTERM);
8027 LOP(OP_MSGGET,XTERM);
8030 LOP(OP_MSGRCV,XTERM);
8033 LOP(OP_MSGSND,XTERM);
8038 PL_in_my = (U16)tmp;
8040 if (isIDFIRST_lazy_if(s,UTF)) {
8044 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8045 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8047 if (!FEATURE_LEXSUBS_IS_ENABLED)
8049 "Experimental \"%s\" subs not enabled",
8050 tmp == KEY_my ? "my" :
8051 tmp == KEY_state ? "state" : "our");
8052 Perl_ck_warner_d(aTHX_
8053 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8054 "The lexical_subs feature is experimental");
8057 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8058 if (!PL_in_my_stash) {
8061 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8062 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8065 if (PL_madskills) { /* just add type to declarator token */
8066 sv_catsv(PL_thistoken, PL_nextwhite);
8068 sv_catpvn(PL_thistoken, start, s - start);
8076 PL_expect = XOPERATOR;
8077 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8081 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8086 s = tokenize_use(0, s);
8090 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8093 if (!PL_lex_allbrackets &&
8094 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8095 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8101 if (isIDFIRST_lazy_if(s,UTF)) {
8103 for (d = s; isWORDCHAR_lazy_if(d,UTF);) {
8104 d += UTF ? UTF8SKIP(d) : 1;
8106 while (UTF8_IS_CONTINUED(*d) && _is_utf8_mark((U8*)d)) {
8107 d += UTF ? UTF8SKIP(d) : 1;
8111 for (t=d; isSPACE(*t);)
8113 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8115 && !(t[0] == '=' && t[1] == '>')
8116 && !(t[0] == ':' && t[1] == ':')
8117 && !keyword(s, d-s, 0)
8119 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
8120 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
8121 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8122 "Precedence problem: open %"SVf" should be open(%"SVf")",
8123 SVfARG(tmpsv), SVfARG(tmpsv));
8129 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8131 pl_yylval.ival = OP_OR;
8141 LOP(OP_OPEN_DIR,XTERM);
8144 checkcomma(s,PL_tokenbuf,"filehandle");
8148 checkcomma(s,PL_tokenbuf,"filehandle");
8167 s = force_word(s,WORD,FALSE,TRUE,FALSE);
8169 s = force_strict_version(s);
8170 PL_lex_expect = XBLOCK;
8174 LOP(OP_PIPE_OP,XTERM);
8177 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8180 pl_yylval.ival = OP_CONST;
8181 TERM(sublex_start());
8188 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8191 PL_expect = XOPERATOR;
8192 if (SvCUR(PL_lex_stuff)) {
8193 int warned_comma = !ckWARN(WARN_QW);
8194 int warned_comment = warned_comma;
8195 d = SvPV_force(PL_lex_stuff, len);
8197 for (; isSPACE(*d) && len; --len, ++d)
8202 if (!warned_comma || !warned_comment) {
8203 for (; !isSPACE(*d) && len; --len, ++d) {
8204 if (!warned_comma && *d == ',') {
8205 Perl_warner(aTHX_ packWARN(WARN_QW),
8206 "Possible attempt to separate words with commas");
8209 else if (!warned_comment && *d == '#') {
8210 Perl_warner(aTHX_ packWARN(WARN_QW),
8211 "Possible attempt to put comments in qw() list");
8217 for (; !isSPACE(*d) && len; --len, ++d)
8220 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8221 words = op_append_elem(OP_LIST, words,
8222 newSVOP(OP_CONST, 0, tokeq(sv)));
8227 words = newNULLLIST();
8229 SvREFCNT_dec(PL_lex_stuff);
8230 PL_lex_stuff = NULL;
8232 PL_expect = XOPERATOR;
8233 pl_yylval.opval = sawparens(words);
8238 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8241 pl_yylval.ival = OP_STRINGIFY;
8242 if (SvIVX(PL_lex_stuff) == '\'')
8243 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8244 TERM(sublex_start());
8247 s = scan_pat(s,OP_QR);
8248 TERM(sublex_start());
8251 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8254 readpipe_override();
8255 TERM(sublex_start());
8262 PL_expect = XOPERATOR;
8264 s = force_version(s, FALSE);
8266 else if (*s != 'v' || !isDIGIT(s[1])
8267 || (s = force_version(s, TRUE), *s == 'v'))
8269 *PL_tokenbuf = '\0';
8270 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8271 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8272 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8273 GV_ADD | (UTF ? SVf_UTF8 : 0));
8275 yyerror("<> should be quotes");
8277 if (orig_keyword == KEY_require) {
8285 PL_last_uni = PL_oldbufptr;
8286 PL_last_lop_op = OP_REQUIRE;
8288 return REPORT( (int)REQUIRE );
8294 PL_expect = XOPERATOR;
8295 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8299 LOP(OP_RENAME,XTERM);
8308 LOP(OP_RINDEX,XTERM);
8317 UNIDOR(OP_READLINE);
8320 UNIDOR(OP_BACKTICK);
8329 LOP(OP_REVERSE,XTERM);
8332 UNIDOR(OP_READLINK);
8339 if (pl_yylval.opval)
8340 TERM(sublex_start());
8342 TOKEN(1); /* force error */
8345 checkcomma(s,PL_tokenbuf,"filehandle");
8355 LOP(OP_SELECT,XTERM);
8361 LOP(OP_SEMCTL,XTERM);
8364 LOP(OP_SEMGET,XTERM);
8367 LOP(OP_SEMOP,XTERM);
8373 LOP(OP_SETPGRP,XTERM);
8375 case KEY_setpriority:
8376 LOP(OP_SETPRIORITY,XTERM);
8378 case KEY_sethostent:
8384 case KEY_setservent:
8387 case KEY_setprotoent:
8397 LOP(OP_SEEKDIR,XTERM);
8399 case KEY_setsockopt:
8400 LOP(OP_SSOCKOPT,XTERM);
8406 LOP(OP_SHMCTL,XTERM);
8409 LOP(OP_SHMGET,XTERM);
8412 LOP(OP_SHMREAD,XTERM);
8415 LOP(OP_SHMWRITE,XTERM);
8418 LOP(OP_SHUTDOWN,XTERM);
8427 LOP(OP_SOCKET,XTERM);
8429 case KEY_socketpair:
8430 LOP(OP_SOCKPAIR,XTERM);
8433 checkcomma(s,PL_tokenbuf,"subroutine name");
8436 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8440 LOP(OP_SPLIT,XTERM);
8443 LOP(OP_SPRINTF,XTERM);
8446 LOP(OP_SPLICE,XTERM);
8461 LOP(OP_SUBSTR,XTERM);
8467 char * const tmpbuf = PL_tokenbuf + 1;
8468 SSize_t tboffset = 0;
8469 expectation attrful;
8470 bool have_name, have_proto;
8471 const int key = tmp;
8476 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8477 SV *subtoken = PL_madskills
8478 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8483 s = SKIPSPACE2(s,tmpwhite);
8489 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8490 (*s == ':' && s[1] == ':'))
8493 SV *nametoke = NULL;
8497 attrful = XATTRBLOCK;
8498 /* remember buffer pos'n for later force_word */
8499 tboffset = s - PL_oldbufptr;
8500 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8504 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8507 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8509 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8511 sv_setpvn(PL_subname, tmpbuf, len);
8513 sv_setsv(PL_subname,PL_curstname);
8514 sv_catpvs(PL_subname,"::");
8515 sv_catpvn(PL_subname,tmpbuf,len);
8517 if (SvUTF8(PL_linestr))
8518 SvUTF8_on(PL_subname);
8524 CURMAD('X', nametoke);
8525 CURMAD('_', tmpwhite);
8526 force_ident_maybe_lex('&');
8528 s = SKIPSPACE2(d,tmpwhite);
8534 if (key == KEY_my || key == KEY_our || key==KEY_state)
8537 /* diag_listed_as: Missing name in "%s sub" */
8539 "Missing name in \"%s\"", PL_bufptr);
8541 PL_expect = XTERMBLOCK;
8542 attrful = XATTRTERM;
8543 sv_setpvs(PL_subname,"?");
8547 if (key == KEY_format) {
8549 PL_thistoken = subtoken;
8553 (void) force_word(PL_oldbufptr + tboffset, WORD,
8559 /* Look for a prototype */
8562 bool bad_proto = FALSE;
8563 bool in_brackets = FALSE;
8564 char greedy_proto = ' ';
8565 bool proto_after_greedy_proto = FALSE;
8566 bool must_be_last = FALSE;
8567 bool underscore = FALSE;
8568 bool seen_underscore = FALSE;
8569 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8572 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8574 Perl_croak(aTHX_ "Prototype not terminated");
8575 /* strip spaces and check for bad characters */
8576 d = SvPV(PL_lex_stuff, tmplen);
8578 for (p = d; tmplen; tmplen--, ++p) {
8582 if (warnillegalproto) {
8584 proto_after_greedy_proto = TRUE;
8585 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8590 if ( !strchr(";@%", *p) )
8597 else if ( *p == ']' ) {
8598 in_brackets = FALSE;
8600 else if ( (*p == '@' || *p == '%') &&
8601 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8603 must_be_last = TRUE;
8606 else if ( *p == '_' ) {
8607 underscore = seen_underscore = TRUE;
8614 if (proto_after_greedy_proto)
8615 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8616 "Prototype after '%c' for %"SVf" : %s",
8617 greedy_proto, SVfARG(PL_subname), d);
8619 SV *dsv = newSVpvs_flags("", SVs_TEMP);
8620 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8621 "Illegal character %sin prototype for %"SVf" : %s",
8622 seen_underscore ? "after '_' " : "",
8624 SvUTF8(PL_lex_stuff)
8625 ? sv_uni_display(dsv,
8626 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8628 UNI_DISPLAY_ISPRINT)
8629 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8630 PERL_PV_ESCAPE_NONASCII));
8632 SvCUR_set(PL_lex_stuff, tmp);
8637 CURMAD('q', PL_thisopen);
8638 CURMAD('_', tmpwhite);
8639 CURMAD('=', PL_thisstuff);
8640 CURMAD('Q', PL_thisclose);
8641 NEXTVAL_NEXTTOKE.opval =
8642 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8643 PL_lex_stuff = NULL;
8646 s = SKIPSPACE2(s,tmpwhite);
8654 if (*s == ':' && s[1] != ':')
8655 PL_expect = attrful;
8656 else if (*s != '{' && key == KEY_sub) {
8658 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8659 else if (*s != ';' && *s != '}')
8660 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8667 curmad('^', newSVpvs(""));
8668 CURMAD('_', tmpwhite);
8672 PL_thistoken = subtoken;
8675 NEXTVAL_NEXTTOKE.opval =
8676 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8677 PL_lex_stuff = NULL;
8683 sv_setpvs(PL_subname, "__ANON__");
8685 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8689 force_ident_maybe_lex('&');
8695 LOP(OP_SYSTEM,XREF);
8698 LOP(OP_SYMLINK,XTERM);
8701 LOP(OP_SYSCALL,XTERM);
8704 LOP(OP_SYSOPEN,XTERM);
8707 LOP(OP_SYSSEEK,XTERM);
8710 LOP(OP_SYSREAD,XTERM);
8713 LOP(OP_SYSWRITE,XTERM);
8718 TERM(sublex_start());
8739 LOP(OP_TRUNCATE,XTERM);
8751 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8753 pl_yylval.ival = CopLINE(PL_curcop);
8757 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8759 pl_yylval.ival = CopLINE(PL_curcop);
8763 LOP(OP_UNLINK,XTERM);
8769 LOP(OP_UNPACK,XTERM);
8772 LOP(OP_UTIME,XTERM);
8778 LOP(OP_UNSHIFT,XTERM);
8781 s = tokenize_use(1, s);
8791 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8793 pl_yylval.ival = CopLINE(PL_curcop);
8797 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8799 pl_yylval.ival = CopLINE(PL_curcop);
8803 PL_hints |= HINT_BLOCK_SCOPE;
8810 LOP(OP_WAITPID,XTERM);
8819 ctl_l[0] = toCTRL('L');
8821 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8824 /* Make sure $^L is defined */
8825 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8830 if (PL_expect == XOPERATOR) {
8831 if (*s == '=' && !PL_lex_allbrackets &&
8832 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8840 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8842 pl_yylval.ival = OP_XOR;
8848 #pragma segment Main
8854 Looks up an identifier in the pad or in a package
8857 PRIVATEREF if this is a lexical name.
8858 WORD if this belongs to a package.
8861 if we're in a my declaration
8862 croak if they tried to say my($foo::bar)
8863 build the ops for a my() declaration
8864 if it's an access to a my() variable
8865 build ops for access to a my() variable
8866 if in a dq string, and they've said @foo and we can't find @foo
8868 build ops for a bareword
8872 S_pending_ident(pTHX)
8876 const char pit = (char)pl_yylval.ival;
8877 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8878 /* All routes through this function want to know if there is a colon. */
8879 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8881 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8882 "### Pending identifier '%s'\n", PL_tokenbuf); });
8884 /* if we're in a my(), we can't allow dynamics here.
8885 $foo'bar has already been turned into $foo::bar, so
8886 just check for colons.
8888 if it's a legal name, the OP is a PADANY.
8891 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8893 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8894 "variable %s in \"our\"",
8895 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8896 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8900 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8901 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8902 UTF ? SVf_UTF8 : 0);
8904 pl_yylval.opval = newOP(OP_PADANY, 0);
8905 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8906 UTF ? SVf_UTF8 : 0);
8912 build the ops for accesses to a my() variable.
8917 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8918 UTF ? SVf_UTF8 : 0);
8919 if (tmp != NOT_IN_PAD) {
8920 /* might be an "our" variable" */
8921 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8922 /* build ops for a bareword */
8923 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8924 HEK * const stashname = HvNAME_HEK(stash);
8925 SV * const sym = newSVhek(stashname);
8926 sv_catpvs(sym, "::");
8927 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8928 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8929 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8933 ? (GV_ADDMULTI | GV_ADDINEVAL)
8936 ((PL_tokenbuf[0] == '$') ? SVt_PV
8937 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8942 pl_yylval.opval = newOP(OP_PADANY, 0);
8943 pl_yylval.opval->op_targ = tmp;
8949 Whine if they've said @foo in a doublequoted string,
8950 and @foo isn't a variable we can find in the symbol
8953 if (ckWARN(WARN_AMBIGUOUS) &&
8954 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8955 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8956 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8957 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8958 /* DO NOT warn for @- and @+ */
8959 && !( PL_tokenbuf[2] == '\0' &&
8960 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8963 /* Downgraded from fatal to warning 20000522 mjd */
8964 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8965 "Possible unintended interpolation of %"SVf" in string",
8966 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8967 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8971 /* build ops for a bareword */
8972 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8973 newSVpvn_flags(PL_tokenbuf + 1,
8975 UTF ? SVf_UTF8 : 0 ));
8976 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8978 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8979 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8980 | ( UTF ? SVf_UTF8 : 0 ),
8981 ((PL_tokenbuf[0] == '$') ? SVt_PV
8982 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8988 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8992 PERL_ARGS_ASSERT_CHECKCOMMA;
8994 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8995 if (ckWARN(WARN_SYNTAX)) {
8998 for (w = s+2; *w && level; w++) {
9006 /* the list of chars below is for end of statements or
9007 * block / parens, boolean operators (&&, ||, //) and branch
9008 * constructs (or, and, if, until, unless, while, err, for).
9009 * Not a very solid hack... */
9010 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9011 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9012 "%s (...) interpreted as function",name);
9015 while (s < PL_bufend && isSPACE(*s))
9019 while (s < PL_bufend && isSPACE(*s))
9021 if (isIDFIRST_lazy_if(s,UTF)) {
9022 const char * const w = s;
9023 s += UTF ? UTF8SKIP(s) : 1;
9024 while (isWORDCHAR_lazy_if(s,UTF))
9025 s += UTF ? UTF8SKIP(s) : 1;
9026 while (s < PL_bufend && isSPACE(*s))
9030 if (keyword(w, s - w, 0))
9033 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9034 if (gv && GvCVu(gv))
9036 Perl_croak(aTHX_ "No comma allowed after %s", what);
9041 /* Either returns sv, or mortalizes/frees sv and returns a new SV*.
9042 Best used as sv=new_constant(..., sv, ...).
9043 If s, pv are NULL, calls subroutine with one argument,
9044 and <type> is used with error messages only.
9045 <type> is assumed to be well formed UTF-8 */
9048 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9049 SV *sv, SV *pv, const char *type, STRLEN typelen)
9052 HV * table = GvHV(PL_hintgv); /* ^H */
9057 const char *why1 = "", *why2 = "", *why3 = "";
9059 PERL_ARGS_ASSERT_NEW_CONSTANT;
9060 /* We assume that this is true: */
9061 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9064 /* charnames doesn't work well if there have been errors found */
9065 if (PL_error_count > 0 && *key == 'c')
9067 SvREFCNT_dec_NN(sv);
9068 return &PL_sv_undef;
9071 sv_2mortal(sv); /* Parent created it permanently */
9073 || ! (PL_hints & HINT_LOCALIZE_HH)
9074 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9079 /* Here haven't found what we're looking for. If it is charnames,
9080 * perhaps it needs to be loaded. Try doing that before giving up */
9082 Perl_load_module(aTHX_
9084 newSVpvs("_charnames"),
9085 /* version parameter; no need to specify it, as if
9086 * we get too early a version, will fail anyway,
9087 * not being able to find '_charnames' */
9093 table = GvHV(PL_hintgv);
9095 && (PL_hints & HINT_LOCALIZE_HH)
9096 && (cvp = hv_fetch(table, key, keylen, FALSE))
9102 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9103 msg = Perl_form(aTHX_
9104 "Constant(%.*s) unknown",
9105 (int)(type ? typelen : len),
9111 why3 = "} is not defined";
9114 msg = Perl_form(aTHX_
9115 /* The +3 is for '\N{'; -4 for that, plus '}' */
9116 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9120 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9121 (int)(type ? typelen : len),
9122 (type ? type: s), why1, why2, why3);
9125 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9126 return SvREFCNT_inc_simple_NN(sv);
9131 pv = newSVpvn_flags(s, len, SVs_TEMP);
9133 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9135 typesv = &PL_sv_undef;
9137 PUSHSTACKi(PERLSI_OVERLOAD);
9149 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9153 /* Check the eval first */
9154 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9156 const char * errstr;
9157 sv_catpvs(errsv, "Propagated");
9158 errstr = SvPV_const(errsv, errlen);
9159 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9161 res = SvREFCNT_inc_simple_NN(sv);
9165 SvREFCNT_inc_simple_void_NN(res);
9174 why1 = "Call to &{$^H{";
9176 why3 = "}} did not return a defined value";
9178 (void)sv_2mortal(sv);
9185 /* Returns a NUL terminated string, with the length of the string written to
9189 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9193 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9195 PERL_ARGS_ASSERT_SCAN_WORD;
9199 Perl_croak(aTHX_ "%s", ident_too_long);
9201 || (!UTF && isALPHANUMERIC_L1(*s))) /* UTF handled below */
9205 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
9210 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
9214 else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) {
9215 char *t = s + UTF8SKIP(s);
9217 while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
9221 Perl_croak(aTHX_ "%s", ident_too_long);
9222 Copy(s, d, len, char);
9235 S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9238 char *bracket = NULL;
9241 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9243 PERL_ARGS_ASSERT_SCAN_IDENT;
9248 while (isDIGIT(*s)) {
9250 Perl_croak(aTHX_ "%s", ident_too_long);
9257 Perl_croak(aTHX_ "%s", ident_too_long);
9258 if (isWORDCHAR(*s)) /* UTF handled below */
9260 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9265 else if (*s == ':' && s[1] == ':') {
9269 else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) {
9270 char *t = s + UTF8SKIP(s);
9271 while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
9273 if (d + (t - s) > e)
9274 Perl_croak(aTHX_ "%s", ident_too_long);
9275 Copy(s, d, t - s, char);
9286 if (PL_lex_state != LEX_NORMAL)
9287 PL_lex_state = LEX_INTERPENDMAYBE;
9290 if (*s == '$' && s[1] &&
9291 (isWORDCHAR_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9301 const STRLEN skip = UTF8SKIP(s);
9304 for ( i = 0; i < skip; i++ )
9312 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9316 else if (ck_uni && !bracket)
9319 if (isSPACE(s[-1])) {
9321 const char ch = *s++;
9322 if (!SPACE_OR_TAB(ch)) {
9328 if (isIDFIRST_lazy_if(d,UTF)) {
9332 while ((end < send && isWORDCHAR_lazy_if(end,UTF)) || *end == ':') {
9333 end += UTF8SKIP(end);
9334 while (end < send && UTF8_IS_CONTINUED(*end) && _is_utf8_mark((U8*)end))
9335 end += UTF8SKIP(end);
9337 Copy(s, d, end - s, char);
9342 while ((isWORDCHAR(*s) || *s == ':') && d < e)
9345 Perl_croak(aTHX_ "%s", ident_too_long);
9348 while (s < send && SPACE_OR_TAB(*s))
9350 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9351 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9352 const char * const brack =
9354 ((*s == '[') ? "[...]" : "{...}");
9355 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9356 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9357 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9358 funny, dest, brack, funny, dest, brack);
9361 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9362 PL_lex_allbrackets++;
9366 /* Handle extended ${^Foo} variables
9367 * 1999-02-27 mjd-perl-patch@plover.com */
9368 else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */
9372 while (isWORDCHAR(*s) && d < e) {
9376 Perl_croak(aTHX_ "%s", ident_too_long);
9381 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9382 PL_lex_state = LEX_INTERPEND;
9385 if (PL_lex_state == LEX_NORMAL) {
9386 if (ckWARN(WARN_AMBIGUOUS) &&
9387 (keyword(dest, d - dest, 0)
9388 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9390 SV *tmp = newSVpvn_flags( dest, d - dest,
9391 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9394 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9395 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9396 funny, tmp, funny, tmp);
9401 s = bracket; /* let the parser handle it */
9405 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9406 PL_lex_state = LEX_INTERPEND;
9411 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9413 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9414 * the parse starting at 's', based on the subset that are valid in this
9415 * context input to this routine in 'valid_flags'. Advances s. Returns
9416 * TRUE if the input should be treated as a valid flag, so the next char
9417 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9418 * first call on the current regex. This routine will set it to any
9419 * charset modifier found. The caller shouldn't change it. This way,
9420 * another charset modifier encountered in the parse can be detected as an
9421 * error, as we have decided to allow only one */
9424 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9426 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9427 if (isWORDCHAR_lazy_if(*s, UTF)) {
9428 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9429 UTF ? SVf_UTF8 : 0);
9431 /* Pretend that it worked, so will continue processing before
9440 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9441 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9442 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9443 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9444 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9445 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9446 case LOCALE_PAT_MOD:
9448 goto multiple_charsets;
9450 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9453 case UNICODE_PAT_MOD:
9455 goto multiple_charsets;
9457 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9460 case ASCII_RESTRICT_PAT_MOD:
9462 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9466 /* Error if previous modifier wasn't an 'a', but if it was, see
9467 * if, and accept, a second occurrence (only) */
9469 || get_regex_charset(*pmfl)
9470 != REGEX_ASCII_RESTRICTED_CHARSET)
9472 goto multiple_charsets;
9474 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9478 case DEPENDS_PAT_MOD:
9480 goto multiple_charsets;
9482 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9491 if (*charset != c) {
9492 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9494 else if (c == 'a') {
9495 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9498 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9501 /* Pretend that it worked, so will continue processing before dieing */
9507 S_scan_pat(pTHX_ char *start, I32 type)
9511 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing,
9512 TRUE /* look for escaped bracketed metas */ );
9513 const char * const valid_flags =
9514 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9515 char charset = '\0'; /* character set modifier */
9520 PERL_ARGS_ASSERT_SCAN_PAT;
9522 /* this was only needed for the initial scan_str; set it to false
9523 * so that any (?{}) code blocks etc are parsed normally */
9524 PL_reg_state.re_reparsing = FALSE;
9526 const char * const delimiter = skipspace(start);
9530 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9531 : "Search pattern not terminated" ));
9534 pm = (PMOP*)newPMOP(type, 0);
9535 if (PL_multi_open == '?') {
9536 /* This is the only point in the code that sets PMf_ONCE: */
9537 pm->op_pmflags |= PMf_ONCE;
9539 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9540 allows us to restrict the list needed by reset to just the ??
9542 assert(type != OP_TRANS);
9544 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9547 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9550 elements = mg->mg_len / sizeof(PMOP**);
9551 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9552 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9553 mg->mg_len = elements * sizeof(PMOP**);
9554 PmopSTASH_set(pm,PL_curstash);
9561 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9562 * anon CV. False positives like qr/[(?{]/ are harmless */
9564 if (type == OP_QR) {
9566 char *e, *p = SvPV(PL_lex_stuff, len);
9568 for (; p < e; p++) {
9569 if (p[0] == '(' && p[1] == '?'
9570 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9572 pm->op_pmflags |= PMf_HAS_CV;
9576 pm->op_pmflags |= PMf_IS_QR;
9579 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9581 if (PL_madskills && modstart != s) {
9582 SV* tmptoken = newSVpvn(modstart, s - modstart);
9583 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9586 /* issue a warning if /c is specified,but /g is not */
9587 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9589 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9590 "Use of /c modifier is meaningless without /g" );
9593 PL_lex_op = (OP*)pm;
9594 pl_yylval.ival = OP_MATCH;
9599 S_scan_subst(pTHX_ char *start)
9606 char charset = '\0'; /* character set modifier */
9611 PERL_ARGS_ASSERT_SCAN_SUBST;
9613 pl_yylval.ival = OP_NULL;
9615 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9616 TRUE /* look for escaped bracketed metas */ );
9619 Perl_croak(aTHX_ "Substitution pattern not terminated");
9621 if (s[-1] == PL_multi_open)
9625 CURMAD('q', PL_thisopen);
9626 CURMAD('_', PL_thiswhite);
9627 CURMAD('E', PL_thisstuff);
9628 CURMAD('Q', PL_thisclose);
9629 PL_realtokenstart = s - SvPVX(PL_linestr);
9633 first_start = PL_multi_start;
9634 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9637 SvREFCNT_dec(PL_lex_stuff);
9638 PL_lex_stuff = NULL;
9640 Perl_croak(aTHX_ "Substitution replacement not terminated");
9642 PL_multi_start = first_start; /* so whole substitution is taken together */
9644 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9648 CURMAD('z', PL_thisopen);
9649 CURMAD('R', PL_thisstuff);
9650 CURMAD('Z', PL_thisclose);
9656 if (*s == EXEC_PAT_MOD) {
9660 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9669 curmad('m', newSVpvn(modstart, s - modstart));
9670 append_madprops(PL_thismad, (OP*)pm, 0);
9674 if ((pm->op_pmflags & PMf_CONTINUE)) {
9675 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9679 SV * const repl = newSVpvs("");
9682 pm->op_pmflags |= PMf_EVAL;
9685 sv_catpvs(repl, "eval ");
9687 sv_catpvs(repl, "do ");
9689 sv_catpvs(repl, "{");
9690 sv_catsv(repl, PL_sublex_info.repl);
9691 sv_catpvs(repl, "}");
9693 SvREFCNT_dec(PL_sublex_info.repl);
9694 PL_sublex_info.repl = repl;
9697 PL_lex_op = (OP*)pm;
9698 pl_yylval.ival = OP_SUBST;
9703 S_scan_trans(pTHX_ char *start)
9711 bool nondestruct = 0;
9716 PERL_ARGS_ASSERT_SCAN_TRANS;
9718 pl_yylval.ival = OP_NULL;
9720 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9722 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9724 if (s[-1] == PL_multi_open)
9728 CURMAD('q', PL_thisopen);
9729 CURMAD('_', PL_thiswhite);
9730 CURMAD('E', PL_thisstuff);
9731 CURMAD('Q', PL_thisclose);
9732 PL_realtokenstart = s - SvPVX(PL_linestr);
9736 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9739 SvREFCNT_dec(PL_lex_stuff);
9740 PL_lex_stuff = NULL;
9742 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9745 CURMAD('z', PL_thisopen);
9746 CURMAD('R', PL_thisstuff);
9747 CURMAD('Z', PL_thisclose);
9750 complement = del = squash = 0;
9757 complement = OPpTRANS_COMPLEMENT;
9760 del = OPpTRANS_DELETE;
9763 squash = OPpTRANS_SQUASH;
9775 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9776 o->op_private &= ~OPpTRANS_ALL;
9777 o->op_private |= del|squash|complement|
9778 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9779 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9782 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9787 curmad('m', newSVpvn(modstart, s - modstart));
9788 append_madprops(PL_thismad, o, 0);
9797 Takes a pointer to the first < in <<FOO.
9798 Returns a pointer to the byte following <<FOO.
9800 This function scans a heredoc, which involves different methods
9801 depending on whether we are in a string eval, quoted construct, etc.
9802 This is because PL_linestr could containing a single line of input, or
9803 a whole string being evalled, or the contents of the current quote-
9806 The two basic methods are:
9807 - Steal lines from the input stream
9808 - Scan the heredoc in PL_linestr and remove it therefrom
9810 In a file scope or filtered eval, the first method is used; in a
9811 string eval, the second.
9813 In a quote-like operator, we have to choose between the two,
9814 depending on where we can find a newline. We peek into outer lex-
9815 ing scopes until we find one with a newline in it. If we reach the
9816 outermost lexing scope and it is a file, we use the stream method.
9817 Otherwise it is treated as an eval.
9821 S_scan_heredoc(pTHX_ char *s)
9824 I32 op_type = OP_SCALAR;
9831 const bool infile = PL_rsfp || PL_parser->filtered;
9832 LEXSHARED *shared = PL_parser->lex_shared;
9834 I32 stuffstart = s - SvPVX(PL_linestr);
9837 PL_realtokenstart = -1;
9840 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9843 d = PL_tokenbuf + 1;
9844 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9845 *PL_tokenbuf = '\n';
9847 while (SPACE_OR_TAB(*peek))
9849 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9852 s = delimcpy(d, e, s, PL_bufend, term, &len);
9854 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9860 /* <<\FOO is equivalent to <<'FOO' */
9864 if (!isWORDCHAR_lazy_if(s,UTF))
9865 deprecate("bare << to mean <<\"\"");
9866 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
9871 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9872 Perl_croak(aTHX_ "Delimiter for here document is too long");
9875 len = d - PL_tokenbuf;
9879 tstart = PL_tokenbuf + 1;
9880 PL_thisclose = newSVpvn(tstart, len - 1);
9881 tstart = SvPVX(PL_linestr) + stuffstart;
9882 PL_thisopen = newSVpvn(tstart, s - tstart);
9883 stuffstart = s - SvPVX(PL_linestr);
9886 #ifndef PERL_STRICT_CR
9887 d = strchr(s, '\r');
9889 char * const olds = s;
9891 while (s < PL_bufend) {
9897 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9906 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9912 tstart = SvPVX(PL_linestr) + stuffstart;
9914 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9916 PL_thisstuff = newSVpvn(tstart, s - tstart);
9919 stuffstart = s - SvPVX(PL_linestr);
9922 tmpstr = newSV_type(SVt_PVIV);
9926 SvIV_set(tmpstr, -1);
9928 else if (term == '`') {
9929 op_type = OP_BACKTICK;
9930 SvIV_set(tmpstr, '\\');
9933 PL_multi_start = CopLINE(PL_curcop) + 1;
9934 PL_multi_open = PL_multi_close = '<';
9935 /* inside a string eval or quote-like operator */
9936 if (!infile || PL_lex_inwhat) {
9939 char * const olds = s;
9940 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9941 /* These two fields are not set until an inner lexing scope is
9942 entered. But we need them set here. */
9943 shared->ls_bufptr = s;
9944 shared->ls_linestr = PL_linestr;
9946 /* Look for a newline. If the current buffer does not have one,
9947 peek into the line buffer of the parent lexing scope, going
9948 up as many levels as necessary to find one with a newline
9951 while (!(s = (char *)memchr(
9952 (void *)shared->ls_bufptr, '\n',
9953 SvEND(shared->ls_linestr)-shared->ls_bufptr
9955 shared = shared->ls_prev;
9956 /* shared is only null if we have gone beyond the outermost
9957 lexing scope. In a file, we will have broken out of the
9958 loop in the previous iteration. In an eval, the string buf-
9959 fer ends with "\n;", so the while condition below will have
9960 evaluated to false. So shared can never be null. */
9962 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9963 most lexing scope. In a file, shared->ls_linestr at that
9964 level is just one line, so there is no body to steal. */
9965 if (infile && !shared->ls_prev) {
9971 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9974 linestr = shared->ls_linestr;
9975 bufend = SvEND(linestr);
9977 while (s < bufend &&
9978 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9980 ++shared->herelines;
9985 sv_setpvn(tmpstr,d+1,s-d);
9989 sv_catpvn(PL_thisstuff, d + 1, s - d);
9991 PL_thisstuff = newSVpvn(d + 1, s - d);
9992 stuffstart = s - SvPVX(PL_linestr);
9996 /* the preceding stmt passes a newline */
9997 shared->herelines++;
9999 /* s now points to the newline after the heredoc terminator.
10000 d points to the newline before the body of the heredoc.
10003 /* We are going to modify linestr in place here, so set
10004 aside copies of the string if necessary for re-evals or
10006 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10007 check shared->re_eval_str. */
10008 if (shared->re_eval_start || shared->re_eval_str) {
10009 /* Set aside the rest of the regexp */
10010 if (!shared->re_eval_str)
10011 shared->re_eval_str =
10012 newSVpvn(shared->re_eval_start,
10013 bufend - shared->re_eval_start);
10014 shared->re_eval_start -= s-d;
10016 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10017 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10018 cx->blk_eval.cur_text == linestr)
10020 cx->blk_eval.cur_text = newSVsv(linestr);
10021 SvSCREAM_on(cx->blk_eval.cur_text);
10023 /* Copy everything from s onwards back to d. */
10024 Move(s,d,bufend-s + 1,char);
10025 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10026 /* Setting PL_bufend only applies when we have not dug deeper
10027 into other scopes, because sublex_done sets PL_bufend to
10028 SvEND(PL_linestr). */
10029 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10036 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10037 term = PL_tokenbuf[1];
10039 linestr_save = PL_linestr; /* must restore this afterwards */
10040 d = s; /* and this */
10041 PL_linestr = newSVpvs("");
10042 PL_bufend = SvPVX(PL_linestr);
10045 if (PL_madskills) {
10046 tstart = SvPVX(PL_linestr) + stuffstart;
10048 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10050 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10053 PL_bufptr = PL_bufend;
10054 CopLINE_set(PL_curcop,
10055 PL_multi_start + shared->herelines);
10056 if (!lex_next_chunk(LEX_NO_TERM)
10057 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10058 SvREFCNT_dec(linestr_save);
10061 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10062 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10063 lex_grow_linestr(SvCUR(PL_linestr) + 2);
10064 sv_catpvs(PL_linestr, "\n\0");
10068 stuffstart = s - SvPVX(PL_linestr);
10070 shared->herelines++;
10071 PL_last_lop = PL_last_uni = NULL;
10072 #ifndef PERL_STRICT_CR
10073 if (PL_bufend - PL_linestart >= 2) {
10074 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10075 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10077 PL_bufend[-2] = '\n';
10079 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10081 else if (PL_bufend[-1] == '\r')
10082 PL_bufend[-1] = '\n';
10084 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10085 PL_bufend[-1] = '\n';
10087 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10088 SvREFCNT_dec(PL_linestr);
10089 PL_linestr = linestr_save;
10090 PL_linestart = SvPVX(linestr_save);
10091 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10096 sv_catsv(tmpstr,PL_linestr);
10100 PL_multi_end = CopLINE(PL_curcop);
10101 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10102 SvPV_shrink_to_cur(tmpstr);
10105 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10107 else if (PL_encoding)
10108 sv_recode_to_utf8(tmpstr, PL_encoding);
10110 PL_lex_stuff = tmpstr;
10111 pl_yylval.ival = op_type;
10115 SvREFCNT_dec(tmpstr);
10116 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10117 missingterm(PL_tokenbuf + 1);
10120 /* scan_inputsymbol
10121 takes: current position in input buffer
10122 returns: new position in input buffer
10123 side-effects: pl_yylval and lex_op are set.
10128 <FH> read from filehandle
10129 <pkg::FH> read from package qualified filehandle
10130 <pkg'FH> read from package qualified filehandle
10131 <$fh> read from filehandle in $fh
10132 <*.h> filename glob
10137 S_scan_inputsymbol(pTHX_ char *start)
10140 char *s = start; /* current position in buffer */
10143 char *d = PL_tokenbuf; /* start of temp holding space */
10144 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10146 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10148 end = strchr(s, '\n');
10151 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10153 /* die if we didn't have space for the contents of the <>,
10154 or if it didn't end, or if we see a newline
10157 if (len >= (I32)sizeof PL_tokenbuf)
10158 Perl_croak(aTHX_ "Excessively long <> operator");
10160 Perl_croak(aTHX_ "Unterminated <> operator");
10165 Remember, only scalar variables are interpreted as filehandles by
10166 this code. Anything more complex (e.g., <$fh{$num}>) will be
10167 treated as a glob() call.
10168 This code makes use of the fact that except for the $ at the front,
10169 a scalar variable and a filehandle look the same.
10171 if (*d == '$' && d[1]) d++;
10173 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10174 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10175 d += UTF ? UTF8SKIP(d) : 1;
10177 /* If we've tried to read what we allow filehandles to look like, and
10178 there's still text left, then it must be a glob() and not a getline.
10179 Use scan_str to pull out the stuff between the <> and treat it
10180 as nothing more than a string.
10183 if (d - PL_tokenbuf != len) {
10184 pl_yylval.ival = OP_GLOB;
10185 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10187 Perl_croak(aTHX_ "Glob not terminated");
10191 bool readline_overriden = FALSE;
10194 /* we're in a filehandle read situation */
10197 /* turn <> into <ARGV> */
10199 Copy("ARGV",d,5,char);
10201 /* Check whether readline() is overriden */
10202 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10204 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10206 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10207 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10208 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10209 readline_overriden = TRUE;
10211 /* if <$fh>, create the ops to turn the variable into a
10215 /* try to find it in the pad for this block, otherwise find
10216 add symbol table ops
10218 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10219 if (tmp != NOT_IN_PAD) {
10220 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10221 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10222 HEK * const stashname = HvNAME_HEK(stash);
10223 SV * const sym = sv_2mortal(newSVhek(stashname));
10224 sv_catpvs(sym, "::");
10225 sv_catpv(sym, d+1);
10230 OP * const o = newOP(OP_PADSV, 0);
10232 PL_lex_op = readline_overriden
10233 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10234 op_append_elem(OP_LIST, o,
10235 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10236 : (OP*)newUNOP(OP_READLINE, 0, o);
10245 ? (GV_ADDMULTI | GV_ADDINEVAL)
10246 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10248 PL_lex_op = readline_overriden
10249 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10250 op_append_elem(OP_LIST,
10251 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10252 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10253 : (OP*)newUNOP(OP_READLINE, 0,
10254 newUNOP(OP_RV2SV, 0,
10255 newGVOP(OP_GV, 0, gv)));
10257 if (!readline_overriden)
10258 PL_lex_op->op_flags |= OPf_SPECIAL;
10259 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10260 pl_yylval.ival = OP_NULL;
10263 /* If it's none of the above, it must be a literal filehandle
10264 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10266 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10267 PL_lex_op = readline_overriden
10268 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10269 op_append_elem(OP_LIST,
10270 newGVOP(OP_GV, 0, gv),
10271 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10272 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10273 pl_yylval.ival = OP_NULL;
10282 takes: start position in buffer
10283 keep_quoted preserve \ on the embedded delimiter(s)
10284 keep_delims preserve the delimiters around the string
10285 re_reparse compiling a run-time /(?{})/:
10286 collapse // to /, and skip encoding src
10287 returns: position to continue reading from buffer
10288 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10289 updates the read buffer.
10291 This subroutine pulls a string out of the input. It is called for:
10292 q single quotes q(literal text)
10293 ' single quotes 'literal text'
10294 qq double quotes qq(interpolate $here please)
10295 " double quotes "interpolate $here please"
10296 qx backticks qx(/bin/ls -l)
10297 ` backticks `/bin/ls -l`
10298 qw quote words @EXPORT_OK = qw( func() $spam )
10299 m// regexp match m/this/
10300 s/// regexp substitute s/this/that/
10301 tr/// string transliterate tr/this/that/
10302 y/// string transliterate y/this/that/
10303 ($*@) sub prototypes sub foo ($)
10304 (stuff) sub attr parameters sub foo : attr(stuff)
10305 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10307 In most of these cases (all but <>, patterns and transliterate)
10308 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10309 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10310 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10313 It skips whitespace before the string starts, and treats the first
10314 character as the delimiter. If the delimiter is one of ([{< then
10315 the corresponding "close" character )]}> is used as the closing
10316 delimiter. It allows quoting of delimiters, and if the string has
10317 balanced delimiters ([{<>}]) it allows nesting.
10319 On success, the SV with the resulting string is put into lex_stuff or,
10320 if that is already non-NULL, into lex_repl. The second case occurs only
10321 when parsing the RHS of the special constructs s/// and tr/// (y///).
10322 For convenience, the terminating delimiter character is stuffed into
10327 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10328 bool deprecate_escaped_meta /* Should we issue a deprecation warning
10329 for certain paired metacharacters that
10330 appear escaped within it */
10334 SV *sv; /* scalar value: string */
10335 const char *tmps; /* temp string, used for delimiter matching */
10336 char *s = start; /* current position in the buffer */
10337 char term; /* terminating character */
10338 char *to; /* current position in the sv's data */
10339 I32 brackets = 1; /* bracket nesting level */
10340 bool has_utf8 = FALSE; /* is there any utf8 content? */
10341 I32 termcode; /* terminating char. code */
10342 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10343 STRLEN termlen; /* length of terminating string */
10344 int last_off = 0; /* last position for nesting bracket */
10345 char *escaped_open = NULL;
10351 PERL_ARGS_ASSERT_SCAN_STR;
10353 /* skip space before the delimiter */
10359 if (PL_realtokenstart >= 0) {
10360 stuffstart = PL_realtokenstart;
10361 PL_realtokenstart = -1;
10364 stuffstart = start - SvPVX(PL_linestr);
10366 /* mark where we are, in case we need to report errors */
10369 /* after skipping whitespace, the next character is the terminator */
10372 termcode = termstr[0] = term;
10376 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10377 Copy(s, termstr, termlen, U8);
10378 if (!UTF8_IS_INVARIANT(term))
10382 /* mark where we are */
10383 PL_multi_start = CopLINE(PL_curcop);
10384 PL_multi_open = term;
10386 /* find corresponding closing delimiter */
10387 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10388 termcode = termstr[0] = term = tmps[5];
10390 PL_multi_close = term;
10392 /* A warning is raised if the input parameter requires it for escaped (by a
10393 * backslash) paired metacharacters {} [] and () when the delimiters are
10394 * those same characters, and the backslash is ineffective. This doesn't
10395 * happen for <>, as they aren't metas. */
10396 if (deprecate_escaped_meta
10397 && (PL_multi_open == PL_multi_close
10398 || ! ckWARN_d(WARN_DEPRECATED)
10399 || PL_multi_open == '<'))
10401 deprecate_escaped_meta = FALSE;
10404 /* create a new SV to hold the contents. 79 is the SV's initial length.
10405 What a random number. */
10406 sv = newSV_type(SVt_PVIV);
10408 SvIV_set(sv, termcode);
10409 (void)SvPOK_only(sv); /* validate pointer */
10411 /* move past delimiter and try to read a complete string */
10413 sv_catpvn(sv, s, termlen);
10416 tstart = SvPVX(PL_linestr) + stuffstart;
10417 if (PL_madskills && !PL_thisopen && !keep_delims) {
10418 PL_thisopen = newSVpvn(tstart, s - tstart);
10419 stuffstart = s - SvPVX(PL_linestr);
10423 if (PL_encoding && !UTF && !re_reparse) {
10427 int offset = s - SvPVX_const(PL_linestr);
10428 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10429 &offset, (char*)termstr, termlen);
10430 const char * const ns = SvPVX_const(PL_linestr) + offset;
10431 char * const svlast = SvEND(sv) - 1;
10433 for (; s < ns; s++) {
10434 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10435 COPLINE_INC_WITH_HERELINES;
10438 goto read_more_line;
10440 /* handle quoted delimiters */
10441 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10443 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10445 if ((svlast-1 - t) % 2) {
10446 if (!keep_quoted) {
10447 *(svlast-1) = term;
10449 SvCUR_set(sv, SvCUR(sv) - 1);
10454 if (PL_multi_open == PL_multi_close) {
10460 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10461 /* At here, all closes are "was quoted" one,
10462 so we don't check PL_multi_close. */
10464 if (!keep_quoted && *(t+1) == PL_multi_open)
10469 else if (*t == PL_multi_open)
10477 SvCUR_set(sv, w - SvPVX_const(sv));
10479 last_off = w - SvPVX(sv);
10480 if (--brackets <= 0)
10485 if (!keep_delims) {
10486 SvCUR_set(sv, SvCUR(sv) - 1);
10492 /* extend sv if need be */
10493 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10494 /* set 'to' to the next character in the sv's string */
10495 to = SvPVX(sv)+SvCUR(sv);
10497 /* if open delimiter is the close delimiter read unbridle */
10498 if (PL_multi_open == PL_multi_close) {
10499 for (; s < PL_bufend; s++,to++) {
10500 /* embedded newlines increment the current line number */
10501 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10502 COPLINE_INC_WITH_HERELINES;
10503 /* handle quoted delimiters */
10504 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10507 || (re_reparse && s[1] == '\\'))
10510 /* any other quotes are simply copied straight through */
10514 /* terminate when run out of buffer (the for() condition), or
10515 have found the terminator */
10516 else if (*s == term) {
10519 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10522 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10528 /* if the terminator isn't the same as the start character (e.g.,
10529 matched brackets), we have to allow more in the quoting, and
10530 be prepared for nested brackets.
10533 /* read until we run out of string, or we find the terminator */
10534 for (; s < PL_bufend; s++,to++) {
10535 /* embedded newlines increment the line count */
10536 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10537 COPLINE_INC_WITH_HERELINES;
10538 /* backslashes can escape the open or closing characters */
10539 if (*s == '\\' && s+1 < PL_bufend) {
10540 if (!keep_quoted &&
10541 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10545 /* Here, 'deprecate_escaped_meta' is true iff the
10546 * delimiters are paired metacharacters, and 's' points
10547 * to an occurrence of one of them within the string,
10548 * which was preceded by a backslash. If this is a
10549 * context where the delimiter is also a metacharacter,
10550 * the backslash is useless, and deprecated. () and []
10551 * are meta in any context. {} are meta only when
10552 * appearing in a quantifier or in things like '\p{'.
10553 * They also aren't meta unless there is a matching
10554 * closed, escaped char later on within the string.
10555 * If 's' points to an open, set a flag; if to a close,
10556 * test that flag, and raise a warning if it was set */
10558 if (deprecate_escaped_meta) {
10559 if (*s == PL_multi_open) {
10563 else if (regcurly(s,
10564 TRUE /* Look for a closing
10566 || (s - start > 2 /* Look for e.g.
10568 && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
10573 else if (escaped_open) {
10574 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10575 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10576 escaped_open = NULL;
10583 /* allow nested opens and closes */
10584 else if (*s == PL_multi_close && --brackets <= 0)
10586 else if (*s == PL_multi_open)
10588 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10593 /* terminate the copied string and update the sv's end-of-string */
10595 SvCUR_set(sv, to - SvPVX_const(sv));
10598 * this next chunk reads more into the buffer if we're not done yet
10602 break; /* handle case where we are done yet :-) */
10604 #ifndef PERL_STRICT_CR
10605 if (to - SvPVX_const(sv) >= 2) {
10606 if ((to[-2] == '\r' && to[-1] == '\n') ||
10607 (to[-2] == '\n' && to[-1] == '\r'))
10611 SvCUR_set(sv, to - SvPVX_const(sv));
10613 else if (to[-1] == '\r')
10616 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10621 /* if we're out of file, or a read fails, bail and reset the current
10622 line marker so we can report where the unterminated string began
10625 if (PL_madskills) {
10626 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10628 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10630 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10633 COPLINE_INC_WITH_HERELINES;
10634 PL_bufptr = PL_bufend;
10635 if (!lex_next_chunk(0)) {
10637 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10646 /* at this point, we have successfully read the delimited string */
10648 if (!PL_encoding || UTF || re_reparse) {
10650 if (PL_madskills) {
10651 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10652 const int len = s - tstart;
10654 sv_catpvn(PL_thisstuff, tstart, len);
10656 PL_thisstuff = newSVpvn(tstart, len);
10657 if (!PL_thisclose && !keep_delims)
10658 PL_thisclose = newSVpvn(s,termlen);
10663 sv_catpvn(sv, s, termlen);
10668 if (PL_madskills) {
10669 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10670 const int len = s - tstart - termlen;
10672 sv_catpvn(PL_thisstuff, tstart, len);
10674 PL_thisstuff = newSVpvn(tstart, len);
10675 if (!PL_thisclose && !keep_delims)
10676 PL_thisclose = newSVpvn(s - termlen,termlen);
10680 if (has_utf8 || (PL_encoding && !re_reparse))
10683 PL_multi_end = CopLINE(PL_curcop);
10685 /* if we allocated too much space, give some back */
10686 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10687 SvLEN_set(sv, SvCUR(sv) + 1);
10688 SvPV_renew(sv, SvLEN(sv));
10691 /* decide whether this is the first or second quoted string we've read
10696 PL_sublex_info.repl = sv;
10704 takes: pointer to position in buffer
10705 returns: pointer to new position in buffer
10706 side-effects: builds ops for the constant in pl_yylval.op
10708 Read a number in any of the formats that Perl accepts:
10710 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10711 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10714 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10716 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10719 If it reads a number without a decimal point or an exponent, it will
10720 try converting the number to an integer and see if it can do so
10721 without loss of precision.
10725 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10728 const char *s = start; /* current position in buffer */
10729 char *d; /* destination in temp buffer */
10730 char *e; /* end of temp buffer */
10731 NV nv; /* number read, as a double */
10732 SV *sv = NULL; /* place to put the converted number */
10733 bool floatit; /* boolean: int or float? */
10734 const char *lastub = NULL; /* position of last underbar */
10735 static const char* const number_too_long = "Number too long";
10737 PERL_ARGS_ASSERT_SCAN_NUM;
10739 /* We use the first character to decide what type of number this is */
10743 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10745 /* if it starts with a 0, it could be an octal number, a decimal in
10746 0.13 disguise, or a hexadecimal number, or a binary number. */
10750 u holds the "number so far"
10751 shift the power of 2 of the base
10752 (hex == 4, octal == 3, binary == 1)
10753 overflowed was the number more than we can hold?
10755 Shift is used when we add a digit. It also serves as an "are
10756 we in octal/hex/binary?" indicator to disallow hex characters
10757 when in octal mode.
10762 bool overflowed = FALSE;
10763 bool just_zero = TRUE; /* just plain 0 or binary number? */
10764 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10765 static const char* const bases[5] =
10766 { "", "binary", "", "octal", "hexadecimal" };
10767 static const char* const Bases[5] =
10768 { "", "Binary", "", "Octal", "Hexadecimal" };
10769 static const char* const maxima[5] =
10771 "0b11111111111111111111111111111111",
10775 const char *base, *Base, *max;
10777 /* check for hex */
10778 if (s[1] == 'x' || s[1] == 'X') {
10782 } else if (s[1] == 'b' || s[1] == 'B') {
10787 /* check for a decimal in disguise */
10788 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10790 /* so it must be octal */
10797 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10798 "Misplaced _ in number");
10802 base = bases[shift];
10803 Base = Bases[shift];
10804 max = maxima[shift];
10806 /* read the rest of the number */
10808 /* x is used in the overflow test,
10809 b is the digit we're adding on. */
10814 /* if we don't mention it, we're done */
10818 /* _ are ignored -- but warned about if consecutive */
10820 if (lastub && s == lastub + 1)
10821 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10822 "Misplaced _ in number");
10826 /* 8 and 9 are not octal */
10827 case '8': case '9':
10829 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10833 case '2': case '3': case '4':
10834 case '5': case '6': case '7':
10836 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10839 case '0': case '1':
10840 b = *s++ & 15; /* ASCII digit -> value of digit */
10844 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10845 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10846 /* make sure they said 0x */
10849 b = (*s++ & 7) + 9;
10851 /* Prepare to put the digit we have onto the end
10852 of the number so far. We check for overflows.
10858 x = u << shift; /* make room for the digit */
10860 if ((x >> shift) != u
10861 && !(PL_hints & HINT_NEW_BINARY)) {
10864 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10865 "Integer overflow in %s number",
10868 u = x | b; /* add the digit to the end */
10871 n *= nvshift[shift];
10872 /* If an NV has not enough bits in its
10873 * mantissa to represent an UV this summing of
10874 * small low-order numbers is a waste of time
10875 * (because the NV cannot preserve the
10876 * low-order bits anyway): we could just
10877 * remember when did we overflow and in the
10878 * end just multiply n by the right
10886 /* if we get here, we had success: make a scalar value from
10891 /* final misplaced underbar check */
10892 if (s[-1] == '_') {
10893 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10897 if (n > 4294967295.0)
10898 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10899 "%s number > %s non-portable",
10905 if (u > 0xffffffff)
10906 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10907 "%s number > %s non-portable",
10912 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10913 sv = new_constant(start, s - start, "integer",
10914 sv, NULL, NULL, 0);
10915 else if (PL_hints & HINT_NEW_BINARY)
10916 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10921 handle decimal numbers.
10922 we're also sent here when we read a 0 as the first digit
10924 case '1': case '2': case '3': case '4': case '5':
10925 case '6': case '7': case '8': case '9': case '.':
10928 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10931 /* read next group of digits and _ and copy into d */
10932 while (isDIGIT(*s) || *s == '_') {
10933 /* skip underscores, checking for misplaced ones
10937 if (lastub && s == lastub + 1)
10938 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10939 "Misplaced _ in number");
10943 /* check for end of fixed-length buffer */
10945 Perl_croak(aTHX_ "%s", number_too_long);
10946 /* if we're ok, copy the character */
10951 /* final misplaced underbar check */
10952 if (lastub && s == lastub + 1) {
10953 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10956 /* read a decimal portion if there is one. avoid
10957 3..5 being interpreted as the number 3. followed
10960 if (*s == '.' && s[1] != '.') {
10965 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10966 "Misplaced _ in number");
10970 /* copy, ignoring underbars, until we run out of digits.
10972 for (; isDIGIT(*s) || *s == '_'; s++) {
10973 /* fixed length buffer check */
10975 Perl_croak(aTHX_ "%s", number_too_long);
10977 if (lastub && s == lastub + 1)
10978 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10979 "Misplaced _ in number");
10985 /* fractional part ending in underbar? */
10986 if (s[-1] == '_') {
10987 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10988 "Misplaced _ in number");
10990 if (*s == '.' && isDIGIT(s[1])) {
10991 /* oops, it's really a v-string, but without the "v" */
10997 /* read exponent part, if present */
10998 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11002 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11003 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11005 /* stray preinitial _ */
11007 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11008 "Misplaced _ in number");
11012 /* allow positive or negative exponent */
11013 if (*s == '+' || *s == '-')
11016 /* stray initial _ */
11018 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11019 "Misplaced _ in number");
11023 /* read digits of exponent */
11024 while (isDIGIT(*s) || *s == '_') {
11027 Perl_croak(aTHX_ "%s", number_too_long);
11031 if (((lastub && s == lastub + 1) ||
11032 (!isDIGIT(s[1]) && s[1] != '_')))
11033 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11034 "Misplaced _ in number");
11042 We try to do an integer conversion first if no characters
11043 indicating "float" have been found.
11048 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11050 if (flags == IS_NUMBER_IN_UV) {
11052 sv = newSViv(uv); /* Prefer IVs over UVs. */
11055 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11056 if (uv <= (UV) IV_MIN)
11057 sv = newSViv(-(IV)uv);
11064 /* terminate the string */
11066 nv = Atof(PL_tokenbuf);
11071 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11072 const char *const key = floatit ? "float" : "integer";
11073 const STRLEN keylen = floatit ? 5 : 7;
11074 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11075 key, keylen, sv, NULL, NULL, 0);
11079 /* if it starts with a v, it could be a v-string */
11082 sv = newSV(5); /* preallocate storage space */
11083 ENTER_with_name("scan_vstring");
11085 s = scan_vstring(s, PL_bufend, sv);
11086 SvREFCNT_inc_simple_void_NN(sv);
11087 LEAVE_with_name("scan_vstring");
11091 /* make the op for the constant and return */
11094 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11096 lvalp->opval = NULL;
11102 S_scan_formline(pTHX_ char *s)
11107 SV * const stuff = newSVpvs("");
11108 bool needargs = FALSE;
11109 bool eofmt = FALSE;
11111 char *tokenstart = s;
11112 SV* savewhite = NULL;
11114 if (PL_madskills) {
11115 savewhite = PL_thiswhite;
11120 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11122 while (!needargs) {
11125 #ifdef PERL_STRICT_CR
11126 while (SPACE_OR_TAB(*t))
11129 while (SPACE_OR_TAB(*t) || *t == '\r')
11132 if (*t == '\n' || t == PL_bufend) {
11137 eol = (char *) memchr(s,'\n',PL_bufend-s);
11141 for (t = s; t < eol; t++) {
11142 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11144 goto enough; /* ~~ must be first line in formline */
11146 if (*t == '@' || *t == '^')
11150 sv_catpvn(stuff, s, eol-s);
11151 #ifndef PERL_STRICT_CR
11152 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11153 char *end = SvPVX(stuff) + SvCUR(stuff);
11156 SvCUR_set(stuff, SvCUR(stuff) - 1);
11164 if ((PL_rsfp || PL_parser->filtered)
11165 && PL_parser->form_lex_state == LEX_NORMAL) {
11168 if (PL_madskills) {
11170 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11172 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11175 PL_bufptr = PL_bufend;
11176 COPLINE_INC_WITH_HERELINES;
11177 got_some = lex_next_chunk(0);
11178 CopLINE_dec(PL_curcop);
11181 tokenstart = PL_bufptr;
11189 if (!SvCUR(stuff) || needargs)
11190 PL_lex_state = PL_parser->form_lex_state;
11191 if (SvCUR(stuff)) {
11192 PL_expect = XSTATE;
11194 start_force(PL_curforce);
11195 NEXTVAL_NEXTTOKE.ival = 0;
11196 force_next(FORMLBRACK);
11199 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11201 else if (PL_encoding)
11202 sv_recode_to_utf8(stuff, PL_encoding);
11204 start_force(PL_curforce);
11205 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11209 SvREFCNT_dec(stuff);
11211 PL_lex_formbrack = 0;
11214 if (PL_madskills) {
11216 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11218 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11219 PL_thiswhite = savewhite;
11226 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11229 const I32 oldsavestack_ix = PL_savestack_ix;
11230 CV* const outsidecv = PL_compcv;
11232 SAVEI32(PL_subline);
11233 save_item(PL_subname);
11234 SAVESPTR(PL_compcv);
11236 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11237 CvFLAGS(PL_compcv) |= flags;
11239 PL_subline = CopLINE(PL_curcop);
11240 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11241 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11242 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11243 if (outsidecv && CvPADLIST(outsidecv))
11244 CvPADLIST(PL_compcv)->xpadl_outid =
11245 PadlistNAMES(CvPADLIST(outsidecv));
11247 return oldsavestack_ix;
11251 #pragma segment Perl_yylex
11254 S_yywarn(pTHX_ const char *const s, U32 flags)
11258 PERL_ARGS_ASSERT_YYWARN;
11260 PL_in_eval |= EVAL_WARNONLY;
11261 yyerror_pv(s, flags);
11262 PL_in_eval &= ~EVAL_WARNONLY;
11267 Perl_yyerror(pTHX_ const char *const s)
11269 PERL_ARGS_ASSERT_YYERROR;
11270 return yyerror_pvn(s, strlen(s), 0);
11274 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11276 PERL_ARGS_ASSERT_YYERROR_PV;
11277 return yyerror_pvn(s, strlen(s), flags);
11281 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11284 const char *context = NULL;
11287 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11288 int yychar = PL_parser->yychar;
11290 PERL_ARGS_ASSERT_YYERROR_PVN;
11292 if (!yychar || (yychar == ';' && !PL_rsfp))
11293 sv_catpvs(where_sv, "at EOF");
11294 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11295 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11296 PL_oldbufptr != PL_bufptr) {
11299 The code below is removed for NetWare because it abends/crashes on NetWare
11300 when the script has error such as not having the closing quotes like:
11301 if ($var eq "value)
11302 Checking of white spaces is anyway done in NetWare code.
11305 while (isSPACE(*PL_oldoldbufptr))
11308 context = PL_oldoldbufptr;
11309 contlen = PL_bufptr - PL_oldoldbufptr;
11311 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11312 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11315 The code below is removed for NetWare because it abends/crashes on NetWare
11316 when the script has error such as not having the closing quotes like:
11317 if ($var eq "value)
11318 Checking of white spaces is anyway done in NetWare code.
11321 while (isSPACE(*PL_oldbufptr))
11324 context = PL_oldbufptr;
11325 contlen = PL_bufptr - PL_oldbufptr;
11327 else if (yychar > 255)
11328 sv_catpvs(where_sv, "next token ???");
11329 else if (yychar == -2) { /* YYEMPTY */
11330 if (PL_lex_state == LEX_NORMAL ||
11331 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11332 sv_catpvs(where_sv, "at end of line");
11333 else if (PL_lex_inpat)
11334 sv_catpvs(where_sv, "within pattern");
11336 sv_catpvs(where_sv, "within string");
11339 sv_catpvs(where_sv, "next char ");
11341 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11342 else if (isPRINT_LC(yychar)) {
11343 const char string = yychar;
11344 sv_catpvn(where_sv, &string, 1);
11347 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11349 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11350 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11351 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11353 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11354 SVfARG(newSVpvn_flags(context, contlen,
11355 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11357 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11358 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11359 Perl_sv_catpvf(aTHX_ msg,
11360 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11361 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11364 if (PL_in_eval & EVAL_WARNONLY) {
11365 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11369 if (PL_error_count >= 10) {
11371 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11372 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11373 SVfARG(errsv), OutCopFILE(PL_curcop));
11375 Perl_croak(aTHX_ "%s has too many errors.\n",
11376 OutCopFILE(PL_curcop));
11379 PL_in_my_stash = NULL;
11383 #pragma segment Main
11387 S_swallow_bom(pTHX_ U8 *s)
11390 const STRLEN slen = SvCUR(PL_linestr);
11392 PERL_ARGS_ASSERT_SWALLOW_BOM;
11396 if (s[1] == 0xFE) {
11397 /* UTF-16 little-endian? (or UTF-32LE?) */
11398 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11399 /* diag_listed_as: Unsupported script encoding %s */
11400 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11401 #ifndef PERL_NO_UTF16_FILTER
11402 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11404 if (PL_bufend > (char*)s) {
11405 s = add_utf16_textfilter(s, TRUE);
11408 /* diag_listed_as: Unsupported script encoding %s */
11409 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11414 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11415 #ifndef PERL_NO_UTF16_FILTER
11416 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11418 if (PL_bufend > (char *)s) {
11419 s = add_utf16_textfilter(s, FALSE);
11422 /* diag_listed_as: Unsupported script encoding %s */
11423 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11428 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11429 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11430 s += 3; /* UTF-8 */
11436 if (s[2] == 0xFE && s[3] == 0xFF) {
11437 /* UTF-32 big-endian */
11438 /* diag_listed_as: Unsupported script encoding %s */
11439 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11442 else if (s[2] == 0 && s[3] != 0) {
11445 * are a good indicator of UTF-16BE. */
11446 #ifndef PERL_NO_UTF16_FILTER
11447 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11448 s = add_utf16_textfilter(s, FALSE);
11450 /* diag_listed_as: Unsupported script encoding %s */
11451 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11457 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11458 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11459 s += 4; /* UTF-8 */
11465 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11468 * are a good indicator of UTF-16LE. */
11469 #ifndef PERL_NO_UTF16_FILTER
11470 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11471 s = add_utf16_textfilter(s, TRUE);
11473 /* diag_listed_as: Unsupported script encoding %s */
11474 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11482 #ifndef PERL_NO_UTF16_FILTER
11484 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11487 SV *const filter = FILTER_DATA(idx);
11488 /* We re-use this each time round, throwing the contents away before we
11490 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11491 SV *const utf8_buffer = filter;
11492 IV status = IoPAGE(filter);
11493 const bool reverse = cBOOL(IoLINES(filter));
11496 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11498 /* As we're automatically added, at the lowest level, and hence only called
11499 from this file, we can be sure that we're not called in block mode. Hence
11500 don't bother writing code to deal with block mode. */
11502 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11505 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11507 DEBUG_P(PerlIO_printf(Perl_debug_log,
11508 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11509 FPTR2DPTR(void *, S_utf16_textfilter),
11510 reverse ? 'l' : 'b', idx, maxlen, status,
11511 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11518 /* First, look in our buffer of existing UTF-8 data: */
11519 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11523 } else if (status == 0) {
11525 IoPAGE(filter) = 0;
11526 nl = SvEND(utf8_buffer);
11529 STRLEN got = nl - SvPVX(utf8_buffer);
11530 /* Did we have anything to append? */
11532 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11533 /* Everything else in this code works just fine if SVp_POK isn't
11534 set. This, however, needs it, and we need it to work, else
11535 we loop infinitely because the buffer is never consumed. */
11536 sv_chop(utf8_buffer, nl);
11540 /* OK, not a complete line there, so need to read some more UTF-16.
11541 Read an extra octect if the buffer currently has an odd number. */
11545 if (SvCUR(utf16_buffer) >= 2) {
11546 /* Location of the high octet of the last complete code point.
11547 Gosh, UTF-16 is a pain. All the benefits of variable length,
11548 *coupled* with all the benefits of partial reads and
11550 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11551 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11553 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11557 /* We have the first half of a surrogate. Read more. */
11558 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11561 status = FILTER_READ(idx + 1, utf16_buffer,
11562 160 + (SvCUR(utf16_buffer) & 1));
11563 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11564 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11567 IoPAGE(filter) = status;
11572 chars = SvCUR(utf16_buffer) >> 1;
11573 have = SvCUR(utf8_buffer);
11574 SvGROW(utf8_buffer, have + chars * 3 + 1);
11577 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11578 (U8*)SvPVX_const(utf8_buffer) + have,
11579 chars * 2, &newlen);
11581 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11582 (U8*)SvPVX_const(utf8_buffer) + have,
11583 chars * 2, &newlen);
11585 SvCUR_set(utf8_buffer, have + newlen);
11588 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11589 it's private to us, and utf16_to_utf8{,reversed} take a
11590 (pointer,length) pair, rather than a NUL-terminated string. */
11591 if(SvCUR(utf16_buffer) & 1) {
11592 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11593 SvCUR_set(utf16_buffer, 1);
11595 SvCUR_set(utf16_buffer, 0);
11598 DEBUG_P(PerlIO_printf(Perl_debug_log,
11599 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11601 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11602 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11607 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11609 SV *filter = filter_add(S_utf16_textfilter, NULL);
11611 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11613 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11614 sv_setpvs(filter, "");
11615 IoLINES(filter) = reversed;
11616 IoPAGE(filter) = 1; /* Not EOF */
11618 /* Sadly, we have to return a valid pointer, come what may, so we have to
11619 ignore any error return from this. */
11620 SvCUR_set(PL_linestr, 0);
11621 if (FILTER_READ(0, PL_linestr, 0)) {
11622 SvUTF8_on(PL_linestr);
11624 SvUTF8_on(PL_linestr);
11626 PL_bufend = SvEND(PL_linestr);
11627 return (U8*)SvPVX(PL_linestr);
11632 Returns a pointer to the next character after the parsed
11633 vstring, as well as updating the passed in sv.
11635 Function must be called like
11637 sv = sv_2mortal(newSV(5));
11638 s = scan_vstring(s,e,sv);
11640 where s and e are the start and end of the string.
11641 The sv should already be large enough to store the vstring
11642 passed in, for performance reasons.
11644 This function may croak if fatal warnings are enabled in the
11645 calling scope, hence the sv_2mortal in the example (to prevent
11646 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11652 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11655 const char *pos = s;
11656 const char *start = s;
11658 PERL_ARGS_ASSERT_SCAN_VSTRING;
11660 if (*pos == 'v') pos++; /* get past 'v' */
11661 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11663 if ( *pos != '.') {
11664 /* this may not be a v-string if followed by => */
11665 const char *next = pos;
11666 while (next < e && isSPACE(*next))
11668 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11669 /* return string not v-string */
11670 sv_setpvn(sv,(char *)s,pos-s);
11671 return (char *)pos;
11675 if (!isALPHA(*pos)) {
11676 U8 tmpbuf[UTF8_MAXBYTES+1];
11679 s++; /* get past 'v' */
11684 /* this is atoi() that tolerates underscores */
11687 const char *end = pos;
11689 while (--end >= s) {
11691 const UV orev = rev;
11692 rev += (*end - '0') * mult;
11695 /* diag_listed_as: Integer overflow in %s number */
11696 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11697 "Integer overflow in decimal number");
11701 if (rev > 0x7FFFFFFF)
11702 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11704 /* Append native character for the rev point */
11705 tmpend = uvchr_to_utf8(tmpbuf, rev);
11706 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11707 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11709 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11715 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11719 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11726 Perl_keyword_plugin_standard(pTHX_
11727 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11729 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11730 PERL_UNUSED_CONTEXT;
11731 PERL_UNUSED_ARG(keyword_ptr);
11732 PERL_UNUSED_ARG(keyword_len);
11733 PERL_UNUSED_ARG(op_ptr);
11734 return KEYWORD_PLUGIN_DECLINE;
11737 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11739 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11741 SAVEI32(PL_lex_brackets);
11742 if (PL_lex_brackets > 100)
11743 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11744 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11745 SAVEI32(PL_lex_allbrackets);
11746 PL_lex_allbrackets = 0;
11747 SAVEI8(PL_lex_fakeeof);
11748 PL_lex_fakeeof = (U8)fakeeof;
11749 if(yyparse(gramtype) && !PL_parser->error_count)
11750 qerror(Perl_mess(aTHX_ "Parse error"));
11753 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11755 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11759 SAVEVPTR(PL_eval_root);
11760 PL_eval_root = NULL;
11761 parse_recdescent(gramtype, fakeeof);
11767 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11769 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11772 if (flags & ~PARSE_OPTIONAL)
11773 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11774 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11775 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11776 if (!PL_parser->error_count)
11777 qerror(Perl_mess(aTHX_ "Parse error"));
11778 exprop = newOP(OP_NULL, 0);
11784 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11786 Parse a Perl arithmetic expression. This may contain operators of precedence
11787 down to the bit shift operators. The expression must be followed (and thus
11788 terminated) either by a comparison or lower-precedence operator or by
11789 something that would normally terminate an expression such as semicolon.
11790 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11791 otherwise it is mandatory. It is up to the caller to ensure that the
11792 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11793 the source of the code to be parsed and the lexical context for the
11796 The op tree representing the expression is returned. If an optional
11797 expression is absent, a null pointer is returned, otherwise the pointer
11800 If an error occurs in parsing or compilation, in most cases a valid op
11801 tree is returned anyway. The error is reflected in the parser state,
11802 normally resulting in a single exception at the top level of parsing
11803 which covers all the compilation errors that occurred. Some compilation
11804 errors, however, will throw an exception immediately.
11810 Perl_parse_arithexpr(pTHX_ U32 flags)
11812 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11816 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11818 Parse a Perl term expression. This may contain operators of precedence
11819 down to the assignment operators. The expression must be followed (and thus
11820 terminated) either by a comma or lower-precedence operator or by
11821 something that would normally terminate an expression such as semicolon.
11822 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11823 otherwise it is mandatory. It is up to the caller to ensure that the
11824 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11825 the source of the code to be parsed and the lexical context for the
11828 The op tree representing the expression is returned. If an optional
11829 expression is absent, a null pointer is returned, otherwise the pointer
11832 If an error occurs in parsing or compilation, in most cases a valid op
11833 tree is returned anyway. The error is reflected in the parser state,
11834 normally resulting in a single exception at the top level of parsing
11835 which covers all the compilation errors that occurred. Some compilation
11836 errors, however, will throw an exception immediately.
11842 Perl_parse_termexpr(pTHX_ U32 flags)
11844 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11848 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11850 Parse a Perl list expression. This may contain operators of precedence
11851 down to the comma operator. The expression must be followed (and thus
11852 terminated) either by a low-precedence logic operator such as C<or> or by
11853 something that would normally terminate an expression such as semicolon.
11854 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11855 otherwise it is mandatory. It is up to the caller to ensure that the
11856 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11857 the source of the code to be parsed and the lexical context for the
11860 The op tree representing the expression is returned. If an optional
11861 expression is absent, a null pointer is returned, otherwise the pointer
11864 If an error occurs in parsing or compilation, in most cases a valid op
11865 tree is returned anyway. The error is reflected in the parser state,
11866 normally resulting in a single exception at the top level of parsing
11867 which covers all the compilation errors that occurred. Some compilation
11868 errors, however, will throw an exception immediately.
11874 Perl_parse_listexpr(pTHX_ U32 flags)
11876 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11880 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11882 Parse a single complete Perl expression. This allows the full
11883 expression grammar, including the lowest-precedence operators such
11884 as C<or>. The expression must be followed (and thus terminated) by a
11885 token that an expression would normally be terminated by: end-of-file,
11886 closing bracketing punctuation, semicolon, or one of the keywords that
11887 signals a postfix expression-statement modifier. If I<flags> includes
11888 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11889 mandatory. It is up to the caller to ensure that the dynamic parser
11890 state (L</PL_parser> et al) is correctly set to reflect the source of
11891 the code to be parsed and the lexical context for the expression.
11893 The op tree representing the expression is returned. If an optional
11894 expression is absent, a null pointer is returned, otherwise the pointer
11897 If an error occurs in parsing or compilation, in most cases a valid op
11898 tree is returned anyway. The error is reflected in the parser state,
11899 normally resulting in a single exception at the top level of parsing
11900 which covers all the compilation errors that occurred. Some compilation
11901 errors, however, will throw an exception immediately.
11907 Perl_parse_fullexpr(pTHX_ U32 flags)
11909 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11913 =for apidoc Amx|OP *|parse_block|U32 flags
11915 Parse a single complete Perl code block. This consists of an opening
11916 brace, a sequence of statements, and a closing brace. The block
11917 constitutes a lexical scope, so C<my> variables and various compile-time
11918 effects can be contained within it. It is up to the caller to ensure
11919 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11920 reflect the source of the code to be parsed and the lexical context for
11923 The op tree representing the code block is returned. This is always a
11924 real op, never a null pointer. It will normally be a C<lineseq> list,
11925 including C<nextstate> or equivalent ops. No ops to construct any kind
11926 of runtime scope are included by virtue of it being a block.
11928 If an error occurs in parsing or compilation, in most cases a valid op
11929 tree (most likely null) is returned anyway. The error is reflected in
11930 the parser state, normally resulting in a single exception at the top
11931 level of parsing which covers all the compilation errors that occurred.
11932 Some compilation errors, however, will throw an exception immediately.
11934 The I<flags> parameter is reserved for future use, and must always
11941 Perl_parse_block(pTHX_ U32 flags)
11944 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11945 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11949 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11951 Parse a single unadorned Perl statement. This may be a normal imperative
11952 statement or a declaration that has compile-time effect. It does not
11953 include any label or other affixture. It is up to the caller to ensure
11954 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11955 reflect the source of the code to be parsed and the lexical context for
11958 The op tree representing the statement is returned. This may be a
11959 null pointer if the statement is null, for example if it was actually
11960 a subroutine definition (which has compile-time side effects). If not
11961 null, it will be ops directly implementing the statement, suitable to
11962 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11963 equivalent op (except for those embedded in a scope contained entirely
11964 within the statement).
11966 If an error occurs in parsing or compilation, in most cases a valid op
11967 tree (most likely null) is returned anyway. The error is reflected in
11968 the parser state, normally resulting in a single exception at the top
11969 level of parsing which covers all the compilation errors that occurred.
11970 Some compilation errors, however, will throw an exception immediately.
11972 The I<flags> parameter is reserved for future use, and must always
11979 Perl_parse_barestmt(pTHX_ U32 flags)
11982 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11983 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11987 =for apidoc Amx|SV *|parse_label|U32 flags
11989 Parse a single label, possibly optional, of the type that may prefix a
11990 Perl statement. It is up to the caller to ensure that the dynamic parser
11991 state (L</PL_parser> et al) is correctly set to reflect the source of
11992 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11993 label is optional, otherwise it is mandatory.
11995 The name of the label is returned in the form of a fresh scalar. If an
11996 optional label is absent, a null pointer is returned.
11998 If an error occurs in parsing, which can only occur if the label is
11999 mandatory, a valid label is returned anyway. The error is reflected in
12000 the parser state, normally resulting in a single exception at the top
12001 level of parsing which covers all the compilation errors that occurred.
12007 Perl_parse_label(pTHX_ U32 flags)
12009 if (flags & ~PARSE_OPTIONAL)
12010 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12011 if (PL_lex_state == LEX_KNOWNEXT) {
12012 PL_parser->yychar = yylex();
12013 if (PL_parser->yychar == LABEL) {
12014 char * const lpv = pl_yylval.pval;
12015 STRLEN llen = strlen(lpv);
12016 PL_parser->yychar = YYEMPTY;
12017 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12024 STRLEN wlen, bufptr_pos;
12027 if (!isIDFIRST_lazy_if(s, UTF))
12029 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12030 if (word_takes_any_delimeter(s, wlen))
12032 bufptr_pos = s - SvPVX(PL_linestr);
12034 lex_read_space(LEX_KEEP_PREVIOUS);
12036 s = SvPVX(PL_linestr) + bufptr_pos;
12037 if (t[0] == ':' && t[1] != ':') {
12038 PL_oldoldbufptr = PL_oldbufptr;
12041 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12045 if (flags & PARSE_OPTIONAL) {
12048 qerror(Perl_mess(aTHX_ "Parse error"));
12049 return newSVpvs("x");
12056 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12058 Parse a single complete Perl statement. This may be a normal imperative
12059 statement or a declaration that has compile-time effect, and may include
12060 optional labels. It is up to the caller to ensure that the dynamic
12061 parser state (L</PL_parser> et al) is correctly set to reflect the source
12062 of the code to be parsed and the lexical context for the statement.
12064 The op tree representing the statement is returned. This may be a
12065 null pointer if the statement is null, for example if it was actually
12066 a subroutine definition (which has compile-time side effects). If not
12067 null, it will be the result of a L</newSTATEOP> call, normally including
12068 a C<nextstate> or equivalent op.
12070 If an error occurs in parsing or compilation, in most cases a valid op
12071 tree (most likely null) is returned anyway. The error is reflected in
12072 the parser state, normally resulting in a single exception at the top
12073 level of parsing which covers all the compilation errors that occurred.
12074 Some compilation errors, however, will throw an exception immediately.
12076 The I<flags> parameter is reserved for future use, and must always
12083 Perl_parse_fullstmt(pTHX_ U32 flags)
12086 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12087 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12091 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12093 Parse a sequence of zero or more Perl statements. These may be normal
12094 imperative statements, including optional labels, or declarations
12095 that have compile-time effect, or any mixture thereof. The statement
12096 sequence ends when a closing brace or end-of-file is encountered in a
12097 place where a new statement could have validly started. It is up to
12098 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12099 is correctly set to reflect the source of the code to be parsed and the
12100 lexical context for the statements.
12102 The op tree representing the statement sequence is returned. This may
12103 be a null pointer if the statements were all null, for example if there
12104 were no statements or if there were only subroutine definitions (which
12105 have compile-time side effects). If not null, it will be a C<lineseq>
12106 list, normally including C<nextstate> or equivalent ops.
12108 If an error occurs in parsing or compilation, in most cases a valid op
12109 tree is returned anyway. The error is reflected in the parser state,
12110 normally resulting in a single exception at the top level of parsing
12111 which covers all the compilation errors that occurred. Some compilation
12112 errors, however, will throw an exception immediately.
12114 The I<flags> parameter is reserved for future use, and must always
12121 Perl_parse_stmtseq(pTHX_ U32 flags)
12126 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12127 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12128 c = lex_peek_unichar(0);
12129 if (c != -1 && c != /*{*/'}')
12130 qerror(Perl_mess(aTHX_ "Parse error"));
12136 * c-indentation-style: bsd
12137 * c-basic-offset: 4
12138 * indent-tabs-mode: nil
12141 * ex: set ts=8 sts=4 sw=4 et: