3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char ident_too_long[] = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN0OP : zero-argument function, with its op created in this file
225 * FUN1 : not used, except for not, which isn't a UNIOP
226 * BOop : bitwise or or xor
228 * SHop : shift operator
229 * PWop : power operator
230 * PMop : pattern-matching operator
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
234 * Rop : relational operator <= != gt
236 * Also see LOP and lop() below.
239 #ifdef DEBUGGING /* Serve -DT. */
240 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
242 # define REPORT(retval) (retval)
245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
267 /* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
272 #define UNI3(f,x,have_x) { \
273 pl_yylval.ival = f; \
274 if (have_x) PL_expect = x; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 #define UNI(f) UNI3(f,XTERM,1)
284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285 #define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
290 #define UNIBRACK(f) UNI3(f,0,0)
292 /* grandfather return to old style */
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
303 #define COPLINE_INC_WITH_HERELINES \
305 CopLINE_inc(PL_curcop); \
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
314 /* how to interpret the pl_yylval associated with the token */
318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
323 static struct debug_tokens {
325 enum token_type type;
327 } const debug_tokens[] =
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
375 { PEG, TOKENTYPE_NONE, "PEG" },
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
402 { 0, TOKENTYPE_NONE, NULL }
405 /* dump the returned token in rv, plus any optional arg in pl_yylval */
408 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
412 PERL_ARGS_ASSERT_TOKEREPORT;
415 const char *name = NULL;
416 enum token_type type = TOKENTYPE_NONE;
417 const struct debug_tokens *p;
418 SV* const report = newSVpvs("<== ");
420 for (p = debug_tokens; p->token; p++) {
421 if (p->token == (int)rv) {
428 Perl_sv_catpv(aTHX_ report, name);
429 else if ((char)rv > ' ' && (char)rv <= '~')
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
432 sv_catpvs(report, "EOF");
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
443 PL_op_name[lvalp->ival]);
446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
448 case TOKENTYPE_OPVAL:
450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
453 Perl_sv_catpvf(aTHX_ report, " %s",
454 SvPEEK(cSVOPx_sv(lvalp->opval)));
459 sv_catpvs(report, "(opval=null)");
462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
468 /* print the buffer with suitable escapes */
471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
473 SV* const tmp = newSVpvs("");
475 PERL_ARGS_ASSERT_PRINTBUF;
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
484 S_deprecate_commaless_var_list(pTHX) {
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
498 S_ao(pTHX_ int toketype)
501 if (*PL_bufptr == '=') {
503 if (toketype == ANDAND)
504 pl_yylval.ival = OP_ANDASSIGN;
505 else if (toketype == OROR)
506 pl_yylval.ival = OP_ORASSIGN;
507 else if (toketype == DORDOR)
508 pl_yylval.ival = OP_DORASSIGN;
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
528 S_no_op(pTHX_ const char *const what, char *s)
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
534 PERL_ARGS_ASSERT_NO_OP;
540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
541 if (ckWARN_d(WARN_SYNTAX)) {
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
547 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
548 t += UTF ? UTF8SKIP(t) : 1)
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
569 * Complain about missing quote/regexp/heredoc terminator.
570 * If it's called with NULL then it cauterizes the line buffer.
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
577 S_missingterm(pTHX_ char *s)
583 char * const nl = strrchr(s,'\n');
587 else if (isCNTRL(PL_multi_close)) {
589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
594 *tmpbuf = (char)PL_multi_close;
598 q = strchr(s,'"') ? '\'' : '"';
599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
605 * Check whether the named feature is enabled.
608 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
617 if (namelen > MAX_FEATURE_LEN)
619 memcpy(&he_name[8], name, namelen);
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
630 #ifdef PERL_CR_FILTER
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
637 PERL_ARGS_ASSERT_STRIP_RETURN;
639 /* outer loop optimized to do nothing if there are no CR-LFs */
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
646 if (*s == '\r' && s[1] == '\n')
657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
660 if (count > 0 && !maxlen)
667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
669 Creates and initialises a new lexer/parser state object, supplying
670 a context in which to lex and parse from a new source of Perl code.
671 A pointer to the new state object is placed in L</PL_parser>. An entry
672 is made on the save stack so that upon unwinding the new state object
673 will be destroyed and the former value of L</PL_parser> will be restored.
674 Nothing else need be done to clean up the parsing context.
676 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677 non-null, provides a string (in SV form) containing code to be parsed.
678 A copy of the string is made, so subsequent modification of I<line>
679 does not affect parsing. I<rsfp>, if non-null, provides an input stream
680 from which code will be read to be parsed. If both are non-null, the
681 code in I<line> comes first and must consist of complete lines of input,
682 and I<rsfp> supplies the remainder of the source.
684 The I<flags> parameter is reserved for future use. Currently it is only
685 used by perl internally, so extensions should always pass zero.
690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
700 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
703 const char *s = NULL;
704 yy_parser *parser, *oparser;
705 if (flags && flags & ~LEX_START_FLAGS)
706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
708 /* create and initialise a parser */
710 Newxz(parser, 1, yy_parser);
711 parser->old_parser = oparser = PL_parser;
714 parser->stack = NULL;
716 parser->stack_size = 0;
718 /* on scope exit, free this parser and restore any outer one */
720 parser->saved_curcop = PL_curcop;
722 /* initialise lexer state */
725 parser->curforce = -1;
727 parser->nexttoke = 0;
729 parser->error_count = oparser ? oparser->error_count : 0;
730 parser->copline = NOLINE;
731 parser->lex_state = LEX_NORMAL;
732 parser->expect = XSTATE;
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
746 Newxz(parser->lex_shared, 1, LEXSHARED);
750 s = SvPV_const(line, len);
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
754 sv_catpvs(parser->linestr, "\n;");
756 parser->linestr = newSVpvs("\n;");
758 parser->oldoldbufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
767 parser->in_pod = parser->filtered = 0;
771 /* delete a parser object */
774 Perl_parser_free(pTHX_ const yy_parser *parser)
777 I32 nexttoke = parser->lasttoke;
779 I32 nexttoke = parser->nexttoke;
782 PERL_ARGS_ASSERT_PARSER_FREE;
784 PL_curcop = parser->saved_curcop;
785 SvREFCNT_dec(parser->linestr);
787 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
788 PerlIO_clearerr(parser->rsfp);
789 else if (parser->rsfp && (!parser->old_parser ||
790 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
791 PerlIO_close(parser->rsfp);
792 SvREFCNT_dec(parser->rsfp_filters);
793 SvREFCNT_dec(parser->lex_stuff);
794 SvREFCNT_dec(parser->sublex_info.repl);
797 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
799 op_free(parser->nexttoke[nexttoke].next_val.opval);
801 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff))
802 op_free(parser->nextval[nexttoke].opval);
806 Safefree(parser->lex_brackstack);
807 Safefree(parser->lex_casestack);
808 Safefree(parser->lex_shared);
809 PL_parser = parser->old_parser;
815 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
817 Buffer scalar containing the chunk currently under consideration of the
818 text currently being lexed. This is always a plain string scalar (for
819 which C<SvPOK> is true). It is not intended to be used as a scalar by
820 normal scalar means; instead refer to the buffer directly by the pointer
821 variables described below.
823 The lexer maintains various C<char*> pointers to things in the
824 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
825 reallocated, all of these pointers must be updated. Don't attempt to
826 do this manually, but rather use L</lex_grow_linestr> if you need to
827 reallocate the buffer.
829 The content of the text chunk in the buffer is commonly exactly one
830 complete line of input, up to and including a newline terminator,
831 but there are situations where it is otherwise. The octets of the
832 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
833 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
834 flag on this scalar, which may disagree with it.
836 For direct examination of the buffer, the variable
837 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
838 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
839 of these pointers is usually preferable to examination of the scalar
840 through normal scalar means.
842 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
844 Direct pointer to the end of the chunk of text currently being lexed, the
845 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
846 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
847 always located at the end of the buffer, and does not count as part of
848 the buffer's contents.
850 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
852 Points to the current position of lexing inside the lexer buffer.
853 Characters around this point may be freely examined, within
854 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
855 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
856 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
858 Lexing code (whether in the Perl core or not) moves this pointer past
859 the characters that it consumes. It is also expected to perform some
860 bookkeeping whenever a newline character is consumed. This movement
861 can be more conveniently performed by the function L</lex_read_to>,
862 which handles newlines appropriately.
864 Interpretation of the buffer's octets can be abstracted out by
865 using the slightly higher-level functions L</lex_peek_unichar> and
866 L</lex_read_unichar>.
868 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
870 Points to the start of the current line inside the lexer buffer.
871 This is useful for indicating at which column an error occurred, and
872 not much else. This must be updated by any lexing code that consumes
873 a newline; the function L</lex_read_to> handles this detail.
879 =for apidoc Amx|bool|lex_bufutf8
881 Indicates whether the octets in the lexer buffer
882 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
883 of Unicode characters. If not, they should be interpreted as Latin-1
884 characters. This is analogous to the C<SvUTF8> flag for scalars.
886 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
887 contains valid UTF-8. Lexing code must be robust in the face of invalid
890 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
891 is significant, but not the whole story regarding the input character
892 encoding. Normally, when a file is being read, the scalar contains octets
893 and its C<SvUTF8> flag is off, but the octets should be interpreted as
894 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
895 however, the scalar may have the C<SvUTF8> flag on, and in this case its
896 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
897 is in effect. This logic may change in the future; use this function
898 instead of implementing the logic yourself.
904 Perl_lex_bufutf8(pTHX)
910 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
912 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
913 at least I<len> octets (including terminating NUL). Returns a
914 pointer to the reallocated buffer. This is necessary before making
915 any direct modification of the buffer that would increase its length.
916 L</lex_stuff_pvn> provides a more convenient way to insert text into
919 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
920 this function updates all of the lexer's variables that point directly
927 Perl_lex_grow_linestr(pTHX_ STRLEN len)
931 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
932 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
933 linestr = PL_parser->linestr;
934 buf = SvPVX(linestr);
935 if (len <= SvLEN(linestr))
937 bufend_pos = PL_parser->bufend - buf;
938 bufptr_pos = PL_parser->bufptr - buf;
939 oldbufptr_pos = PL_parser->oldbufptr - buf;
940 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
941 linestart_pos = PL_parser->linestart - buf;
942 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
943 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
944 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
945 PL_parser->lex_shared->re_eval_start - buf : 0;
947 buf = sv_grow(linestr, len);
949 PL_parser->bufend = buf + bufend_pos;
950 PL_parser->bufptr = buf + bufptr_pos;
951 PL_parser->oldbufptr = buf + oldbufptr_pos;
952 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
953 PL_parser->linestart = buf + linestart_pos;
954 if (PL_parser->last_uni)
955 PL_parser->last_uni = buf + last_uni_pos;
956 if (PL_parser->last_lop)
957 PL_parser->last_lop = buf + last_lop_pos;
958 if (PL_parser->lex_shared->re_eval_start)
959 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
964 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
966 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
967 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
968 reallocating the buffer if necessary. This means that lexing code that
969 runs later will see the characters as if they had appeared in the input.
970 It is not recommended to do this as part of normal parsing, and most
971 uses of this facility run the risk of the inserted characters being
972 interpreted in an unintended manner.
974 The string to be inserted is represented by I<len> octets starting
975 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
976 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
977 The characters are recoded for the lexer buffer, according to how the
978 buffer is currently being interpreted (L</lex_bufutf8>). If a string
979 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
980 function is more convenient.
986 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
990 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
991 if (flags & ~(LEX_STUFF_UTF8))
992 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
994 if (flags & LEX_STUFF_UTF8) {
997 STRLEN highhalf = 0; /* Count of variants */
998 const char *p, *e = pv+len;
999 for (p = pv; p != e; p++) {
1000 if (! UTF8_IS_INVARIANT(*p)) {
1006 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1007 bufptr = PL_parser->bufptr;
1008 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1009 SvCUR_set(PL_parser->linestr,
1010 SvCUR(PL_parser->linestr) + len+highhalf);
1011 PL_parser->bufend += len+highhalf;
1012 for (p = pv; p != e; p++) {
1014 if (! UTF8_IS_INVARIANT(c)) {
1015 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1016 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1018 *bufptr++ = (char)c;
1023 if (flags & LEX_STUFF_UTF8) {
1024 STRLEN highhalf = 0;
1025 const char *p, *e = pv+len;
1026 for (p = pv; p != e; p++) {
1028 if (UTF8_IS_ABOVE_LATIN1(c)) {
1029 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1030 "non-Latin-1 character into Latin-1 input");
1031 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1034 } else if (! UTF8_IS_INVARIANT(c)) {
1035 /* malformed UTF-8 */
1037 SAVESPTR(PL_warnhook);
1038 PL_warnhook = PERL_WARNHOOK_FATAL;
1039 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1045 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1046 bufptr = PL_parser->bufptr;
1047 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1048 SvCUR_set(PL_parser->linestr,
1049 SvCUR(PL_parser->linestr) + len-highhalf);
1050 PL_parser->bufend += len-highhalf;
1053 if (UTF8_IS_INVARIANT(*p)) {
1059 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1065 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1066 bufptr = PL_parser->bufptr;
1067 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1068 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1069 PL_parser->bufend += len;
1070 Copy(pv, bufptr, len, char);
1076 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1078 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1079 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1080 reallocating the buffer if necessary. This means that lexing code that
1081 runs later will see the characters as if they had appeared in the input.
1082 It is not recommended to do this as part of normal parsing, and most
1083 uses of this facility run the risk of the inserted characters being
1084 interpreted in an unintended manner.
1086 The string to be inserted is represented by octets starting at I<pv>
1087 and continuing to the first nul. These octets are interpreted as either
1088 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1089 in I<flags>. The characters are recoded for the lexer buffer, according
1090 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1091 If it is not convenient to nul-terminate a string to be inserted, the
1092 L</lex_stuff_pvn> function is more appropriate.
1098 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1100 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1101 lex_stuff_pvn(pv, strlen(pv), flags);
1105 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1107 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1108 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1109 reallocating the buffer if necessary. This means that lexing code that
1110 runs later will see the characters as if they had appeared in the input.
1111 It is not recommended to do this as part of normal parsing, and most
1112 uses of this facility run the risk of the inserted characters being
1113 interpreted in an unintended manner.
1115 The string to be inserted is the string value of I<sv>. The characters
1116 are recoded for the lexer buffer, according to how the buffer is currently
1117 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1118 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1119 need to construct a scalar.
1125 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1129 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1131 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1133 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1137 =for apidoc Amx|void|lex_unstuff|char *ptr
1139 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1140 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1141 This hides the discarded text from any lexing code that runs later,
1142 as if the text had never appeared.
1144 This is not the normal way to consume lexed text. For that, use
1151 Perl_lex_unstuff(pTHX_ char *ptr)
1155 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1156 buf = PL_parser->bufptr;
1158 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1161 bufend = PL_parser->bufend;
1163 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1164 unstuff_len = ptr - buf;
1165 Move(ptr, buf, bufend+1-ptr, char);
1166 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1167 PL_parser->bufend = bufend - unstuff_len;
1171 =for apidoc Amx|void|lex_read_to|char *ptr
1173 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1174 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1175 performing the correct bookkeeping whenever a newline character is passed.
1176 This is the normal way to consume lexed text.
1178 Interpretation of the buffer's octets can be abstracted out by
1179 using the slightly higher-level functions L</lex_peek_unichar> and
1180 L</lex_read_unichar>.
1186 Perl_lex_read_to(pTHX_ char *ptr)
1189 PERL_ARGS_ASSERT_LEX_READ_TO;
1190 s = PL_parser->bufptr;
1191 if (ptr < s || ptr > PL_parser->bufend)
1192 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1193 for (; s != ptr; s++)
1195 COPLINE_INC_WITH_HERELINES;
1196 PL_parser->linestart = s+1;
1198 PL_parser->bufptr = ptr;
1202 =for apidoc Amx|void|lex_discard_to|char *ptr
1204 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1205 up to I<ptr>. The remaining content of the buffer will be moved, and
1206 all pointers into the buffer updated appropriately. I<ptr> must not
1207 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1208 it is not permitted to discard text that has yet to be lexed.
1210 Normally it is not necessarily to do this directly, because it suffices to
1211 use the implicit discarding behaviour of L</lex_next_chunk> and things
1212 based on it. However, if a token stretches across multiple lines,
1213 and the lexing code has kept multiple lines of text in the buffer for
1214 that purpose, then after completion of the token it would be wise to
1215 explicitly discard the now-unneeded earlier lines, to avoid future
1216 multi-line tokens growing the buffer without bound.
1222 Perl_lex_discard_to(pTHX_ char *ptr)
1226 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1227 buf = SvPVX(PL_parser->linestr);
1229 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1232 if (ptr > PL_parser->bufptr)
1233 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1234 discard_len = ptr - buf;
1235 if (PL_parser->oldbufptr < ptr)
1236 PL_parser->oldbufptr = ptr;
1237 if (PL_parser->oldoldbufptr < ptr)
1238 PL_parser->oldoldbufptr = ptr;
1239 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1240 PL_parser->last_uni = NULL;
1241 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1242 PL_parser->last_lop = NULL;
1243 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1244 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1245 PL_parser->bufend -= discard_len;
1246 PL_parser->bufptr -= discard_len;
1247 PL_parser->oldbufptr -= discard_len;
1248 PL_parser->oldoldbufptr -= discard_len;
1249 if (PL_parser->last_uni)
1250 PL_parser->last_uni -= discard_len;
1251 if (PL_parser->last_lop)
1252 PL_parser->last_lop -= discard_len;
1256 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1258 Reads in the next chunk of text to be lexed, appending it to
1259 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1260 looked to the end of the current chunk and wants to know more. It is
1261 usual, but not necessary, for lexing to have consumed the entirety of
1262 the current chunk at this time.
1264 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1265 chunk (i.e., the current chunk has been entirely consumed), normally the
1266 current chunk will be discarded at the same time that the new chunk is
1267 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1268 will not be discarded. If the current chunk has not been entirely
1269 consumed, then it will not be discarded regardless of the flag.
1271 Returns true if some new text was added to the buffer, or false if the
1272 buffer has reached the end of the input text.
1277 #define LEX_FAKE_EOF 0x80000000
1278 #define LEX_NO_TERM 0x40000000
1281 Perl_lex_next_chunk(pTHX_ U32 flags)
1285 STRLEN old_bufend_pos, new_bufend_pos;
1286 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1287 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1288 bool got_some_for_debugger = 0;
1290 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1291 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1292 linestr = PL_parser->linestr;
1293 buf = SvPVX(linestr);
1294 if (!(flags & LEX_KEEP_PREVIOUS) &&
1295 PL_parser->bufptr == PL_parser->bufend) {
1296 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1298 if (PL_parser->last_uni != PL_parser->bufend)
1299 PL_parser->last_uni = NULL;
1300 if (PL_parser->last_lop != PL_parser->bufend)
1301 PL_parser->last_lop = NULL;
1302 last_uni_pos = last_lop_pos = 0;
1306 old_bufend_pos = PL_parser->bufend - buf;
1307 bufptr_pos = PL_parser->bufptr - buf;
1308 oldbufptr_pos = PL_parser->oldbufptr - buf;
1309 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1310 linestart_pos = PL_parser->linestart - buf;
1311 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1312 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1314 if (flags & LEX_FAKE_EOF) {
1316 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1318 } else if (filter_gets(linestr, old_bufend_pos)) {
1320 got_some_for_debugger = 1;
1321 } else if (flags & LEX_NO_TERM) {
1324 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1325 sv_setpvs(linestr, "");
1327 /* End of real input. Close filehandle (unless it was STDIN),
1328 * then add implicit termination.
1330 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1331 PerlIO_clearerr(PL_parser->rsfp);
1332 else if (PL_parser->rsfp)
1333 (void)PerlIO_close(PL_parser->rsfp);
1334 PL_parser->rsfp = NULL;
1335 PL_parser->in_pod = PL_parser->filtered = 0;
1337 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1340 if (!PL_in_eval && PL_minus_p) {
1342 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1343 PL_minus_n = PL_minus_p = 0;
1344 } else if (!PL_in_eval && PL_minus_n) {
1345 sv_catpvs(linestr, /*{*/";}");
1348 sv_catpvs(linestr, ";");
1351 buf = SvPVX(linestr);
1352 new_bufend_pos = SvCUR(linestr);
1353 PL_parser->bufend = buf + new_bufend_pos;
1354 PL_parser->bufptr = buf + bufptr_pos;
1355 PL_parser->oldbufptr = buf + oldbufptr_pos;
1356 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1357 PL_parser->linestart = buf + linestart_pos;
1358 if (PL_parser->last_uni)
1359 PL_parser->last_uni = buf + last_uni_pos;
1360 if (PL_parser->last_lop)
1361 PL_parser->last_lop = buf + last_lop_pos;
1362 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1363 PL_curstash != PL_debstash) {
1364 /* debugger active and we're not compiling the debugger code,
1365 * so store the line into the debugger's array of lines
1367 update_debugger_info(NULL, buf+old_bufend_pos,
1368 new_bufend_pos-old_bufend_pos);
1374 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1376 Looks ahead one (Unicode) character in the text currently being lexed.
1377 Returns the codepoint (unsigned integer value) of the next character,
1378 or -1 if lexing has reached the end of the input text. To consume the
1379 peeked character, use L</lex_read_unichar>.
1381 If the next character is in (or extends into) the next chunk of input
1382 text, the next chunk will be read in. Normally the current chunk will be
1383 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1384 then the current chunk will not be discarded.
1386 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1387 is encountered, an exception is generated.
1393 Perl_lex_peek_unichar(pTHX_ U32 flags)
1397 if (flags & ~(LEX_KEEP_PREVIOUS))
1398 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1399 s = PL_parser->bufptr;
1400 bufend = PL_parser->bufend;
1406 if (!lex_next_chunk(flags))
1408 s = PL_parser->bufptr;
1409 bufend = PL_parser->bufend;
1412 if (UTF8_IS_INVARIANT(head))
1414 if (UTF8_IS_START(head)) {
1415 len = UTF8SKIP(&head);
1416 while ((STRLEN)(bufend-s) < len) {
1417 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1419 s = PL_parser->bufptr;
1420 bufend = PL_parser->bufend;
1423 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1424 if (retlen == (STRLEN)-1) {
1425 /* malformed UTF-8 */
1427 SAVESPTR(PL_warnhook);
1428 PL_warnhook = PERL_WARNHOOK_FATAL;
1429 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1435 if (!lex_next_chunk(flags))
1437 s = PL_parser->bufptr;
1444 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1446 Reads the next (Unicode) character in the text currently being lexed.
1447 Returns the codepoint (unsigned integer value) of the character read,
1448 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1449 if lexing has reached the end of the input text. To non-destructively
1450 examine the next character, use L</lex_peek_unichar> instead.
1452 If the next character is in (or extends into) the next chunk of input
1453 text, the next chunk will be read in. Normally the current chunk will be
1454 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1455 then the current chunk will not be discarded.
1457 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1458 is encountered, an exception is generated.
1464 Perl_lex_read_unichar(pTHX_ U32 flags)
1467 if (flags & ~(LEX_KEEP_PREVIOUS))
1468 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1469 c = lex_peek_unichar(flags);
1472 COPLINE_INC_WITH_HERELINES;
1474 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1476 ++(PL_parser->bufptr);
1482 =for apidoc Amx|void|lex_read_space|U32 flags
1484 Reads optional spaces, in Perl style, in the text currently being
1485 lexed. The spaces may include ordinary whitespace characters and
1486 Perl-style comments. C<#line> directives are processed if encountered.
1487 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1488 at a non-space character (or the end of the input text).
1490 If spaces extend into the next chunk of input text, the next chunk will
1491 be read in. Normally the current chunk will be discarded at the same
1492 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1493 chunk will not be discarded.
1498 #define LEX_NO_NEXT_CHUNK 0x80000000
1501 Perl_lex_read_space(pTHX_ U32 flags)
1504 bool need_incline = 0;
1505 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1506 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1509 sv_free(PL_skipwhite);
1510 PL_skipwhite = NULL;
1513 PL_skipwhite = newSVpvs("");
1514 #endif /* PERL_MAD */
1515 s = PL_parser->bufptr;
1516 bufend = PL_parser->bufend;
1522 } while (!(c == '\n' || (c == 0 && s == bufend)));
1523 } else if (c == '\n') {
1525 PL_parser->linestart = s;
1530 } else if (isSPACE(c)) {
1532 } else if (c == 0 && s == bufend) {
1536 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1537 #endif /* PERL_MAD */
1538 if (flags & LEX_NO_NEXT_CHUNK)
1540 PL_parser->bufptr = s;
1541 COPLINE_INC_WITH_HERELINES;
1542 got_more = lex_next_chunk(flags);
1543 CopLINE_dec(PL_curcop);
1544 s = PL_parser->bufptr;
1545 bufend = PL_parser->bufend;
1548 if (need_incline && PL_parser->rsfp) {
1558 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1559 #endif /* PERL_MAD */
1560 PL_parser->bufptr = s;
1565 * This subroutine has nothing to do with tilting, whether at windmills
1566 * or pinball tables. Its name is short for "increment line". It
1567 * increments the current line number in CopLINE(PL_curcop) and checks
1568 * to see whether the line starts with a comment of the form
1569 * # line 500 "foo.pm"
1570 * If so, it sets the current line number and file to the values in the comment.
1574 S_incline(pTHX_ const char *s)
1582 PERL_ARGS_ASSERT_INCLINE;
1584 COPLINE_INC_WITH_HERELINES;
1585 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1586 && s+1 == PL_bufend && *s == ';') {
1587 /* fake newline in string eval */
1588 CopLINE_dec(PL_curcop);
1593 while (SPACE_OR_TAB(*s))
1595 if (strnEQ(s, "line", 4))
1599 if (SPACE_OR_TAB(*s))
1603 while (SPACE_OR_TAB(*s))
1611 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1613 while (SPACE_OR_TAB(*s))
1615 if (*s == '"' && (t = strchr(s+1, '"'))) {
1621 while (!isSPACE(*t))
1625 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1627 if (*e != '\n' && *e != '\0')
1628 return; /* false alarm */
1630 line_num = atoi(n)-1;
1633 const STRLEN len = t - s;
1634 SV *const temp_sv = CopFILESV(PL_curcop);
1639 cf = SvPVX(temp_sv);
1640 tmplen = SvCUR(temp_sv);
1646 if (!PL_rsfp && !PL_parser->filtered) {
1647 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1648 * to *{"::_<newfilename"} */
1649 /* However, the long form of evals is only turned on by the
1650 debugger - usually they're "(eval %lu)" */
1654 STRLEN tmplen2 = len;
1655 if (tmplen + 2 <= sizeof smallbuf)
1658 Newx(tmpbuf, tmplen + 2, char);
1661 memcpy(tmpbuf + 2, cf, tmplen);
1663 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1668 if (tmplen2 + 2 <= sizeof smallbuf)
1671 Newx(tmpbuf2, tmplen2 + 2, char);
1673 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1674 /* Either they malloc'd it, or we malloc'd it,
1675 so no prefix is present in ours. */
1680 memcpy(tmpbuf2 + 2, s, tmplen2);
1683 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1685 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1686 /* adjust ${"::_<newfilename"} to store the new file name */
1687 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1688 /* The line number may differ. If that is the case,
1689 alias the saved lines that are in the array.
1690 Otherwise alias the whole array. */
1691 if (CopLINE(PL_curcop) == line_num) {
1692 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1693 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1695 else if (GvAV(*gvp)) {
1696 AV * const av = GvAV(*gvp);
1697 const I32 start = CopLINE(PL_curcop)+1;
1698 I32 items = AvFILLp(av) - start;
1700 AV * const av2 = GvAVn(gv2);
1701 SV **svp = AvARRAY(av) + start;
1702 I32 l = (I32)line_num+1;
1704 av_store(av2, l++, SvREFCNT_inc(*svp++));
1709 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1711 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1713 CopFILE_free(PL_curcop);
1714 CopFILE_setn(PL_curcop, s, len);
1716 CopLINE_set(PL_curcop, line_num);
1720 /* skip space before PL_thistoken */
1723 S_skipspace0(pTHX_ register char *s)
1725 PERL_ARGS_ASSERT_SKIPSPACE0;
1732 PL_thiswhite = newSVpvs("");
1733 sv_catsv(PL_thiswhite, PL_skipwhite);
1734 sv_free(PL_skipwhite);
1737 PL_realtokenstart = s - SvPVX(PL_linestr);
1741 /* skip space after PL_thistoken */
1744 S_skipspace1(pTHX_ register char *s)
1746 const char *start = s;
1747 I32 startoff = start - SvPVX(PL_linestr);
1749 PERL_ARGS_ASSERT_SKIPSPACE1;
1754 start = SvPVX(PL_linestr) + startoff;
1755 if (!PL_thistoken && PL_realtokenstart >= 0) {
1756 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1757 PL_thistoken = newSVpvn(tstart, start - tstart);
1759 PL_realtokenstart = -1;
1762 PL_nextwhite = newSVpvs("");
1763 sv_catsv(PL_nextwhite, PL_skipwhite);
1764 sv_free(PL_skipwhite);
1771 S_skipspace2(pTHX_ register char *s, SV **svp)
1774 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1775 const I32 startoff = s - SvPVX(PL_linestr);
1777 PERL_ARGS_ASSERT_SKIPSPACE2;
1780 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1781 if (!PL_madskills || !svp)
1783 start = SvPVX(PL_linestr) + startoff;
1784 if (!PL_thistoken && PL_realtokenstart >= 0) {
1785 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1786 PL_thistoken = newSVpvn(tstart, start - tstart);
1787 PL_realtokenstart = -1;
1791 *svp = newSVpvs("");
1792 sv_setsv(*svp, PL_skipwhite);
1793 sv_free(PL_skipwhite);
1802 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1804 AV *av = CopFILEAVx(PL_curcop);
1806 SV * const sv = newSV_type(SVt_PVMG);
1808 sv_setsv(sv, orig_sv);
1810 sv_setpvn(sv, buf, len);
1813 av_store(av, (I32)CopLINE(PL_curcop), sv);
1819 * Called to gobble the appropriate amount and type of whitespace.
1820 * Skips comments as well.
1824 S_skipspace(pTHX_ register char *s)
1828 #endif /* PERL_MAD */
1829 PERL_ARGS_ASSERT_SKIPSPACE;
1832 sv_free(PL_skipwhite);
1833 PL_skipwhite = NULL;
1835 #endif /* PERL_MAD */
1836 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1837 while (s < PL_bufend && SPACE_OR_TAB(*s))
1840 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1842 lex_read_space(LEX_KEEP_PREVIOUS |
1843 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1844 LEX_NO_NEXT_CHUNK : 0));
1846 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1847 if (PL_linestart > PL_bufptr)
1848 PL_bufptr = PL_linestart;
1853 PL_skipwhite = newSVpvn(start, s-start);
1854 #endif /* PERL_MAD */
1860 * Check the unary operators to ensure there's no ambiguity in how they're
1861 * used. An ambiguous piece of code would be:
1863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1864 * the +5 is its argument.
1874 if (PL_oldoldbufptr != PL_last_uni)
1876 while (isSPACE(*PL_last_uni))
1879 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1881 if ((t = strchr(s, '(')) && t < PL_bufptr)
1884 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1885 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1886 (int)(s - PL_last_uni), PL_last_uni);
1890 * LOP : macro to build a list operator. Its behaviour has been replaced
1891 * with a subroutine, S_lop() for which LOP is just another name.
1894 #define LOP(f,x) return lop(f,x,s)
1898 * Build a list operator (or something that might be one). The rules:
1899 * - if we have a next token, then it's a list operator [why?]
1900 * - if the next thing is an opening paren, then it's a function
1901 * - else it's a list operator
1905 S_lop(pTHX_ I32 f, int x, char *s)
1909 PERL_ARGS_ASSERT_LOP;
1915 PL_last_lop = PL_oldbufptr;
1916 PL_last_lop_op = (OPCODE)f;
1925 return REPORT(FUNC);
1928 return REPORT(FUNC);
1931 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1932 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1933 return REPORT(LSTOP);
1940 * Sets up for an eventual force_next(). start_force(0) basically does
1941 * an unshift, while start_force(-1) does a push. yylex removes items
1946 S_start_force(pTHX_ int where)
1950 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1951 where = PL_lasttoke;
1952 assert(PL_curforce < 0 || PL_curforce == where);
1953 if (PL_curforce != where) {
1954 for (i = PL_lasttoke; i > where; --i) {
1955 PL_nexttoke[i] = PL_nexttoke[i-1];
1959 if (PL_curforce < 0) /* in case of duplicate start_force() */
1960 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1961 PL_curforce = where;
1964 curmad('^', newSVpvs(""));
1965 CURMAD('_', PL_nextwhite);
1970 S_curmad(pTHX_ char slot, SV *sv)
1976 if (PL_curforce < 0)
1977 where = &PL_thismad;
1979 where = &PL_nexttoke[PL_curforce].next_mad;
1985 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1987 else if (PL_encoding) {
1988 sv_recode_to_utf8(sv, PL_encoding);
1993 /* keep a slot open for the head of the list? */
1994 if (slot != '_' && *where && (*where)->mad_key == '^') {
1995 (*where)->mad_key = slot;
1996 sv_free(MUTABLE_SV(((*where)->mad_val)));
1997 (*where)->mad_val = (void*)sv;
2000 addmad(newMADsv(slot, sv), where, 0);
2003 # define start_force(where) NOOP
2004 # define curmad(slot, sv) NOOP
2009 * When the lexer realizes it knows the next token (for instance,
2010 * it is reordering tokens for the parser) then it can call S_force_next
2011 * to know what token to return the next time the lexer is called. Caller
2012 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2013 * and possibly PL_expect to ensure the lexer handles the token correctly.
2017 S_force_next(pTHX_ I32 type)
2022 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2023 tokereport(type, &NEXTVAL_NEXTTOKE);
2026 /* Don’t let opslab_force_free snatch it */
2027 if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2028 assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2029 NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2032 if (PL_curforce < 0)
2033 start_force(PL_lasttoke);
2034 PL_nexttoke[PL_curforce].next_type = type;
2035 if (PL_lex_state != LEX_KNOWNEXT)
2036 PL_lex_defer = PL_lex_state;
2037 PL_lex_state = LEX_KNOWNEXT;
2038 PL_lex_expect = PL_expect;
2041 PL_nexttype[PL_nexttoke] = type;
2043 if (PL_lex_state != LEX_KNOWNEXT) {
2044 PL_lex_defer = PL_lex_state;
2045 PL_lex_expect = PL_expect;
2046 PL_lex_state = LEX_KNOWNEXT;
2054 int yyc = PL_parser->yychar;
2055 if (yyc != YYEMPTY) {
2058 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2059 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2060 PL_lex_allbrackets--;
2062 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2063 } else if (yyc == '('/*)*/) {
2064 PL_lex_allbrackets--;
2069 PL_parser->yychar = YYEMPTY;
2074 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2077 SV * const sv = newSVpvn_utf8(start, len,
2080 && !is_ascii_string((const U8*)start, len)
2081 && is_utf8_string((const U8*)start, len));
2087 * When the lexer knows the next thing is a word (for instance, it has
2088 * just seen -> and it knows that the next char is a word char, then
2089 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2093 * char *start : buffer position (must be within PL_linestr)
2094 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2095 * int check_keyword : if true, Perl checks to make sure the word isn't
2096 * a keyword (do this if the word is a label, e.g. goto FOO)
2097 * int allow_pack : if true, : characters will also be allowed (require,
2098 * use, etc. do this)
2099 * int allow_initial_tick : used by the "sub" lexer only.
2103 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2109 PERL_ARGS_ASSERT_FORCE_WORD;
2111 start = SKIPSPACE1(start);
2113 if (isIDFIRST_lazy_if(s,UTF) ||
2114 (allow_pack && *s == ':') ||
2115 (allow_initial_tick && *s == '\'') )
2117 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2118 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2120 start_force(PL_curforce);
2122 curmad('X', newSVpvn(start,s-start));
2123 if (token == METHOD) {
2128 PL_expect = XOPERATOR;
2132 curmad('g', newSVpvs( "forced" ));
2133 NEXTVAL_NEXTTOKE.opval
2134 = (OP*)newSVOP(OP_CONST,0,
2135 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2136 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2144 * Called when the lexer wants $foo *foo &foo etc, but the program
2145 * text only contains the "foo" portion. The first argument is a pointer
2146 * to the "foo", and the second argument is the type symbol to prefix.
2147 * Forces the next token to be a "WORD".
2148 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2152 S_force_ident(pTHX_ register const char *s, int kind)
2156 PERL_ARGS_ASSERT_FORCE_IDENT;
2159 const STRLEN len = strlen(s);
2160 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2161 UTF ? SVf_UTF8 : 0));
2162 start_force(PL_curforce);
2163 NEXTVAL_NEXTTOKE.opval = o;
2166 o->op_private = OPpCONST_ENTERED;
2167 /* XXX see note in pp_entereval() for why we forgo typo
2168 warnings if the symbol must be introduced in an eval.
2170 gv_fetchpvn_flags(s, len,
2171 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2172 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2173 kind == '$' ? SVt_PV :
2174 kind == '@' ? SVt_PVAV :
2175 kind == '%' ? SVt_PVHV :
2183 S_force_ident_maybe_lex(pTHX_ char pit)
2185 start_force(PL_curforce);
2186 NEXTVAL_NEXTTOKE.ival = pit;
2191 Perl_str_to_version(pTHX_ SV *sv)
2196 const char *start = SvPV_const(sv,len);
2197 const char * const end = start + len;
2198 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2200 PERL_ARGS_ASSERT_STR_TO_VERSION;
2202 while (start < end) {
2206 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2211 retval += ((NV)n)/nshift;
2220 * Forces the next token to be a version number.
2221 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2222 * and if "guessing" is TRUE, then no new token is created (and the caller
2223 * must use an alternative parsing method).
2227 S_force_version(pTHX_ char *s, int guessing)
2233 I32 startoff = s - SvPVX(PL_linestr);
2236 PERL_ARGS_ASSERT_FORCE_VERSION;
2244 while (isDIGIT(*d) || *d == '_' || *d == '.')
2248 start_force(PL_curforce);
2249 curmad('X', newSVpvn(s,d-s));
2252 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2254 #ifdef USE_LOCALE_NUMERIC
2255 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2256 setlocale(LC_NUMERIC, "C");
2258 s = scan_num(s, &pl_yylval);
2259 #ifdef USE_LOCALE_NUMERIC
2260 setlocale(LC_NUMERIC, loc);
2263 version = pl_yylval.opval;
2264 ver = cSVOPx(version)->op_sv;
2265 if (SvPOK(ver) && !SvNIOK(ver)) {
2266 SvUPGRADE(ver, SVt_PVNV);
2267 SvNV_set(ver, str_to_version(ver));
2268 SvNOK_on(ver); /* hint that it is a version */
2271 else if (guessing) {
2274 sv_free(PL_nextwhite); /* let next token collect whitespace */
2276 s = SvPVX(PL_linestr) + startoff;
2284 if (PL_madskills && !version) {
2285 sv_free(PL_nextwhite); /* let next token collect whitespace */
2287 s = SvPVX(PL_linestr) + startoff;
2290 /* NOTE: The parser sees the package name and the VERSION swapped */
2291 start_force(PL_curforce);
2292 NEXTVAL_NEXTTOKE.opval = version;
2299 * S_force_strict_version
2300 * Forces the next token to be a version number using strict syntax rules.
2304 S_force_strict_version(pTHX_ char *s)
2309 I32 startoff = s - SvPVX(PL_linestr);
2311 const char *errstr = NULL;
2313 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2315 while (isSPACE(*s)) /* leading whitespace */
2318 if (is_STRICT_VERSION(s,&errstr)) {
2320 s = (char *)scan_version(s, ver, 0);
2321 version = newSVOP(OP_CONST, 0, ver);
2323 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2324 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2328 yyerror(errstr); /* version required */
2333 if (PL_madskills && !version) {
2334 sv_free(PL_nextwhite); /* let next token collect whitespace */
2336 s = SvPVX(PL_linestr) + startoff;
2339 /* NOTE: The parser sees the package name and the VERSION swapped */
2340 start_force(PL_curforce);
2341 NEXTVAL_NEXTTOKE.opval = version;
2349 * Tokenize a quoted string passed in as an SV. It finds the next
2350 * chunk, up to end of string or a backslash. It may make a new
2351 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2356 S_tokeq(pTHX_ SV *sv)
2365 PERL_ARGS_ASSERT_TOKEQ;
2370 s = SvPV_force(sv, len);
2371 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2374 /* This is relying on the SV being "well formed" with a trailing '\0' */
2375 while (s < send && !(*s == '\\' && s[1] == '\\'))
2380 if ( PL_hints & HINT_NEW_STRING ) {
2381 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2385 if (s + 1 < send && (s[1] == '\\'))
2386 s++; /* all that, just for this */
2391 SvCUR_set(sv, d - SvPVX_const(sv));
2393 if ( PL_hints & HINT_NEW_STRING )
2394 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2399 * Now come three functions related to double-quote context,
2400 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2401 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2402 * interact with PL_lex_state, and create fake ( ... ) argument lists
2403 * to handle functions and concatenation.
2407 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2412 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2414 * Pattern matching will set PL_lex_op to the pattern-matching op to
2415 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2417 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2419 * Everything else becomes a FUNC.
2421 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2422 * had an OP_CONST or OP_READLINE). This just sets us up for a
2423 * call to S_sublex_push().
2427 S_sublex_start(pTHX)
2430 const I32 op_type = pl_yylval.ival;
2432 if (op_type == OP_NULL) {
2433 pl_yylval.opval = PL_lex_op;
2437 if (op_type == OP_CONST || op_type == OP_READLINE) {
2438 SV *sv = tokeq(PL_lex_stuff);
2440 if (SvTYPE(sv) == SVt_PVIV) {
2441 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2443 const char * const p = SvPV_const(sv, len);
2444 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2448 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2449 PL_lex_stuff = NULL;
2450 /* Allow <FH> // "foo" */
2451 if (op_type == OP_READLINE)
2452 PL_expect = XTERMORDORDOR;
2455 else if (op_type == OP_BACKTICK && PL_lex_op) {
2456 /* readpipe() vas overriden */
2457 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2458 pl_yylval.opval = PL_lex_op;
2460 PL_lex_stuff = NULL;
2464 PL_sublex_info.super_state = PL_lex_state;
2465 PL_sublex_info.sub_inwhat = (U16)op_type;
2466 PL_sublex_info.sub_op = PL_lex_op;
2467 PL_lex_state = LEX_INTERPPUSH;
2471 pl_yylval.opval = PL_lex_op;
2481 * Create a new scope to save the lexing state. The scope will be
2482 * ended in S_sublex_done. Returns a '(', starting the function arguments
2483 * to the uc, lc, etc. found before.
2484 * Sets PL_lex_state to LEX_INTERPCONCAT.
2494 PL_lex_state = PL_sublex_info.super_state;
2495 SAVEBOOL(PL_lex_dojoin);
2496 SAVEI32(PL_lex_brackets);
2497 SAVEI32(PL_lex_allbrackets);
2498 SAVEI32(PL_lex_formbrack);
2499 SAVEI8(PL_lex_fakeeof);
2500 SAVEI32(PL_lex_casemods);
2501 SAVEI32(PL_lex_starts);
2502 SAVEI8(PL_lex_state);
2503 SAVESPTR(PL_lex_repl);
2504 SAVEVPTR(PL_lex_inpat);
2505 SAVEI16(PL_lex_inwhat);
2506 SAVECOPLINE(PL_curcop);
2507 SAVEPPTR(PL_bufptr);
2508 SAVEPPTR(PL_bufend);
2509 SAVEPPTR(PL_oldbufptr);
2510 SAVEPPTR(PL_oldoldbufptr);
2511 SAVEPPTR(PL_last_lop);
2512 SAVEPPTR(PL_last_uni);
2513 SAVEPPTR(PL_linestart);
2514 SAVESPTR(PL_linestr);
2515 SAVEGENERICPV(PL_lex_brackstack);
2516 SAVEGENERICPV(PL_lex_casestack);
2517 SAVEGENERICPV(PL_parser->lex_shared);
2519 /* The here-doc parser needs to be able to peek into outer lexing
2520 scopes to find the body of the here-doc. So we put PL_linestr and
2521 PL_bufptr into lex_shared, to ‘share’ those values.
2523 PL_parser->lex_shared->ls_linestr = PL_linestr;
2524 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2526 PL_linestr = PL_lex_stuff;
2527 PL_lex_repl = PL_sublex_info.repl;
2528 PL_lex_stuff = NULL;
2529 PL_sublex_info.repl = NULL;
2531 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2532 = SvPVX(PL_linestr);
2533 PL_bufend += SvCUR(PL_linestr);
2534 PL_last_lop = PL_last_uni = NULL;
2535 SAVEFREESV(PL_linestr);
2536 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2538 PL_lex_dojoin = FALSE;
2539 PL_lex_brackets = PL_lex_formbrack = 0;
2540 PL_lex_allbrackets = 0;
2541 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2542 Newx(PL_lex_brackstack, 120, char);
2543 Newx(PL_lex_casestack, 12, char);
2544 PL_lex_casemods = 0;
2545 *PL_lex_casestack = '\0';
2547 PL_lex_state = LEX_INTERPCONCAT;
2548 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2550 Newxz(shared, 1, LEXSHARED);
2551 shared->ls_prev = PL_parser->lex_shared;
2552 PL_parser->lex_shared = shared;
2554 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2555 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2556 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2557 PL_lex_inpat = PL_sublex_info.sub_op;
2559 PL_lex_inpat = NULL;
2566 * Restores lexer state after a S_sublex_push.
2573 if (!PL_lex_starts++) {
2574 SV * const sv = newSVpvs("");
2575 if (SvUTF8(PL_linestr))
2577 PL_expect = XOPERATOR;
2578 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2582 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2583 PL_lex_state = LEX_INTERPCASEMOD;
2587 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2588 assert(PL_lex_inwhat != OP_TRANSR);
2589 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2590 PL_linestr = PL_lex_repl;
2592 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2593 PL_bufend += SvCUR(PL_linestr);
2594 PL_last_lop = PL_last_uni = NULL;
2595 PL_lex_dojoin = FALSE;
2596 PL_lex_brackets = 0;
2597 PL_lex_allbrackets = 0;
2598 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2599 PL_lex_casemods = 0;
2600 *PL_lex_casestack = '\0';
2602 if (SvEVALED(PL_lex_repl)) {
2603 PL_lex_state = LEX_INTERPNORMAL;
2605 /* we don't clear PL_lex_repl here, so that we can check later
2606 whether this is an evalled subst; that means we rely on the
2607 logic to ensure sublex_done() is called again only via the
2608 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2611 PL_lex_state = LEX_INTERPCONCAT;
2621 PL_endwhite = newSVpvs("");
2622 sv_catsv(PL_endwhite, PL_thiswhite);
2626 sv_setpvs(PL_thistoken,"");
2628 PL_realtokenstart = -1;
2632 PL_bufend = SvPVX(PL_linestr);
2633 PL_bufend += SvCUR(PL_linestr);
2634 PL_expect = XOPERATOR;
2635 PL_sublex_info.sub_inwhat = 0;
2640 PERL_STATIC_INLINE SV*
2641 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2643 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2644 * interior, hence to the "}". Finds what the name resolves to, returning
2645 * an SV* containing it; NULL if no valid one found */
2647 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2654 const U8* first_bad_char_loc;
2655 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2657 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2659 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2661 &first_bad_char_loc))
2663 /* If warnings are on, this will print a more detailed analysis of what
2664 * is wrong than the error message below */
2665 utf8n_to_uvuni(first_bad_char_loc,
2666 e - ((char *) first_bad_char_loc),
2669 /* We deliberately don't try to print the malformed character, which
2670 * might not print very well; it also may be just the first of many
2671 * malformations, so don't print what comes after it */
2672 yyerror(Perl_form(aTHX_
2673 "Malformed UTF-8 character immediately after '%.*s'",
2674 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2678 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2679 /* include the <}> */
2680 e - backslash_ptr + 1);
2685 /* See if the charnames handler is the Perl core's, and if so, we can skip
2686 * the validation needed for a user-supplied one, as Perl's does its own
2688 table = GvHV(PL_hintgv); /* ^H */
2689 cvp = hv_fetchs(table, "charnames", FALSE);
2691 if (((rv = SvRV(cv)) != NULL)
2692 && ((stash = CvSTASH(rv)) != NULL))
2694 const char * const name = HvNAME(stash);
2695 if strEQ(name, "_charnames") {
2700 /* Here, it isn't Perl's charname handler. We can't rely on a
2701 * user-supplied handler to validate the input name. For non-ut8 input,
2702 * look to see that the first character is legal. Then loop through the
2703 * rest checking that each is a continuation */
2705 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2709 if (! isALPHAU(*s)) {
2714 if (! isCHARNAME_CONT(*s)) {
2721 /* Similarly for utf8. For invariants can check directly; for other
2722 * Latin1, can calculate their code point and check; otherwise use a
2724 if (UTF8_IS_INVARIANT(*s)) {
2725 if (! isALPHAU(*s)) {
2729 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2730 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
2736 if (! PL_utf8_charname_begin) {
2737 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2738 PL_utf8_charname_begin = _core_swash_init("utf8",
2739 "_Perl_Charname_Begin",
2741 1, 0, NULL, &flags);
2743 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2750 if (UTF8_IS_INVARIANT(*s)) {
2751 if (! isCHARNAME_CONT(*s)) {
2756 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2757 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2765 if (! PL_utf8_charname_continue) {
2766 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2767 PL_utf8_charname_continue = _core_swash_init("utf8",
2768 "_Perl_Charname_Continue",
2770 1, 0, NULL, &flags);
2772 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2780 if (SvUTF8(res)) { /* Don't accept malformed input */
2781 const U8* first_bad_char_loc;
2783 const char* const str = SvPV_const(res, len);
2784 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2785 /* If warnings are on, this will print a more detailed analysis of
2786 * what is wrong than the error message below */
2787 utf8n_to_uvuni(first_bad_char_loc,
2788 (char *) first_bad_char_loc - str,
2791 /* We deliberately don't try to print the malformed character,
2792 * which might not print very well; it also may be just the first
2793 * of many malformations, so don't print what comes after it */
2796 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2797 (int) (e - backslash_ptr + 1), backslash_ptr,
2798 (int) ((char *) first_bad_char_loc - str), str
2808 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2810 /* The final %.*s makes sure that should the trailing NUL be missing
2811 * that this print won't run off the end of the string */
2814 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2815 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2816 (int)(e - s + bad_char_size), s + bad_char_size
2818 UTF ? SVf_UTF8 : 0);
2826 Extracts the next constant part of a pattern, double-quoted string,
2827 or transliteration. This is terrifying code.
2829 For example, in parsing the double-quoted string "ab\x63$d", it would
2830 stop at the '$' and return an OP_CONST containing 'abc'.
2832 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2833 processing a pattern (PL_lex_inpat is true), a transliteration
2834 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2836 Returns a pointer to the character scanned up to. If this is
2837 advanced from the start pointer supplied (i.e. if anything was
2838 successfully parsed), will leave an OP_CONST for the substring scanned
2839 in pl_yylval. Caller must intuit reason for not parsing further
2840 by looking at the next characters herself.
2844 \N{ABC} => \N{U+41.42.43}
2847 all other \-char, including \N and \N{ apart from \N{ABC}
2850 @ and $ where it appears to be a var, but not for $ as tail anchor
2855 In transliterations:
2856 characters are VERY literal, except for - not at the start or end
2857 of the string, which indicates a range. If the range is in bytes,
2858 scan_const expands the range to the full set of intermediate
2859 characters. If the range is in utf8, the hyphen is replaced with
2860 a certain range mark which will be handled by pmtrans() in op.c.
2862 In double-quoted strings:
2864 double-quoted style: \r and \n
2865 constants: \x31, etc.
2866 deprecated backrefs: \1 (in substitution replacements)
2867 case and quoting: \U \Q \E
2870 scan_const does *not* construct ops to handle interpolated strings.
2871 It stops processing as soon as it finds an embedded $ or @ variable
2872 and leaves it to the caller to work out what's going on.
2874 embedded arrays (whether in pattern or not) could be:
2875 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2877 $ in double-quoted strings must be the symbol of an embedded scalar.
2879 $ in pattern could be $foo or could be tail anchor. Assumption:
2880 it's a tail anchor if $ is the last thing in the string, or if it's
2881 followed by one of "()| \r\n\t"
2883 \1 (backreferences) are turned into $1 in substitutions
2885 The structure of the code is
2886 while (there's a character to process) {
2887 handle transliteration ranges
2888 skip regexp comments /(?#comment)/ and codes /(?{code})/
2889 skip #-initiated comments in //x patterns
2890 check for embedded arrays
2891 check for embedded scalars
2893 deprecate \1 in substitution replacements
2894 handle string-changing backslashes \l \U \Q \E, etc.
2895 switch (what was escaped) {
2896 handle \- in a transliteration (becomes a literal -)
2897 if a pattern and not \N{, go treat as regular character
2898 handle \132 (octal characters)
2899 handle \x15 and \x{1234} (hex characters)
2900 handle \N{name} (named characters, also \N{3,5} in a pattern)
2901 handle \cV (control characters)
2902 handle printf-style backslashes (\f, \r, \n, etc)
2905 } (end if backslash)
2906 handle regular character
2907 } (end while character to read)
2912 S_scan_const(pTHX_ char *start)
2915 char *send = PL_bufend; /* end of the constant */
2916 SV *sv = newSV(send - start); /* sv for the constant. See
2917 note below on sizing. */
2918 char *s = start; /* start of the constant */
2919 char *d = SvPVX(sv); /* destination for copies */
2920 bool dorange = FALSE; /* are we in a translit range? */
2921 bool didrange = FALSE; /* did we just finish a range? */
2922 bool in_charclass = FALSE; /* within /[...]/ */
2923 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2924 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2925 to be UTF8? But, this can
2926 show as true when the source
2927 isn't utf8, as for example
2928 when it is entirely composed
2930 SV *res; /* result from charnames */
2932 /* Note on sizing: The scanned constant is placed into sv, which is
2933 * initialized by newSV() assuming one byte of output for every byte of
2934 * input. This routine expects newSV() to allocate an extra byte for a
2935 * trailing NUL, which this routine will append if it gets to the end of
2936 * the input. There may be more bytes of input than output (eg., \N{LATIN
2937 * CAPITAL LETTER A}), or more output than input if the constant ends up
2938 * recoded to utf8, but each time a construct is found that might increase
2939 * the needed size, SvGROW() is called. Its size parameter each time is
2940 * based on the best guess estimate at the time, namely the length used so
2941 * far, plus the length the current construct will occupy, plus room for
2942 * the trailing NUL, plus one byte for every input byte still unscanned */
2946 UV literal_endpoint = 0;
2947 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2950 PERL_ARGS_ASSERT_SCAN_CONST;
2952 assert(PL_lex_inwhat != OP_TRANSR);
2953 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2954 /* If we are doing a trans and we know we want UTF8 set expectation */
2955 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2956 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2960 while (s < send || dorange) {
2962 /* get transliterations out of the way (they're most literal) */
2963 if (PL_lex_inwhat == OP_TRANS) {
2964 /* expand a range A-Z to the full set of characters. AIE! */
2966 I32 i; /* current expanded character */
2967 I32 min; /* first character in range */
2968 I32 max; /* last character in range */
2979 char * const c = (char*)utf8_hop((U8*)d, -1);
2983 *c = (char)UTF_TO_NATIVE(0xff);
2984 /* mark the range as done, and continue */
2990 i = d - SvPVX_const(sv); /* remember current offset */
2993 SvLEN(sv) + (has_utf8 ?
2994 (512 - UTF_CONTINUATION_MARK +
2997 /* How many two-byte within 0..255: 128 in UTF-8,
2998 * 96 in UTF-8-mod. */
3000 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3002 d = SvPVX(sv) + i; /* refresh d after realloc */
3006 for (j = 0; j <= 1; j++) {
3007 char * const c = (char*)utf8_hop((U8*)d, -1);
3008 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3014 max = (U8)0xff; /* only to \xff */
3015 uvmax = uv; /* \x{100} to uvmax */
3017 d = c; /* eat endpoint chars */
3022 d -= 2; /* eat the first char and the - */
3023 min = (U8)*d; /* first char in range */
3024 max = (U8)d[1]; /* last char in range */
3032 "Invalid range \"%c-%c\" in transliteration operator",
3033 (char)min, (char)max);
3037 if (literal_endpoint == 2 &&
3038 ((isLOWER(min) && isLOWER(max)) ||
3039 (isUPPER(min) && isUPPER(max)))) {
3041 for (i = min; i <= max; i++)
3043 *d++ = NATIVE_TO_NEED(has_utf8,i);
3045 for (i = min; i <= max; i++)
3047 *d++ = NATIVE_TO_NEED(has_utf8,i);
3052 for (i = min; i <= max; i++)
3055 const U8 ch = (U8)NATIVE_TO_UTF(i);
3056 if (UNI_IS_INVARIANT(ch))
3059 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3060 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3069 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3071 *d++ = (char)UTF_TO_NATIVE(0xff);
3073 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3077 /* mark the range as done, and continue */
3081 literal_endpoint = 0;
3086 /* range begins (ignore - as first or last char) */
3087 else if (*s == '-' && s+1 < send && s != start) {
3090 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3097 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
3107 literal_endpoint = 0;
3108 native_range = TRUE;
3113 /* if we get here, we're not doing a transliteration */
3115 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3118 while (s1 >= start && *s1-- == '\\')
3121 in_charclass = TRUE;
3124 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3127 while (s1 >= start && *s1-- == '\\')
3130 in_charclass = FALSE;
3133 /* skip for regexp comments /(?#comment)/, except for the last
3134 * char, which will be done separately.
3135 * Stop on (?{..}) and friends */
3137 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
3139 while (s+1 < send && *s != ')')
3140 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3142 else if (!PL_lex_casemods && !in_charclass &&
3143 ( s[2] == '{' /* This should match regcomp.c */
3144 || (s[2] == '?' && s[3] == '{')))
3150 /* likewise skip #-initiated comments in //x patterns */
3151 else if (*s == '#' && PL_lex_inpat &&
3152 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3153 while (s+1 < send && *s != '\n')
3154 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3157 /* no further processing of single-quoted regex */
3158 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3159 goto default_action;
3161 /* check for embedded arrays
3162 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3164 else if (*s == '@' && s[1]) {
3165 if (isALNUM_lazy_if(s+1,UTF))
3167 if (strchr(":'{$", s[1]))
3169 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3170 break; /* in regexp, neither @+ nor @- are interpolated */
3173 /* check for embedded scalars. only stop if we're sure it's a
3176 else if (*s == '$') {
3177 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3179 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3181 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3182 "Possible unintended interpolation of $\\ in regex");
3184 break; /* in regexp, $ might be tail anchor */
3188 /* End of else if chain - OP_TRANS rejoin rest */
3191 if (*s == '\\' && s+1 < send) {
3192 char* e; /* Can be used for ending '}', etc. */
3196 /* warn on \1 - \9 in substitution replacements, but note that \11
3197 * is an octal; and \19 is \1 followed by '9' */
3198 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3199 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3201 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3206 /* string-change backslash escapes */
3207 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3211 /* In a pattern, process \N, but skip any other backslash escapes.
3212 * This is because we don't want to translate an escape sequence
3213 * into a meta symbol and have the regex compiler use the meta
3214 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3215 * in spite of this, we do have to process \N here while the proper
3216 * charnames handler is in scope. See bugs #56444 and #62056.
3217 * There is a complication because \N in a pattern may also stand
3218 * for 'match a non-nl', and not mean a charname, in which case its
3219 * processing should be deferred to the regex compiler. To be a
3220 * charname it must be followed immediately by a '{', and not look
3221 * like \N followed by a curly quantifier, i.e., not something like
3222 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3224 else if (PL_lex_inpat
3227 || regcurly(s + 1)))
3229 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3230 goto default_action;
3235 /* quoted - in transliterations */
3237 if (PL_lex_inwhat == OP_TRANS) {
3245 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3246 "Unrecognized escape \\%c passed through",
3248 /* default action is to copy the quoted character */
3249 goto default_action;
3252 /* eg. \132 indicates the octal constant 0132 */
3253 case '0': case '1': case '2': case '3':
3254 case '4': case '5': case '6': case '7':
3258 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
3261 goto NUM_ESCAPE_INSERT;
3263 /* eg. \o{24} indicates the octal constant \024 */
3269 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
3275 goto NUM_ESCAPE_INSERT;
3278 /* eg. \x24 indicates the hex constant 0x24 */
3284 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3293 /* Insert oct or hex escaped character. There will always be
3294 * enough room in sv since such escapes will be longer than any
3295 * UTF-8 sequence they can end up as, except if they force us
3296 * to recode the rest of the string into utf8 */
3298 /* Here uv is the ordinal of the next character being added in
3299 * unicode (converted from native). */
3300 if (!UNI_IS_INVARIANT(uv)) {
3301 if (!has_utf8 && uv > 255) {
3302 /* Might need to recode whatever we have accumulated so
3303 * far if it contains any chars variant in utf8 or
3306 SvCUR_set(sv, d - SvPVX_const(sv));
3309 /* See Note on sizing above. */
3310 sv_utf8_upgrade_flags_grow(sv,
3311 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3312 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3313 d = SvPVX(sv) + SvCUR(sv);
3318 d = (char*)uvuni_to_utf8((U8*)d, uv);
3319 if (PL_lex_inwhat == OP_TRANS &&
3320 PL_sublex_info.sub_op) {
3321 PL_sublex_info.sub_op->op_private |=
3322 (PL_lex_repl ? OPpTRANS_FROM_UTF
3326 if (uv > 255 && !dorange)
3327 native_range = FALSE;
3340 /* In a non-pattern \N must be a named character, like \N{LATIN
3341 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3342 * mean to match a non-newline. For non-patterns, named
3343 * characters are converted to their string equivalents. In
3344 * patterns, named characters are not converted to their
3345 * ultimate forms for the same reasons that other escapes
3346 * aren't. Instead, they are converted to the \N{U+...} form
3347 * to get the value from the charnames that is in effect right
3348 * now, while preserving the fact that it was a named character
3349 * so that the regex compiler knows this */
3351 /* This section of code doesn't generally use the
3352 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3353 * a close examination of this macro and determined it is a
3354 * no-op except on utfebcdic variant characters. Every
3355 * character generated by this that would normally need to be
3356 * enclosed by this macro is invariant, so the macro is not
3357 * needed, and would complicate use of copy(). XXX There are
3358 * other parts of this file where the macro is used
3359 * inconsistently, but are saved by it being a no-op */
3361 /* The structure of this section of code (besides checking for
3362 * errors and upgrading to utf8) is:
3363 * Further disambiguate between the two meanings of \N, and if
3364 * not a charname, go process it elsewhere
3365 * If of form \N{U+...}, pass it through if a pattern;
3366 * otherwise convert to utf8
3367 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3368 * pattern; otherwise convert to utf8 */
3370 /* Here, s points to the 'N'; the test below is guaranteed to
3371 * succeed if we are being called on a pattern as we already
3372 * know from a test above that the next character is a '{'.
3373 * On a non-pattern \N must mean 'named sequence, which
3374 * requires braces */
3377 yyerror("Missing braces on \\N{}");
3382 /* If there is no matching '}', it is an error. */
3383 if (! (e = strchr(s, '}'))) {
3384 if (! PL_lex_inpat) {
3385 yyerror("Missing right brace on \\N{}");
3387 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3392 /* Here it looks like a named character */
3394 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3395 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3396 | PERL_SCAN_DISALLOW_PREFIX;
3399 /* For \N{U+...}, the '...' is a unicode value even on
3400 * EBCDIC machines */
3401 s += 2; /* Skip to next char after the 'U+' */
3403 uv = grok_hex(s, &len, &flags, NULL);
3404 if (len == 0 || len != (STRLEN)(e - s)) {
3405 yyerror("Invalid hexadecimal number in \\N{U+...}");
3412 /* On non-EBCDIC platforms, pass through to the regex
3413 * compiler unchanged. The reason we evaluated the
3414 * number above is to make sure there wasn't a syntax
3415 * error. But on EBCDIC we convert to native so
3416 * downstream code can continue to assume it's native
3418 s -= 5; /* Include the '\N{U+' */
3420 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3423 (unsigned int) UNI_TO_NATIVE(uv));
3425 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3429 else { /* Not a pattern: convert the hex to string */
3431 /* If destination is not in utf8, unconditionally
3432 * recode it to be so. This is because \N{} implies
3433 * Unicode semantics, and scalars have to be in utf8
3434 * to guarantee those semantics */
3436 SvCUR_set(sv, d - SvPVX_const(sv));
3439 /* See Note on sizing above. */
3440 sv_utf8_upgrade_flags_grow(
3442 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3443 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3444 d = SvPVX(sv) + SvCUR(sv);
3448 /* Add the string to the output */
3449 if (UNI_IS_INVARIANT(uv)) {
3452 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3455 else /* Here is \N{NAME} but not \N{U+...}. */
3456 if ((res = get_and_check_backslash_N_name(s, e)))
3459 const char *str = SvPV_const(res, len);
3462 if (! len) { /* The name resolved to an empty string */
3463 Copy("\\N{}", d, 4, char);
3467 /* In order to not lose information for the regex
3468 * compiler, pass the result in the specially made
3469 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3470 * the code points in hex of each character
3471 * returned by charnames */
3473 const char *str_end = str + len;
3474 const STRLEN off = d - SvPVX_const(sv);
3476 if (! SvUTF8(res)) {
3477 /* For the non-UTF-8 case, we can determine the
3478 * exact length needed without having to parse
3479 * through the string. Each character takes up
3480 * 2 hex digits plus either a trailing dot or
3482 d = off + SvGROW(sv, off
3484 + 6 /* For the "\N{U+", and
3486 + (STRLEN)(send - e));
3487 Copy("\\N{U+", d, 5, char);
3489 while (str < str_end) {
3491 my_snprintf(hex_string, sizeof(hex_string),
3492 "%02X.", (U8) *str);
3493 Copy(hex_string, d, 3, char);
3497 d--; /* We will overwrite below the final
3498 dot with a right brace */
3501 STRLEN char_length; /* cur char's byte length */
3503 /* and the number of bytes after this is
3504 * translated into hex digits */
3505 STRLEN output_length;
3507 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3508 * for max('U+', '.'); and 1 for NUL */
3509 char hex_string[2 * UTF8_MAXBYTES + 5];
3511 /* Get the first character of the result. */
3512 U32 uv = utf8n_to_uvuni((U8 *) str,
3516 /* Convert first code point to hex, including
3517 * the boiler plate before it. For all these,
3518 * we convert to native format so that
3519 * downstream code can continue to assume the
3520 * input is native */
3522 my_snprintf(hex_string, sizeof(hex_string),
3524 (unsigned int) UNI_TO_NATIVE(uv));
3526 /* Make sure there is enough space to hold it */
3527 d = off + SvGROW(sv, off
3529 + (STRLEN)(send - e)
3530 + 2); /* '}' + NUL */
3532 Copy(hex_string, d, output_length, char);
3535 /* For each subsequent character, append dot and
3536 * its ordinal in hex */
3537 while ((str += char_length) < str_end) {
3538 const STRLEN off = d - SvPVX_const(sv);
3539 U32 uv = utf8n_to_uvuni((U8 *) str,
3544 my_snprintf(hex_string,
3547 (unsigned int) UNI_TO_NATIVE(uv));
3549 d = off + SvGROW(sv, off
3551 + (STRLEN)(send - e)
3552 + 2); /* '}' + NUL */
3553 Copy(hex_string, d, output_length, char);
3558 *d++ = '}'; /* Done. Add the trailing brace */
3561 else { /* Here, not in a pattern. Convert the name to a
3564 /* If destination is not in utf8, unconditionally
3565 * recode it to be so. This is because \N{} implies
3566 * Unicode semantics, and scalars have to be in utf8
3567 * to guarantee those semantics */
3569 SvCUR_set(sv, d - SvPVX_const(sv));
3572 /* See Note on sizing above. */
3573 sv_utf8_upgrade_flags_grow(sv,
3574 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3575 len + (STRLEN)(send - s) + 1);
3576 d = SvPVX(sv) + SvCUR(sv);
3578 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3580 /* See Note on sizing above. (NOTE: SvCUR() is not
3581 * set correctly here). */
3582 const STRLEN off = d - SvPVX_const(sv);
3583 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3585 Copy(str, d, len, char);
3591 } /* End \N{NAME} */
3594 native_range = FALSE; /* \N{} is defined to be Unicode */
3596 s = e + 1; /* Point to just after the '}' */
3599 /* \c is a control character */
3603 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3606 yyerror("Missing control char name in \\c");
3610 /* printf-style backslashes, formfeeds, newlines, etc */
3612 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3615 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3618 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3621 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3624 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3627 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3630 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3636 } /* end if (backslash) */
3643 /* If we started with encoded form, or already know we want it,
3644 then encode the next character */
3645 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3649 /* One might think that it is wasted effort in the case of the
3650 * source being utf8 (this_utf8 == TRUE) to take the next character
3651 * in the source, convert it to an unsigned value, and then convert
3652 * it back again. But the source has not been validated here. The
3653 * routine that does the conversion checks for errors like
3656 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3657 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3659 SvCUR_set(sv, d - SvPVX_const(sv));
3662 /* See Note on sizing above. */
3663 sv_utf8_upgrade_flags_grow(sv,
3664 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3665 need + (STRLEN)(send - s) + 1);
3666 d = SvPVX(sv) + SvCUR(sv);
3668 } else if (need > len) {
3669 /* encoded value larger than old, may need extra space (NOTE:
3670 * SvCUR() is not set correctly here). See Note on sizing
3672 const STRLEN off = d - SvPVX_const(sv);
3673 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3677 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3679 if (uv > 255 && !dorange)
3680 native_range = FALSE;
3684 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3686 } /* while loop to process each character */
3688 /* terminate the string and set up the sv */
3690 SvCUR_set(sv, d - SvPVX_const(sv));
3691 if (SvCUR(sv) >= SvLEN(sv))
3692 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3693 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3696 if (PL_encoding && !has_utf8) {
3697 sv_recode_to_utf8(sv, PL_encoding);
3703 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3704 PL_sublex_info.sub_op->op_private |=
3705 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3709 /* shrink the sv if we allocated more than we used */
3710 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3711 SvPV_shrink_to_cur(sv);
3714 /* return the substring (via pl_yylval) only if we parsed anything */
3715 if (s > PL_bufptr) {
3716 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3717 const char *const key = PL_lex_inpat ? "qr" : "q";
3718 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3722 if (PL_lex_inwhat == OP_TRANS) {
3725 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3728 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3736 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3739 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3746 * Returns TRUE if there's more to the expression (e.g., a subscript),
3749 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3751 * ->[ and ->{ return TRUE
3752 * { and [ outside a pattern are always subscripts, so return TRUE
3753 * if we're outside a pattern and it's not { or [, then return FALSE
3754 * if we're in a pattern and the first char is a {
3755 * {4,5} (any digits around the comma) returns FALSE
3756 * if we're in a pattern and the first char is a [
3758 * [SOMETHING] has a funky algorithm to decide whether it's a
3759 * character class or not. It has to deal with things like
3760 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3761 * anything else returns TRUE
3764 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3767 S_intuit_more(pTHX_ register char *s)
3771 PERL_ARGS_ASSERT_INTUIT_MORE;
3773 if (PL_lex_brackets)
3775 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3777 if (*s != '{' && *s != '[')
3782 /* In a pattern, so maybe we have {n,m}. */
3790 /* On the other hand, maybe we have a character class */
3793 if (*s == ']' || *s == '^')
3796 /* this is terrifying, and it works */
3797 int weight = 2; /* let's weigh the evidence */
3799 unsigned char un_char = 255, last_un_char;
3800 const char * const send = strchr(s,']');
3801 char tmpbuf[sizeof PL_tokenbuf * 4];
3803 if (!send) /* has to be an expression */
3806 Zero(seen,256,char);
3809 else if (isDIGIT(*s)) {
3811 if (isDIGIT(s[1]) && s[2] == ']')
3817 for (; s < send; s++) {
3818 last_un_char = un_char;
3819 un_char = (unsigned char)*s;
3824 weight -= seen[un_char] * 10;
3825 if (isALNUM_lazy_if(s+1,UTF)) {
3827 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3828 len = (int)strlen(tmpbuf);
3829 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3830 UTF ? SVf_UTF8 : 0, SVt_PV))
3835 else if (*s == '$' && s[1] &&
3836 strchr("[#!%*<>()-=",s[1])) {
3837 if (/*{*/ strchr("])} =",s[2]))
3846 if (strchr("wds]",s[1]))
3848 else if (seen[(U8)'\''] || seen[(U8)'"'])
3850 else if (strchr("rnftbxcav",s[1]))
3852 else if (isDIGIT(s[1])) {
3854 while (s[1] && isDIGIT(s[1]))
3864 if (strchr("aA01! ",last_un_char))
3866 if (strchr("zZ79~",s[1]))
3868 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3869 weight -= 5; /* cope with negative subscript */
3872 if (!isALNUM(last_un_char)
3873 && !(last_un_char == '$' || last_un_char == '@'
3874 || last_un_char == '&')
3875 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3880 if (keyword(tmpbuf, d - tmpbuf, 0))
3883 if (un_char == last_un_char + 1)
3885 weight -= seen[un_char];
3890 if (weight >= 0) /* probably a character class */
3900 * Does all the checking to disambiguate
3902 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3903 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3905 * First argument is the stuff after the first token, e.g. "bar".
3907 * Not a method if foo is a filehandle.
3908 * Not a method if foo is a subroutine prototyped to take a filehandle.
3909 * Not a method if it's really "Foo $bar"
3910 * Method if it's "foo $bar"
3911 * Not a method if it's really "print foo $bar"
3912 * Method if it's really "foo package::" (interpreted as package->foo)
3913 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3914 * Not a method if bar is a filehandle or package, but is quoted with
3919 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3922 char *s = start + (*start == '$');
3923 char tmpbuf[sizeof PL_tokenbuf];
3930 PERL_ARGS_ASSERT_INTUIT_METHOD;
3932 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3934 if (cv && SvPOK(cv)) {
3935 const char *proto = CvPROTO(cv);
3943 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3944 /* start is the beginning of the possible filehandle/object,
3945 * and s is the end of it
3946 * tmpbuf is a copy of it
3949 if (*start == '$') {
3950 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3951 isUPPER(*PL_tokenbuf))
3954 len = start - SvPVX(PL_linestr);
3958 start = SvPVX(PL_linestr) + len;
3962 return *s == '(' ? FUNCMETH : METHOD;
3964 if (!keyword(tmpbuf, len, 0)) {
3965 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3969 soff = s - SvPVX(PL_linestr);
3973 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3974 if (indirgv && GvCVu(indirgv))
3976 /* filehandle or package name makes it a method */
3977 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3979 soff = s - SvPVX(PL_linestr);
3982 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3983 return 0; /* no assumptions -- "=>" quotes bareword */
3985 start_force(PL_curforce);
3986 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3987 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3988 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3990 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3991 ( UTF ? SVf_UTF8 : 0 )));
3996 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3998 return *s == '(' ? FUNCMETH : METHOD;
4004 /* Encoded script support. filter_add() effectively inserts a
4005 * 'pre-processing' function into the current source input stream.
4006 * Note that the filter function only applies to the current source file
4007 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4009 * The datasv parameter (which may be NULL) can be used to pass
4010 * private data to this instance of the filter. The filter function
4011 * can recover the SV using the FILTER_DATA macro and use it to
4012 * store private buffers and state information.
4014 * The supplied datasv parameter is upgraded to a PVIO type
4015 * and the IoDIRP/IoANY field is used to store the function pointer,
4016 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4017 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4018 * private use must be set using malloc'd pointers.
4022 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4031 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4032 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4034 if (!PL_rsfp_filters)
4035 PL_rsfp_filters = newAV();
4038 SvUPGRADE(datasv, SVt_PVIO);
4039 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4040 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4041 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4042 FPTR2DPTR(void *, IoANY(datasv)),
4043 SvPV_nolen(datasv)));
4044 av_unshift(PL_rsfp_filters, 1);
4045 av_store(PL_rsfp_filters, 0, datasv) ;
4047 !PL_parser->filtered
4048 && PL_parser->lex_flags & LEX_EVALBYTES
4049 && PL_bufptr < PL_bufend
4051 const char *s = PL_bufptr;
4052 while (s < PL_bufend) {
4054 SV *linestr = PL_parser->linestr;
4055 char *buf = SvPVX(linestr);
4056 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4057 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4058 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4059 STRLEN const linestart_pos = PL_parser->linestart - buf;
4060 STRLEN const last_uni_pos =
4061 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4062 STRLEN const last_lop_pos =
4063 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4064 av_push(PL_rsfp_filters, linestr);
4065 PL_parser->linestr =
4066 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4067 buf = SvPVX(PL_parser->linestr);
4068 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4069 PL_parser->bufptr = buf + bufptr_pos;
4070 PL_parser->oldbufptr = buf + oldbufptr_pos;
4071 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4072 PL_parser->linestart = buf + linestart_pos;
4073 if (PL_parser->last_uni)
4074 PL_parser->last_uni = buf + last_uni_pos;
4075 if (PL_parser->last_lop)
4076 PL_parser->last_lop = buf + last_lop_pos;
4077 SvLEN(linestr) = SvCUR(linestr);
4078 SvCUR(linestr) = s-SvPVX(linestr);
4079 PL_parser->filtered = 1;
4089 /* Delete most recently added instance of this filter function. */
4091 Perl_filter_del(pTHX_ filter_t funcp)
4096 PERL_ARGS_ASSERT_FILTER_DEL;
4099 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4100 FPTR2DPTR(void*, funcp)));
4102 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4104 /* if filter is on top of stack (usual case) just pop it off */
4105 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4106 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4107 sv_free(av_pop(PL_rsfp_filters));
4111 /* we need to search for the correct entry and clear it */
4112 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4116 /* Invoke the idxth filter function for the current rsfp. */
4117 /* maxlen 0 = read one text line */
4119 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4124 /* This API is bad. It should have been using unsigned int for maxlen.
4125 Not sure if we want to change the API, but if not we should sanity
4126 check the value here. */
4127 unsigned int correct_length
4136 PERL_ARGS_ASSERT_FILTER_READ;
4138 if (!PL_parser || !PL_rsfp_filters)
4140 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4141 /* Provide a default input filter to make life easy. */
4142 /* Note that we append to the line. This is handy. */
4143 DEBUG_P(PerlIO_printf(Perl_debug_log,
4144 "filter_read %d: from rsfp\n", idx));
4145 if (correct_length) {
4148 const int old_len = SvCUR(buf_sv);
4150 /* ensure buf_sv is large enough */
4151 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4152 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4153 correct_length)) <= 0) {
4154 if (PerlIO_error(PL_rsfp))
4155 return -1; /* error */
4157 return 0 ; /* end of file */
4159 SvCUR_set(buf_sv, old_len + len) ;
4160 SvPVX(buf_sv)[old_len + len] = '\0';
4163 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4164 if (PerlIO_error(PL_rsfp))
4165 return -1; /* error */
4167 return 0 ; /* end of file */
4170 return SvCUR(buf_sv);
4172 /* Skip this filter slot if filter has been deleted */
4173 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4174 DEBUG_P(PerlIO_printf(Perl_debug_log,
4175 "filter_read %d: skipped (filter deleted)\n",
4177 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4179 if (SvTYPE(datasv) != SVt_PVIO) {
4180 if (correct_length) {
4182 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4183 if (!remainder) return 0; /* eof */
4184 if (correct_length > remainder) correct_length = remainder;
4185 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4186 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4189 const char *s = SvEND(datasv);
4190 const char *send = SvPVX(datasv) + SvLEN(datasv);
4198 if (s == send) return 0; /* eof */
4199 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4200 SvCUR_set(datasv, s-SvPVX(datasv));
4202 return SvCUR(buf_sv);
4204 /* Get function pointer hidden within datasv */
4205 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4206 DEBUG_P(PerlIO_printf(Perl_debug_log,
4207 "filter_read %d: via function %p (%s)\n",
4208 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4209 /* Call function. The function is expected to */
4210 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4211 /* Return: <0:error, =0:eof, >0:not eof */
4212 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4216 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4220 PERL_ARGS_ASSERT_FILTER_GETS;
4222 #ifdef PERL_CR_FILTER
4223 if (!PL_rsfp_filters) {
4224 filter_add(S_cr_textfilter,NULL);
4227 if (PL_rsfp_filters) {
4229 SvCUR_set(sv, 0); /* start with empty line */
4230 if (FILTER_READ(0, sv, 0) > 0)
4231 return ( SvPVX(sv) ) ;
4236 return (sv_gets(sv, PL_rsfp, append));
4240 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4245 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4247 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4251 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4252 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4254 return GvHV(gv); /* Foo:: */
4257 /* use constant CLASS => 'MyClass' */
4258 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4259 if (gv && GvCV(gv)) {
4260 SV * const sv = cv_const_sv(GvCV(gv));
4262 pkgname = SvPV_const(sv, len);
4265 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4269 * S_readpipe_override
4270 * Check whether readpipe() is overridden, and generates the appropriate
4271 * optree, provided sublex_start() is called afterwards.
4274 S_readpipe_override(pTHX)
4277 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4278 pl_yylval.ival = OP_BACKTICK;
4280 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4282 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4283 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4284 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4286 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4287 op_append_elem(OP_LIST,
4288 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4289 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4296 * The intent of this yylex wrapper is to minimize the changes to the
4297 * tokener when we aren't interested in collecting madprops. It remains
4298 * to be seen how successful this strategy will be...
4305 char *s = PL_bufptr;
4307 /* make sure PL_thiswhite is initialized */
4311 /* previous token ate up our whitespace? */
4312 if (!PL_lasttoke && PL_nextwhite) {
4313 PL_thiswhite = PL_nextwhite;
4317 /* isolate the token, and figure out where it is without whitespace */
4318 PL_realtokenstart = -1;
4322 assert(PL_curforce < 0);
4324 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4325 if (!PL_thistoken) {
4326 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4327 PL_thistoken = newSVpvs("");
4329 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4330 PL_thistoken = newSVpvn(tstart, s - tstart);
4333 if (PL_thismad) /* install head */
4334 CURMAD('X', PL_thistoken);
4337 /* last whitespace of a sublex? */
4338 if (optype == ')' && PL_endwhite) {
4339 CURMAD('X', PL_endwhite);
4344 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4345 if (!PL_thiswhite && !PL_endwhite && !optype) {
4346 sv_free(PL_thistoken);
4351 /* put off final whitespace till peg */
4352 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4353 PL_nextwhite = PL_thiswhite;
4356 else if (PL_thisopen) {
4357 CURMAD('q', PL_thisopen);
4359 sv_free(PL_thistoken);
4363 /* Store actual token text as madprop X */
4364 CURMAD('X', PL_thistoken);
4368 /* add preceding whitespace as madprop _ */
4369 CURMAD('_', PL_thiswhite);
4373 /* add quoted material as madprop = */
4374 CURMAD('=', PL_thisstuff);
4378 /* add terminating quote as madprop Q */
4379 CURMAD('Q', PL_thisclose);
4383 /* special processing based on optype */
4387 /* opval doesn't need a TOKEN since it can already store mp */
4397 if (pl_yylval.opval)
4398 append_madprops(PL_thismad, pl_yylval.opval, 0);
4406 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4419 /* remember any fake bracket that lexer is about to discard */
4420 if (PL_lex_brackets == 1 &&
4421 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4424 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4427 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4428 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4431 break; /* don't bother looking for trailing comment */
4440 /* attach a trailing comment to its statement instead of next token */
4444 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4446 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4448 if (*s == '\n' || *s == '#') {
4449 while (s < PL_bufend && *s != '\n')
4453 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4454 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4467 /* Create new token struct. Note: opvals return early above. */
4468 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4475 S_tokenize_use(pTHX_ int is_use, char *s) {
4478 PERL_ARGS_ASSERT_TOKENIZE_USE;
4480 if (PL_expect != XSTATE)
4481 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4482 is_use ? "use" : "no"));
4485 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4486 s = force_version(s, TRUE);
4487 if (*s == ';' || *s == '}'
4488 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4489 start_force(PL_curforce);
4490 NEXTVAL_NEXTTOKE.opval = NULL;
4493 else if (*s == 'v') {
4494 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4495 s = force_version(s, FALSE);
4499 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4500 s = force_version(s, FALSE);
4502 pl_yylval.ival = is_use;
4506 static const char* const exp_name[] =
4507 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4508 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4512 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4514 S_word_takes_any_delimeter(char *p, STRLEN len)
4516 return (len == 1 && strchr("msyq", p[0])) ||
4518 (p[0] == 't' && p[1] == 'r') ||
4519 (p[0] == 'q' && strchr("qwxr", p[1]))));
4525 Works out what to call the token just pulled out of the input
4526 stream. The yacc parser takes care of taking the ops we return and
4527 stitching them into a tree.
4530 The type of the next token
4533 Switch based on the current state:
4534 - if we already built the token before, use it
4535 - if we have a case modifier in a string, deal with that
4536 - handle other cases of interpolation inside a string
4537 - scan the next line if we are inside a format
4538 In the normal state switch on the next character:
4540 if alphabetic, go to key lookup
4541 unrecoginized character - croak
4542 - 0/4/26: handle end-of-line or EOF
4543 - cases for whitespace
4544 - \n and #: handle comments and line numbers
4545 - various operators, brackets and sigils
4548 - 'v': vstrings (or go to key lookup)
4549 - 'x' repetition operator (or go to key lookup)
4550 - other ASCII alphanumerics (key lookup begins here):
4553 scan built-in keyword (but do nothing with it yet)
4554 check for statement label
4555 check for lexical subs
4556 goto just_a_word if there is one
4557 see whether built-in keyword is overridden
4558 switch on keyword number:
4559 - default: just_a_word:
4560 not a built-in keyword; handle bareword lookup
4561 disambiguate between method and sub call
4562 fall back to bareword
4563 - cases for built-in keywords
4568 #pragma segment Perl_yylex
4574 char *s = PL_bufptr;
4581 /* orig_keyword, gvp, and gv are initialized here because
4582 * jump to the label just_a_word_zero can bypass their
4583 * initialization later. */
4584 I32 orig_keyword = 0;
4589 SV* tmp = newSVpvs("");
4590 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4591 (IV)CopLINE(PL_curcop),
4592 lex_state_names[PL_lex_state],
4593 exp_name[PL_expect],
4594 pv_display(tmp, s, strlen(s), 0, 60));
4598 switch (PL_lex_state) {
4600 case LEX_NORMAL: /* Some compilers will produce faster */
4601 case LEX_INTERPNORMAL: /* code if we comment these out. */
4605 /* when we've already built the next token, just pull it out of the queue */
4609 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4611 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4612 PL_nexttoke[PL_lasttoke].next_mad = 0;
4613 if (PL_thismad && PL_thismad->mad_key == '_') {
4614 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4615 PL_thismad->mad_val = 0;
4616 mad_free(PL_thismad);
4621 PL_lex_state = PL_lex_defer;
4622 PL_expect = PL_lex_expect;
4623 PL_lex_defer = LEX_NORMAL;
4624 if (!PL_nexttoke[PL_lasttoke].next_type)
4629 pl_yylval = PL_nextval[PL_nexttoke];
4631 PL_lex_state = PL_lex_defer;
4632 PL_expect = PL_lex_expect;
4633 PL_lex_defer = LEX_NORMAL;
4639 next_type = PL_nexttoke[PL_lasttoke].next_type;
4641 next_type = PL_nexttype[PL_nexttoke];
4643 if (next_type & (7<<24)) {
4644 if (next_type & (1<<24)) {
4645 if (PL_lex_brackets > 100)
4646 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4647 PL_lex_brackstack[PL_lex_brackets++] =
4648 (char) ((next_type >> 16) & 0xff);
4650 if (next_type & (2<<24))
4651 PL_lex_allbrackets++;
4652 if (next_type & (4<<24))
4653 PL_lex_allbrackets--;
4654 next_type &= 0xffff;
4656 if (S_is_opval_token(next_type) && pl_yylval.opval)
4657 pl_yylval.opval->op_savefree = 0; /* release */
4658 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4661 /* interpolated case modifiers like \L \U, including \Q and \E.
4662 when we get here, PL_bufptr is at the \
4664 case LEX_INTERPCASEMOD:
4666 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4668 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4669 PL_bufptr, PL_bufend, *PL_bufptr);
4671 /* handle \E or end of string */
4672 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4674 if (PL_lex_casemods) {
4675 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4676 PL_lex_casestack[PL_lex_casemods] = '\0';
4678 if (PL_bufptr != PL_bufend
4679 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4680 || oldmod == 'F')) {
4682 PL_lex_state = LEX_INTERPCONCAT;
4685 PL_thistoken = newSVpvs("\\E");
4688 PL_lex_allbrackets--;
4691 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4692 /* Got an unpaired \E */
4693 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4694 "Useless use of \\E");
4697 while (PL_bufptr != PL_bufend &&
4698 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4701 PL_thiswhite = newSVpvs("");
4702 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4707 if (PL_bufptr != PL_bufend)
4710 PL_lex_state = LEX_INTERPCONCAT;
4714 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4715 "### Saw case modifier\n"); });
4717 if (s[1] == '\\' && s[2] == 'E') {
4721 PL_thiswhite = newSVpvs("");
4722 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4726 PL_lex_state = LEX_INTERPCONCAT;
4731 if (!PL_madskills) /* when just compiling don't need correct */
4732 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4733 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4734 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4735 (strchr(PL_lex_casestack, 'L')
4736 || strchr(PL_lex_casestack, 'U')
4737 || strchr(PL_lex_casestack, 'F'))) {
4738 PL_lex_casestack[--PL_lex_casemods] = '\0';
4739 PL_lex_allbrackets--;
4742 if (PL_lex_casemods > 10)
4743 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4744 PL_lex_casestack[PL_lex_casemods++] = *s;
4745 PL_lex_casestack[PL_lex_casemods] = '\0';
4746 PL_lex_state = LEX_INTERPCONCAT;
4747 start_force(PL_curforce);
4748 NEXTVAL_NEXTTOKE.ival = 0;
4749 force_next((2<<24)|'(');
4750 start_force(PL_curforce);
4752 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4754 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4756 NEXTVAL_NEXTTOKE.ival = OP_LC;
4758 NEXTVAL_NEXTTOKE.ival = OP_UC;
4760 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4762 NEXTVAL_NEXTTOKE.ival = OP_FC;
4764 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4766 SV* const tmpsv = newSVpvs("\\ ");
4767 /* replace the space with the character we want to escape
4769 SvPVX(tmpsv)[1] = *s;
4775 if (PL_lex_starts) {
4781 sv_free(PL_thistoken);
4782 PL_thistoken = newSVpvs("");
4785 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4786 if (PL_lex_casemods == 1 && PL_lex_inpat)
4795 case LEX_INTERPPUSH:
4796 return REPORT(sublex_push());
4798 case LEX_INTERPSTART:
4799 if (PL_bufptr == PL_bufend)
4800 return REPORT(sublex_done());
4801 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4802 "### Interpolated variable\n"); });
4804 PL_lex_dojoin = (*PL_bufptr == '@');
4805 PL_lex_state = LEX_INTERPNORMAL;
4806 if (PL_lex_dojoin) {
4807 start_force(PL_curforce);
4808 NEXTVAL_NEXTTOKE.ival = 0;
4810 start_force(PL_curforce);
4811 force_ident("\"", '$');
4812 start_force(PL_curforce);
4813 NEXTVAL_NEXTTOKE.ival = 0;
4815 start_force(PL_curforce);
4816 NEXTVAL_NEXTTOKE.ival = 0;
4817 force_next((2<<24)|'(');
4818 start_force(PL_curforce);
4819 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4822 /* Convert (?{...}) and friends to 'do {...}' */
4823 if (PL_lex_inpat && *PL_bufptr == '(') {
4824 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4826 if (*PL_bufptr != '{')
4828 start_force(PL_curforce);
4829 /* XXX probably need a CURMAD(something) here */
4830 PL_expect = XTERMBLOCK;
4834 if (PL_lex_starts++) {
4839 sv_free(PL_thistoken);
4840 PL_thistoken = newSVpvs("");
4843 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4844 if (!PL_lex_casemods && PL_lex_inpat)
4851 case LEX_INTERPENDMAYBE:
4852 if (intuit_more(PL_bufptr)) {
4853 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4859 if (PL_lex_dojoin) {
4860 PL_lex_dojoin = FALSE;
4861 PL_lex_state = LEX_INTERPCONCAT;
4865 sv_free(PL_thistoken);
4866 PL_thistoken = newSVpvs("");
4869 PL_lex_allbrackets--;
4872 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4873 && SvEVALED(PL_lex_repl))
4875 if (PL_bufptr != PL_bufend)
4876 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4879 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4880 re_eval_str. If the here-doc body’s length equals the previous
4881 value of re_eval_start, re_eval_start will now be null. So
4882 check re_eval_str as well. */
4883 if (PL_parser->lex_shared->re_eval_start
4884 || PL_parser->lex_shared->re_eval_str) {
4886 if (*PL_bufptr != ')')
4887 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4889 /* having compiled a (?{..}) expression, return the original
4890 * text too, as a const */
4891 if (PL_parser->lex_shared->re_eval_str) {
4892 sv = PL_parser->lex_shared->re_eval_str;
4893 PL_parser->lex_shared->re_eval_str = NULL;
4895 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4896 SvPV_shrink_to_cur(sv);
4898 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4899 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4900 start_force(PL_curforce);
4901 /* XXX probably need a CURMAD(something) here */
4902 NEXTVAL_NEXTTOKE.opval =
4903 (OP*)newSVOP(OP_CONST, 0,
4906 PL_parser->lex_shared->re_eval_start = NULL;
4912 case LEX_INTERPCONCAT:
4914 if (PL_lex_brackets)
4915 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4916 (long) PL_lex_brackets);
4918 if (PL_bufptr == PL_bufend)
4919 return REPORT(sublex_done());
4921 /* m'foo' still needs to be parsed for possible (?{...}) */
4922 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4923 SV *sv = newSVsv(PL_linestr);
4925 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4929 s = scan_const(PL_bufptr);
4931 PL_lex_state = LEX_INTERPCASEMOD;
4933 PL_lex_state = LEX_INTERPSTART;
4936 if (s != PL_bufptr) {
4937 start_force(PL_curforce);
4939 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4941 NEXTVAL_NEXTTOKE = pl_yylval;
4944 if (PL_lex_starts++) {
4948 sv_free(PL_thistoken);
4949 PL_thistoken = newSVpvs("");
4952 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4953 if (!PL_lex_casemods && PL_lex_inpat)
4966 s = scan_formline(PL_bufptr);
4967 if (!PL_lex_formbrack)
4977 PL_oldoldbufptr = PL_oldbufptr;
4983 sv_free(PL_thistoken);
4986 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4990 if (isIDFIRST_lazy_if(s,UTF))
4993 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4994 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4996 SVs_TEMP | SVf_UTF8),
4997 10, UNI_DISPLAY_ISPRINT))
4998 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4999 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5000 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5001 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5009 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
5013 goto fake_eof; /* emulate EOF on ^D or ^Z */
5019 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5022 if (PL_lex_brackets &&
5023 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5024 yyerror((const char *)
5026 ? "Format not terminated"
5027 : "Missing right curly or square bracket"));
5029 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5030 "### Tokener got EOF\n");
5034 if (s++ < PL_bufend)
5035 goto retry; /* ignore stray nulls */
5038 if (!PL_in_eval && !PL_preambled) {
5039 PL_preambled = TRUE;
5045 /* Generate a string of Perl code to load the debugger.
5046 * If PERL5DB is set, it will return the contents of that,
5047 * otherwise a compile-time require of perl5db.pl. */
5049 const char * const pdb = PerlEnv_getenv("PERL5DB");
5052 sv_setpv(PL_linestr, pdb);
5053 sv_catpvs(PL_linestr,";");
5055 SETERRNO(0,SS_NORMAL);
5056 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5059 sv_setpvs(PL_linestr,"");
5060 if (PL_preambleav) {
5061 SV **svp = AvARRAY(PL_preambleav);
5062 SV **const end = svp + AvFILLp(PL_preambleav);
5064 sv_catsv(PL_linestr, *svp);
5066 sv_catpvs(PL_linestr, ";");
5068 sv_free(MUTABLE_SV(PL_preambleav));
5069 PL_preambleav = NULL;
5072 sv_catpvs(PL_linestr,
5073 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5074 if (PL_minus_n || PL_minus_p) {
5075 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5077 sv_catpvs(PL_linestr,"chomp;");
5080 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5081 || *PL_splitstr == '"')
5082 && strchr(PL_splitstr + 1, *PL_splitstr))
5083 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5085 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5086 bytes can be used as quoting characters. :-) */
5087 const char *splits = PL_splitstr;
5088 sv_catpvs(PL_linestr, "our @F=split(q\0");
5091 if (*splits == '\\')
5092 sv_catpvn(PL_linestr, splits, 1);
5093 sv_catpvn(PL_linestr, splits, 1);
5094 } while (*splits++);
5095 /* This loop will embed the trailing NUL of
5096 PL_linestr as the last thing it does before
5098 sv_catpvs(PL_linestr, ");");
5102 sv_catpvs(PL_linestr,"our @F=split(' ');");
5105 sv_catpvs(PL_linestr, "\n");
5106 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5107 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5108 PL_last_lop = PL_last_uni = NULL;
5109 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5110 update_debugger_info(PL_linestr, NULL, 0);
5115 bof = PL_rsfp ? TRUE : FALSE;
5118 fake_eof = LEX_FAKE_EOF;
5120 PL_bufptr = PL_bufend;
5121 COPLINE_INC_WITH_HERELINES;
5122 if (!lex_next_chunk(fake_eof)) {
5123 CopLINE_dec(PL_curcop);
5125 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5127 CopLINE_dec(PL_curcop);
5130 PL_realtokenstart = -1;
5133 /* If it looks like the start of a BOM or raw UTF-16,
5134 * check if it in fact is. */
5135 if (bof && PL_rsfp &&
5140 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5141 bof = (offset == (Off_t)SvCUR(PL_linestr));
5142 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5143 /* offset may include swallowed CR */
5145 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5148 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5149 s = swallow_bom((U8*)s);
5152 if (PL_parser->in_pod) {
5153 /* Incest with pod. */
5156 sv_catsv(PL_thiswhite, PL_linestr);
5158 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5159 sv_setpvs(PL_linestr, "");
5160 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5161 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5162 PL_last_lop = PL_last_uni = NULL;
5163 PL_parser->in_pod = 0;
5166 if (PL_rsfp || PL_parser->filtered)
5168 } while (PL_parser->in_pod);
5169 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5170 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5171 PL_last_lop = PL_last_uni = NULL;
5172 if (CopLINE(PL_curcop) == 1) {
5173 while (s < PL_bufend && isSPACE(*s))
5175 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5179 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5183 if (*s == '#' && *(s+1) == '!')
5185 #ifdef ALTERNATE_SHEBANG
5187 static char const as[] = ALTERNATE_SHEBANG;
5188 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5189 d = s + (sizeof(as) - 1);
5191 #endif /* ALTERNATE_SHEBANG */
5200 while (*d && !isSPACE(*d))
5204 #ifdef ARG_ZERO_IS_SCRIPT
5205 if (ipathend > ipath) {
5207 * HP-UX (at least) sets argv[0] to the script name,
5208 * which makes $^X incorrect. And Digital UNIX and Linux,
5209 * at least, set argv[0] to the basename of the Perl
5210 * interpreter. So, having found "#!", we'll set it right.
5212 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5214 assert(SvPOK(x) || SvGMAGICAL(x));
5215 if (sv_eq(x, CopFILESV(PL_curcop))) {
5216 sv_setpvn(x, ipath, ipathend - ipath);
5222 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5223 const char * const lstart = SvPV_const(x,llen);
5225 bstart += blen - llen;
5226 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5227 sv_setpvn(x, ipath, ipathend - ipath);
5232 TAINT_NOT; /* $^X is always tainted, but that's OK */
5234 #endif /* ARG_ZERO_IS_SCRIPT */
5239 d = instr(s,"perl -");
5241 d = instr(s,"perl");
5243 /* avoid getting into infinite loops when shebang
5244 * line contains "Perl" rather than "perl" */
5246 for (d = ipathend-4; d >= ipath; --d) {
5247 if ((*d == 'p' || *d == 'P')
5248 && !ibcmp(d, "perl", 4))
5258 #ifdef ALTERNATE_SHEBANG
5260 * If the ALTERNATE_SHEBANG on this system starts with a
5261 * character that can be part of a Perl expression, then if
5262 * we see it but not "perl", we're probably looking at the
5263 * start of Perl code, not a request to hand off to some
5264 * other interpreter. Similarly, if "perl" is there, but
5265 * not in the first 'word' of the line, we assume the line
5266 * contains the start of the Perl program.
5268 if (d && *s != '#') {
5269 const char *c = ipath;
5270 while (*c && !strchr("; \t\r\n\f\v#", *c))
5273 d = NULL; /* "perl" not in first word; ignore */
5275 *s = '#'; /* Don't try to parse shebang line */
5277 #endif /* ALTERNATE_SHEBANG */
5282 !instr(s,"indir") &&
5283 instr(PL_origargv[0],"perl"))
5290 while (s < PL_bufend && isSPACE(*s))
5292 if (s < PL_bufend) {
5293 Newx(newargv,PL_origargc+3,char*);
5295 while (s < PL_bufend && !isSPACE(*s))
5298 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5301 newargv = PL_origargv;
5304 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5306 Perl_croak(aTHX_ "Can't exec %s", ipath);
5309 while (*d && !isSPACE(*d))
5311 while (SPACE_OR_TAB(*d))
5315 const bool switches_done = PL_doswitches;
5316 const U32 oldpdb = PL_perldb;
5317 const bool oldn = PL_minus_n;
5318 const bool oldp = PL_minus_p;
5322 bool baduni = FALSE;
5324 const char *d2 = d1 + 1;
5325 if (parse_unicode_opts((const char **)&d2)
5329 if (baduni || *d1 == 'M' || *d1 == 'm') {
5330 const char * const m = d1;
5331 while (*d1 && !isSPACE(*d1))
5333 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5336 d1 = moreswitches(d1);
5338 if (PL_doswitches && !switches_done) {
5339 int argc = PL_origargc;
5340 char **argv = PL_origargv;
5343 } while (argc && argv[0][0] == '-' && argv[0][1]);
5344 init_argv_symbols(argc,argv);
5346 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5347 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5348 /* if we have already added "LINE: while (<>) {",
5349 we must not do it again */
5351 sv_setpvs(PL_linestr, "");
5352 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5353 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5354 PL_last_lop = PL_last_uni = NULL;
5355 PL_preambled = FALSE;
5356 if (PERLDB_LINE || PERLDB_SAVESRC)
5357 (void)gv_fetchfile(PL_origfilename);
5364 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5365 PL_lex_state = LEX_FORMLINE;
5366 start_force(PL_curforce);
5367 NEXTVAL_NEXTTOKE.ival = 0;
5368 force_next(FORMRBRACK);
5373 #ifdef PERL_STRICT_CR
5374 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5376 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5378 case ' ': case '\t': case '\f': case 013:
5380 PL_realtokenstart = -1;
5383 PL_thiswhite = newSVpvs("");
5384 sv_catpvn(PL_thiswhite, s, 1);
5392 PL_realtokenstart = -1;
5396 if (PL_lex_state != LEX_NORMAL ||
5397 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5398 if (*s == '#' && s == PL_linestart && PL_in_eval
5399 && !PL_rsfp && !PL_parser->filtered) {
5400 /* handle eval qq[#line 1 "foo"\n ...] */
5401 CopLINE_dec(PL_curcop);
5404 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5406 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5410 const bool in_comment = *s == '#';
5412 while (d < PL_bufend && *d != '\n')
5416 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5417 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5421 PL_thiswhite = newSVpvn(s, d - s);
5424 if (in_comment && d == PL_bufend
5425 && PL_lex_state == LEX_INTERPNORMAL
5426 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5427 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5430 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5431 PL_lex_state = LEX_FORMLINE;
5432 start_force(PL_curforce);
5433 NEXTVAL_NEXTTOKE.ival = 0;
5434 force_next(FORMRBRACK);
5440 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5441 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5444 TOKEN(PEG); /* make sure any #! line is accessible */
5449 /* if (PL_madskills && PL_lex_formbrack) { */
5451 while (d < PL_bufend && *d != '\n')
5455 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5456 Perl_croak(aTHX_ "panic: input overflow");
5457 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5459 PL_thiswhite = newSVpvs("");
5460 if (CopLINE(PL_curcop) == 1) {
5461 sv_setpvs(PL_thiswhite, "");
5464 sv_catpvn(PL_thiswhite, s, d - s);
5478 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5486 while (s < PL_bufend && SPACE_OR_TAB(*s))
5489 if (strnEQ(s,"=>",2)) {
5490 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5491 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5492 OPERATOR('-'); /* unary minus */
5494 PL_last_uni = PL_oldbufptr;
5496 case 'r': ftst = OP_FTEREAD; break;
5497 case 'w': ftst = OP_FTEWRITE; break;
5498 case 'x': ftst = OP_FTEEXEC; break;
5499 case 'o': ftst = OP_FTEOWNED; break;
5500 case 'R': ftst = OP_FTRREAD; break;
5501 case 'W': ftst = OP_FTRWRITE; break;
5502 case 'X': ftst = OP_FTREXEC; break;
5503 case 'O': ftst = OP_FTROWNED; break;
5504 case 'e': ftst = OP_FTIS; break;
5505 case 'z': ftst = OP_FTZERO; break;
5506 case 's': ftst = OP_FTSIZE; break;
5507 case 'f': ftst = OP_FTFILE; break;
5508 case 'd': ftst = OP_FTDIR; break;
5509 case 'l': ftst = OP_FTLINK; break;
5510 case 'p': ftst = OP_FTPIPE; break;
5511 case 'S': ftst = OP_FTSOCK; break;
5512 case 'u': ftst = OP_FTSUID; break;
5513 case 'g': ftst = OP_FTSGID; break;
5514 case 'k': ftst = OP_FTSVTX; break;
5515 case 'b': ftst = OP_FTBLK; break;
5516 case 'c': ftst = OP_FTCHR; break;
5517 case 't': ftst = OP_FTTTY; break;
5518 case 'T': ftst = OP_FTTEXT; break;
5519 case 'B': ftst = OP_FTBINARY; break;
5520 case 'M': case 'A': case 'C':
5521 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5523 case 'M': ftst = OP_FTMTIME; break;
5524 case 'A': ftst = OP_FTATIME; break;
5525 case 'C': ftst = OP_FTCTIME; break;
5533 PL_last_lop_op = (OPCODE)ftst;
5534 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5535 "### Saw file test %c\n", (int)tmp);
5540 /* Assume it was a minus followed by a one-letter named
5541 * subroutine call (or a -bareword), then. */
5542 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5543 "### '-%c' looked like a file test but was not\n",
5550 const char tmp = *s++;
5553 if (PL_expect == XOPERATOR)
5558 else if (*s == '>') {
5561 if (isIDFIRST_lazy_if(s,UTF)) {
5562 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5570 if (PL_expect == XOPERATOR) {
5571 if (*s == '=' && !PL_lex_allbrackets &&
5572 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5579 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5581 OPERATOR('-'); /* unary minus */
5587 const char tmp = *s++;
5590 if (PL_expect == XOPERATOR)
5595 if (PL_expect == XOPERATOR) {
5596 if (*s == '=' && !PL_lex_allbrackets &&
5597 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5604 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5611 if (PL_expect != XOPERATOR) {
5612 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5613 PL_expect = XOPERATOR;
5614 force_ident(PL_tokenbuf, '*');
5622 if (*s == '=' && !PL_lex_allbrackets &&
5623 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5629 if (*s == '=' && !PL_lex_allbrackets &&
5630 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5637 if (PL_expect == XOPERATOR) {
5638 if (s[1] == '=' && !PL_lex_allbrackets &&
5639 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5644 PL_tokenbuf[0] = '%';
5645 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5646 sizeof PL_tokenbuf - 1, FALSE);
5647 if (!PL_tokenbuf[1]) {
5650 PL_expect = XOPERATOR;
5651 force_ident_maybe_lex('%');
5655 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5656 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5661 if (PL_lex_brackets > 100)
5662 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5663 PL_lex_brackstack[PL_lex_brackets++] = 0;
5664 PL_lex_allbrackets++;
5666 const char tmp = *s++;
5671 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5673 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5681 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5688 goto just_a_word_zero_gv;
5691 switch (PL_expect) {
5697 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5699 PL_bufptr = s; /* update in case we back off */
5702 "Use of := for an empty attribute list is not allowed");
5709 PL_expect = XTERMBLOCK;
5712 stuffstart = s - SvPVX(PL_linestr) - 1;
5716 while (isIDFIRST_lazy_if(s,UTF)) {
5719 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5720 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5721 if (tmp < 0) tmp = -tmp;
5736 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5738 d = scan_str(d,TRUE,TRUE,FALSE);
5740 /* MUST advance bufptr here to avoid bogus
5741 "at end of line" context messages from yyerror().
5743 PL_bufptr = s + len;
5744 yyerror("Unterminated attribute parameter in attribute list");
5748 return REPORT(0); /* EOF indicator */
5752 sv_catsv(sv, PL_lex_stuff);
5753 attrs = op_append_elem(OP_LIST, attrs,
5754 newSVOP(OP_CONST, 0, sv));
5755 SvREFCNT_dec(PL_lex_stuff);
5756 PL_lex_stuff = NULL;
5759 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5761 if (PL_in_my == KEY_our) {
5762 deprecate(":unique");
5765 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5768 /* NOTE: any CV attrs applied here need to be part of
5769 the CVf_BUILTIN_ATTRS define in cv.h! */
5770 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5772 CvLVALUE_on(PL_compcv);
5774 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5776 deprecate(":locked");
5778 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5780 CvMETHOD_on(PL_compcv);
5782 /* After we've set the flags, it could be argued that
5783 we don't need to do the attributes.pm-based setting
5784 process, and shouldn't bother appending recognized
5785 flags. To experiment with that, uncomment the
5786 following "else". (Note that's already been
5787 uncommented. That keeps the above-applied built-in
5788 attributes from being intercepted (and possibly
5789 rejected) by a package's attribute routines, but is
5790 justified by the performance win for the common case
5791 of applying only built-in attributes.) */
5793 attrs = op_append_elem(OP_LIST, attrs,
5794 newSVOP(OP_CONST, 0,
5798 if (*s == ':' && s[1] != ':')
5801 break; /* require real whitespace or :'s */
5802 /* XXX losing whitespace on sequential attributes here */
5806 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5807 if (*s != ';' && *s != '}' && *s != tmp
5808 && (tmp != '=' || *s != ')')) {
5809 const char q = ((*s == '\'') ? '"' : '\'');
5810 /* If here for an expression, and parsed no attrs, back
5812 if (tmp == '=' && !attrs) {
5816 /* MUST advance bufptr here to avoid bogus "at end of line"
5817 context messages from yyerror().
5820 yyerror( (const char *)
5822 ? Perl_form(aTHX_ "Invalid separator character "
5823 "%c%c%c in attribute list", q, *s, q)
5824 : "Unterminated attribute list" ) );
5832 start_force(PL_curforce);
5833 NEXTVAL_NEXTTOKE.opval = attrs;
5834 CURMAD('_', PL_nextwhite);
5839 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5840 (s - SvPVX(PL_linestr)) - stuffstart);
5845 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5849 PL_lex_allbrackets--;
5853 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5854 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5858 PL_lex_allbrackets++;
5861 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5867 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5870 PL_lex_allbrackets--;
5876 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5879 if (PL_lex_brackets <= 0)
5880 yyerror("Unmatched right square bracket");
5883 PL_lex_allbrackets--;
5884 if (PL_lex_state == LEX_INTERPNORMAL) {
5885 if (PL_lex_brackets == 0) {
5886 if (*s == '-' && s[1] == '>')
5887 PL_lex_state = LEX_INTERPENDMAYBE;
5888 else if (*s != '[' && *s != '{')
5889 PL_lex_state = LEX_INTERPEND;
5896 if (PL_lex_brackets > 100) {
5897 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5899 switch (PL_expect) {
5901 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5902 PL_lex_allbrackets++;
5903 OPERATOR(HASHBRACK);
5905 while (s < PL_bufend && SPACE_OR_TAB(*s))
5908 PL_tokenbuf[0] = '\0';
5909 if (d < PL_bufend && *d == '-') {
5910 PL_tokenbuf[0] = '-';
5912 while (d < PL_bufend && SPACE_OR_TAB(*d))
5915 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5916 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5918 while (d < PL_bufend && SPACE_OR_TAB(*d))
5921 const char minus = (PL_tokenbuf[0] == '-');
5922 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5930 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5931 PL_lex_allbrackets++;
5936 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5937 PL_lex_allbrackets++;
5942 if (PL_oldoldbufptr == PL_last_lop)
5943 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5945 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5946 PL_lex_allbrackets++;
5949 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5951 /* This hack is to get the ${} in the message. */
5953 yyerror("syntax error");
5956 OPERATOR(HASHBRACK);
5958 /* This hack serves to disambiguate a pair of curlies
5959 * as being a block or an anon hash. Normally, expectation
5960 * determines that, but in cases where we're not in a
5961 * position to expect anything in particular (like inside
5962 * eval"") we have to resolve the ambiguity. This code
5963 * covers the case where the first term in the curlies is a
5964 * quoted string. Most other cases need to be explicitly
5965 * disambiguated by prepending a "+" before the opening
5966 * curly in order to force resolution as an anon hash.
5968 * XXX should probably propagate the outer expectation
5969 * into eval"" to rely less on this hack, but that could
5970 * potentially break current behavior of eval"".
5974 if (*s == '\'' || *s == '"' || *s == '`') {
5975 /* common case: get past first string, handling escapes */
5976 for (t++; t < PL_bufend && *t != *s;)
5977 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5981 else if (*s == 'q') {
5984 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5987 /* skip q//-like construct */
5989 char open, close, term;
5992 while (t < PL_bufend && isSPACE(*t))
5994 /* check for q => */
5995 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5996 OPERATOR(HASHBRACK);
6000 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6004 for (t++; t < PL_bufend; t++) {
6005 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6007 else if (*t == open)
6011 for (t++; t < PL_bufend; t++) {
6012 if (*t == '\\' && t+1 < PL_bufend)
6014 else if (*t == close && --brackets <= 0)
6016 else if (*t == open)
6023 /* skip plain q word */
6024 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
6027 else if (isALNUM_lazy_if(t,UTF)) {
6029 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
6032 while (t < PL_bufend && isSPACE(*t))
6034 /* if comma follows first term, call it an anon hash */
6035 /* XXX it could be a comma expression with loop modifiers */
6036 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6037 || (*t == '=' && t[1] == '>')))
6038 OPERATOR(HASHBRACK);
6039 if (PL_expect == XREF)
6042 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6048 pl_yylval.ival = CopLINE(PL_curcop);
6049 if (isSPACE(*s) || *s == '#')
6050 PL_copline = NOLINE; /* invalidate current command line number */
6051 TOKEN(formbrack ? '=' : '{');
6053 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6057 if (PL_lex_brackets <= 0)
6058 yyerror("Unmatched right curly bracket");
6060 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6061 PL_lex_allbrackets--;
6062 if (PL_lex_state == LEX_INTERPNORMAL) {
6063 if (PL_lex_brackets == 0) {
6064 if (PL_expect & XFAKEBRACK) {
6065 PL_expect &= XENUMMASK;
6066 PL_lex_state = LEX_INTERPEND;
6071 PL_thiswhite = newSVpvs("");
6072 sv_catpvs(PL_thiswhite,"}");
6075 return yylex(); /* ignore fake brackets */
6077 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6078 && SvEVALED(PL_lex_repl))
6079 PL_lex_state = LEX_INTERPEND;
6080 else if (*s == '-' && s[1] == '>')
6081 PL_lex_state = LEX_INTERPENDMAYBE;
6082 else if (*s != '[' && *s != '{')
6083 PL_lex_state = LEX_INTERPEND;
6086 if (PL_expect & XFAKEBRACK) {
6087 PL_expect &= XENUMMASK;
6089 return yylex(); /* ignore fake brackets */
6091 start_force(PL_curforce);
6093 curmad('X', newSVpvn(s-1,1));
6094 CURMAD('_', PL_thiswhite);
6096 force_next(formbrack ? '.' : '}');
6097 if (formbrack) LEAVE;
6099 if (PL_madskills && !PL_thistoken)
6100 PL_thistoken = newSVpvs("");
6102 if (formbrack == 2) { /* means . where arguments were expected */
6103 start_force(PL_curforce);
6111 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6112 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6119 if (PL_expect == XOPERATOR) {
6120 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6121 && isIDFIRST_lazy_if(s,UTF))
6123 CopLINE_dec(PL_curcop);
6124 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6125 CopLINE_inc(PL_curcop);
6127 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6128 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6135 PL_tokenbuf[0] = '&';
6136 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6137 sizeof PL_tokenbuf - 1, TRUE);
6138 if (PL_tokenbuf[1]) {
6139 PL_expect = XOPERATOR;
6140 force_ident_maybe_lex('&');
6144 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6150 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6151 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6158 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6159 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6167 const char tmp = *s++;
6169 if (!PL_lex_allbrackets &&
6170 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6177 if (!PL_lex_allbrackets &&
6178 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6186 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6187 && strchr("+-*/%.^&|<",tmp))
6188 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6189 "Reversed %c= operator",(int)tmp);
6191 if (PL_expect == XSTATE && isALPHA(tmp) &&
6192 (s == PL_linestart+1 || s[-2] == '\n') )
6194 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6195 || PL_lex_state != LEX_NORMAL) {
6200 if (strnEQ(s,"=cut",4)) {
6216 PL_thiswhite = newSVpvs("");
6217 sv_catpvn(PL_thiswhite, PL_linestart,
6218 PL_bufend - PL_linestart);
6222 PL_parser->in_pod = 1;
6226 if (PL_expect == XBLOCK) {
6228 #ifdef PERL_STRICT_CR
6229 while (SPACE_OR_TAB(*t))
6231 while (SPACE_OR_TAB(*t) || *t == '\r')
6234 if (*t == '\n' || *t == '#') {
6237 SAVEI8(PL_parser->form_lex_state);
6238 SAVEI32(PL_lex_formbrack);
6239 PL_parser->form_lex_state = PL_lex_state;
6240 PL_lex_formbrack = PL_lex_brackets + 1;
6244 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6253 const char tmp = *s++;
6255 /* was this !=~ where !~ was meant?
6256 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6258 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6259 const char *t = s+1;
6261 while (t < PL_bufend && isSPACE(*t))
6264 if (*t == '/' || *t == '?' ||
6265 ((*t == 'm' || *t == 's' || *t == 'y')
6266 && !isALNUM(t[1])) ||
6267 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6268 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6269 "!=~ should be !~");
6271 if (!PL_lex_allbrackets &&
6272 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6284 if (PL_expect != XOPERATOR) {
6285 if (s[1] != '<' && !strchr(s,'>'))
6288 s = scan_heredoc(s);
6290 s = scan_inputsymbol(s);
6291 PL_expect = XOPERATOR;
6292 TOKEN(sublex_start());
6298 if (*s == '=' && !PL_lex_allbrackets &&
6299 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6303 SHop(OP_LEFT_SHIFT);
6308 if (!PL_lex_allbrackets &&
6309 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6316 if (!PL_lex_allbrackets &&
6317 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6325 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6333 const char tmp = *s++;
6335 if (*s == '=' && !PL_lex_allbrackets &&
6336 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6340 SHop(OP_RIGHT_SHIFT);
6342 else if (tmp == '=') {
6343 if (!PL_lex_allbrackets &&
6344 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6352 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6361 if (PL_expect == XOPERATOR) {
6362 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6363 return deprecate_commaless_var_list();
6367 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6368 PL_tokenbuf[0] = '@';
6369 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6370 sizeof PL_tokenbuf - 1, FALSE);
6371 if (PL_expect == XOPERATOR)
6372 no_op("Array length", s);
6373 if (!PL_tokenbuf[1])
6375 PL_expect = XOPERATOR;
6376 force_ident_maybe_lex('#');
6380 PL_tokenbuf[0] = '$';
6381 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6382 sizeof PL_tokenbuf - 1, FALSE);
6383 if (PL_expect == XOPERATOR)
6385 if (!PL_tokenbuf[1]) {
6387 yyerror("Final $ should be \\$ or $name");
6393 const char tmp = *s;
6394 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6397 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6398 && intuit_more(s)) {
6400 PL_tokenbuf[0] = '@';
6401 if (ckWARN(WARN_SYNTAX)) {
6404 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6407 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6408 while (t < PL_bufend && *t != ']')
6410 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6411 "Multidimensional syntax %.*s not supported",
6412 (int)((t - PL_bufptr) + 1), PL_bufptr);
6416 else if (*s == '{') {
6418 PL_tokenbuf[0] = '%';
6419 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6420 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6422 char tmpbuf[sizeof PL_tokenbuf];
6425 } while (isSPACE(*t));
6426 if (isIDFIRST_lazy_if(t,UTF)) {
6428 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6433 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6434 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6435 "You need to quote \"%"SVf"\"",
6436 SVfARG(newSVpvn_flags(tmpbuf, len,
6437 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
6443 PL_expect = XOPERATOR;
6444 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6445 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6446 if (!islop || PL_last_lop_op == OP_GREPSTART)
6447 PL_expect = XOPERATOR;
6448 else if (strchr("$@\"'`q", *s))
6449 PL_expect = XTERM; /* e.g. print $fh "foo" */
6450 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6451 PL_expect = XTERM; /* e.g. print $fh &sub */
6452 else if (isIDFIRST_lazy_if(s,UTF)) {
6453 char tmpbuf[sizeof PL_tokenbuf];
6455 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6456 if ((t2 = keyword(tmpbuf, len, 0))) {
6457 /* binary operators exclude handle interpretations */
6469 PL_expect = XTERM; /* e.g. print $fh length() */
6474 PL_expect = XTERM; /* e.g. print $fh subr() */
6477 else if (isDIGIT(*s))
6478 PL_expect = XTERM; /* e.g. print $fh 3 */
6479 else if (*s == '.' && isDIGIT(s[1]))
6480 PL_expect = XTERM; /* e.g. print $fh .3 */
6481 else if ((*s == '?' || *s == '-' || *s == '+')
6482 && !isSPACE(s[1]) && s[1] != '=')
6483 PL_expect = XTERM; /* e.g. print $fh -1 */
6484 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6486 PL_expect = XTERM; /* e.g. print $fh /.../
6487 XXX except DORDOR operator
6489 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6491 PL_expect = XTERM; /* print $fh <<"EOF" */
6494 force_ident_maybe_lex('$');
6498 if (PL_expect == XOPERATOR)
6500 PL_tokenbuf[0] = '@';
6501 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6502 if (!PL_tokenbuf[1]) {
6505 if (PL_lex_state == LEX_NORMAL)
6507 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6509 PL_tokenbuf[0] = '%';
6511 /* Warn about @ where they meant $. */
6512 if (*s == '[' || *s == '{') {
6513 if (ckWARN(WARN_SYNTAX)) {
6514 const char *t = s + 1;
6515 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6516 t += UTF ? UTF8SKIP(t) : 1;
6517 if (*t == '}' || *t == ']') {
6519 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6520 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
6521 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6522 "Scalar value %"SVf" better written as $%"SVf,
6523 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6524 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6525 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6526 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
6531 PL_expect = XOPERATOR;
6532 force_ident_maybe_lex('@');
6535 case '/': /* may be division, defined-or, or pattern */
6536 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6537 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6538 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6543 case '?': /* may either be conditional or pattern */
6544 if (PL_expect == XOPERATOR) {
6547 if (!PL_lex_allbrackets &&
6548 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6552 PL_lex_allbrackets++;
6558 /* A // operator. */
6559 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6560 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6561 LEX_FAKEEOF_LOGIC)) {
6569 if (*s == '=' && !PL_lex_allbrackets &&
6570 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6579 /* Disable warning on "study /blah/" */
6580 if (PL_oldoldbufptr == PL_last_uni
6581 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6582 || memNE(PL_last_uni, "study", 5)
6583 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6587 deprecate("?PATTERN? without explicit operator");
6588 s = scan_pat(s,OP_MATCH);
6589 TERM(sublex_start());
6593 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6594 #ifdef PERL_STRICT_CR
6597 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6599 && (s == PL_linestart || s[-1] == '\n') )
6602 formbrack = 2; /* dot seen where arguments expected */
6605 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6609 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6612 if (!PL_lex_allbrackets &&
6613 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6620 pl_yylval.ival = OPf_SPECIAL;
6626 if (*s == '=' && !PL_lex_allbrackets &&
6627 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6634 case '0': case '1': case '2': case '3': case '4':
6635 case '5': case '6': case '7': case '8': case '9':
6636 s = scan_num(s, &pl_yylval);
6637 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6638 if (PL_expect == XOPERATOR)
6643 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6644 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6645 if (PL_expect == XOPERATOR) {
6646 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6647 return deprecate_commaless_var_list();
6654 pl_yylval.ival = OP_CONST;
6655 TERM(sublex_start());
6658 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6659 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6660 if (PL_expect == XOPERATOR) {
6661 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6662 return deprecate_commaless_var_list();
6669 pl_yylval.ival = OP_CONST;
6670 /* FIXME. I think that this can be const if char *d is replaced by
6671 more localised variables. */
6672 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6673 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6674 pl_yylval.ival = OP_STRINGIFY;
6678 TERM(sublex_start());
6681 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
6682 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6683 if (PL_expect == XOPERATOR)
6684 no_op("Backticks",s);
6687 readpipe_override();
6688 TERM(sublex_start());
6692 if (PL_lex_inwhat && isDIGIT(*s))
6693 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6695 if (PL_expect == XOPERATOR)
6696 no_op("Backslash",s);
6700 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6701 char *start = s + 2;
6702 while (isDIGIT(*start) || *start == '_')
6704 if (*start == '.' && isDIGIT(start[1])) {
6705 s = scan_num(s, &pl_yylval);
6708 else if ((*start == ':' && start[1] == ':')
6709 || (PL_expect == XSTATE && *start == ':'))
6711 else if (PL_expect == XSTATE) {
6713 while (d < PL_bufend && isSPACE(*d)) d++;
6714 if (*d == ':') goto keylookup;
6716 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6717 if (!isALPHA(*start) && (PL_expect == XTERM
6718 || PL_expect == XREF || PL_expect == XSTATE
6719 || PL_expect == XTERMORDORDOR)) {
6720 GV *const gv = gv_fetchpvn_flags(s, start - s,
6721 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6723 s = scan_num(s, &pl_yylval);
6730 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6783 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6785 /* Some keywords can be followed by any delimiter, including ':' */
6786 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6788 /* x::* is just a word, unless x is "CORE" */
6789 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6793 while (d < PL_bufend && isSPACE(*d))
6794 d++; /* no comments skipped here, or s### is misparsed */
6796 /* Is this a word before a => operator? */
6797 if (*d == '=' && d[1] == '>') {
6800 = (OP*)newSVOP(OP_CONST, 0,
6801 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6802 pl_yylval.opval->op_private = OPpCONST_BARE;
6806 /* Check for plugged-in keyword */
6810 char *saved_bufptr = PL_bufptr;
6812 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6814 if (result == KEYWORD_PLUGIN_DECLINE) {
6815 /* not a plugged-in keyword */
6816 PL_bufptr = saved_bufptr;
6817 } else if (result == KEYWORD_PLUGIN_STMT) {
6818 pl_yylval.opval = o;
6821 return REPORT(PLUGSTMT);
6822 } else if (result == KEYWORD_PLUGIN_EXPR) {
6823 pl_yylval.opval = o;
6825 PL_expect = XOPERATOR;
6826 return REPORT(PLUGEXPR);
6828 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6833 /* Check for built-in keyword */
6834 tmp = keyword(PL_tokenbuf, len, 0);
6836 /* Is this a label? */
6837 if (!anydelim && PL_expect == XSTATE
6838 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6840 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6841 pl_yylval.pval[len] = '\0';
6842 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6847 /* Check for lexical sub */
6848 if (PL_expect != XOPERATOR) {
6849 char tmpbuf[sizeof PL_tokenbuf + 1];
6851 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6852 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6853 if (off != NOT_IN_PAD) {
6854 assert(off); /* we assume this is boolean-true below */
6855 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6856 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6857 HEK * const stashname = HvNAME_HEK(stash);
6858 sv = newSVhek(stashname);
6859 sv_catpvs(sv, "::");
6860 sv_catpvn_flags(sv, PL_tokenbuf, len,
6861 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6862 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6867 rv2cv_op = newOP(OP_PADANY, 0);
6868 rv2cv_op->op_targ = off;
6869 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6870 cv = (CV *)PAD_SV(off);
6878 if (tmp < 0) { /* second-class keyword? */
6879 GV *ogv = NULL; /* override (winner) */
6880 GV *hgv = NULL; /* hidden (loser) */
6881 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6883 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6884 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6887 if (GvIMPORTED_CV(gv))
6889 else if (! CvMETHOD(cv))
6893 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6894 UTF ? -(I32)len : (I32)len, FALSE)) &&
6895 (gv = *gvp) && isGV_with_GP(gv) &&
6896 GvCVu(gv) && GvIMPORTED_CV(gv))
6903 tmp = 0; /* overridden by import or by GLOBAL */
6906 && -tmp==KEY_lock /* XXX generalizable kludge */
6909 tmp = 0; /* any sub overrides "weak" keyword */
6911 else { /* no override */
6913 if (tmp == KEY_dump) {
6914 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6915 "dump() better written as CORE::dump()");
6919 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6920 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6921 "Ambiguous call resolved as CORE::%s(), "
6922 "qualify as such or use &",
6930 default: /* not a keyword */
6931 /* Trade off - by using this evil construction we can pull the
6932 variable gv into the block labelled keylookup. If not, then
6933 we have to give it function scope so that the goto from the
6934 earlier ':' case doesn't bypass the initialisation. */
6936 just_a_word_zero_gv:
6948 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6949 const char penultchar =
6950 lastchar && PL_bufptr - 2 >= PL_linestart
6954 SV *nextPL_nextwhite = 0;
6958 /* Get the rest if it looks like a package qualifier */
6960 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6962 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6965 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6966 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6967 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
6968 *s == '\'' ? "'" : "::");
6973 if (PL_expect == XOPERATOR) {
6974 if (PL_bufptr == PL_linestart) {
6975 CopLINE_dec(PL_curcop);
6976 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6977 CopLINE_inc(PL_curcop);
6980 no_op("Bareword",s);
6983 /* Look for a subroutine with this name in current package,
6984 unless this is a lexical sub, or name is "Foo::",
6985 in which case Foo is a bareword
6986 (and a package name). */
6988 if (len > 2 && !PL_madskills &&
6989 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6991 if (ckWARN(WARN_BAREWORD)
6992 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6993 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6994 "Bareword \"%"SVf"\" refers to nonexistent package",
6995 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6996 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
6998 PL_tokenbuf[len] = '\0';
7004 /* Mustn't actually add anything to a symbol table.
7005 But also don't want to "initialise" any placeholder
7006 constants that might already be there into full
7007 blown PVGVs with attached PVCV. */
7008 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7009 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7015 /* if we saw a global override before, get the right name */
7018 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7019 len ? len : strlen(PL_tokenbuf));
7021 SV * const tmp_sv = sv;
7022 sv = newSVpvs("CORE::GLOBAL::");
7023 sv_catsv(sv, tmp_sv);
7024 SvREFCNT_dec(tmp_sv);
7028 if (PL_madskills && !PL_thistoken) {
7029 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7030 PL_thistoken = newSVpvn(start,s - start);
7031 PL_realtokenstart = s - SvPVX(PL_linestr);
7035 /* Presume this is going to be a bareword of some sort. */
7037 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7038 pl_yylval.opval->op_private = OPpCONST_BARE;
7040 /* And if "Foo::", then that's what it certainly is. */
7046 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7047 const_op->op_private = OPpCONST_BARE;
7048 rv2cv_op = newCVREF(0, const_op);
7049 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7052 /* See if it's the indirect object for a list operator. */
7054 if (PL_oldoldbufptr &&
7055 PL_oldoldbufptr < PL_bufptr &&
7056 (PL_oldoldbufptr == PL_last_lop
7057 || PL_oldoldbufptr == PL_last_uni) &&
7058 /* NO SKIPSPACE BEFORE HERE! */
7059 (PL_expect == XREF ||
7060 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7062 bool immediate_paren = *s == '(';
7064 /* (Now we can afford to cross potential line boundary.) */
7065 s = SKIPSPACE2(s,nextPL_nextwhite);
7067 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7070 /* Two barewords in a row may indicate method call. */
7072 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7073 (tmp = intuit_method(s, gv, cv))) {
7075 if (tmp == METHOD && !PL_lex_allbrackets &&
7076 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7077 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7081 /* If not a declared subroutine, it's an indirect object. */
7082 /* (But it's an indir obj regardless for sort.) */
7083 /* Also, if "_" follows a filetest operator, it's a bareword */
7086 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7088 (PL_last_lop_op != OP_MAPSTART &&
7089 PL_last_lop_op != OP_GREPSTART))))
7090 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7091 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7094 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7099 PL_expect = XOPERATOR;
7102 s = SKIPSPACE2(s,nextPL_nextwhite);
7103 PL_nextwhite = nextPL_nextwhite;
7108 /* Is this a word before a => operator? */
7109 if (*s == '=' && s[1] == '>' && !pkgname) {
7112 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7113 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7114 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7118 /* If followed by a paren, it's certainly a subroutine. */
7123 while (SPACE_OR_TAB(*d))
7125 if (*d == ')' && (sv = cv_const_sv(cv))) {
7132 PL_nextwhite = PL_thiswhite;
7135 start_force(PL_curforce);
7137 NEXTVAL_NEXTTOKE.opval =
7138 off ? rv2cv_op : pl_yylval.opval;
7139 PL_expect = XOPERATOR;
7142 PL_nextwhite = nextPL_nextwhite;
7143 curmad('X', PL_thistoken);
7144 PL_thistoken = newSVpvs("");
7148 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7149 else op_free(rv2cv_op), force_next(WORD);
7154 /* If followed by var or block, call it a method (unless sub) */
7156 if ((*s == '$' || *s == '{') && !cv) {
7158 PL_last_lop = PL_oldbufptr;
7159 PL_last_lop_op = OP_METHOD;
7160 if (!PL_lex_allbrackets &&
7161 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7162 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7166 /* If followed by a bareword, see if it looks like indir obj. */
7169 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7170 && (tmp = intuit_method(s, gv, cv))) {
7172 if (tmp == METHOD && !PL_lex_allbrackets &&
7173 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7174 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7178 /* Not a method, so call it a subroutine (if defined) */
7181 if (lastchar == '-' && penultchar != '-') {
7182 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7183 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7184 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7185 SVfARG(tmpsv), SVfARG(tmpsv));
7187 /* Check for a constant sub */
7188 if ((sv = cv_const_sv(cv))) {
7191 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7192 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7193 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7194 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7198 op_free(pl_yylval.opval);
7199 pl_yylval.opval = rv2cv_op;
7200 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7201 PL_last_lop = PL_oldbufptr;
7202 PL_last_lop_op = OP_ENTERSUB;
7203 /* Is there a prototype? */
7210 STRLEN protolen = CvPROTOLEN(cv);
7211 const char *proto = CvPROTO(cv);
7215 if ((optional = *proto == ';'))
7218 while (*proto == ';');
7222 *proto == '$' || *proto == '_'
7223 || *proto == '*' || *proto == '+'
7228 *proto == '\\' && proto[1] && proto[2] == '\0'
7231 UNIPROTO(UNIOPSUB,optional);
7232 if (*proto == '\\' && proto[1] == '[') {
7233 const char *p = proto + 2;
7234 while(*p && *p != ']')
7236 if(*p == ']' && !p[1])
7237 UNIPROTO(UNIOPSUB,optional);
7239 if (*proto == '&' && *s == '{') {
7241 sv_setpvs(PL_subname, "__ANON__");
7243 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7244 if (!PL_lex_allbrackets &&
7245 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7246 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7253 PL_nextwhite = PL_thiswhite;
7256 start_force(PL_curforce);
7257 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7260 PL_nextwhite = nextPL_nextwhite;
7261 curmad('X', PL_thistoken);
7262 PL_thistoken = newSVpvs("");
7264 force_next(off ? PRIVATEREF : WORD);
7265 if (!PL_lex_allbrackets &&
7266 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7267 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7272 /* Guess harder when madskills require "best effort". */
7273 if (PL_madskills && (!gv || !GvCVu(gv))) {
7274 int probable_sub = 0;
7275 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7277 else if (isALPHA(*s)) {
7281 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7282 if (!keyword(tmpbuf, tmplen, 0))
7285 while (d < PL_bufend && isSPACE(*d))
7287 if (*d == '=' && d[1] == '>')
7292 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7294 op_free(pl_yylval.opval);
7295 pl_yylval.opval = rv2cv_op;
7296 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7297 PL_last_lop = PL_oldbufptr;
7298 PL_last_lop_op = OP_ENTERSUB;
7299 PL_nextwhite = PL_thiswhite;
7301 start_force(PL_curforce);
7302 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7304 PL_nextwhite = nextPL_nextwhite;
7305 curmad('X', PL_thistoken);
7306 PL_thistoken = newSVpvs("");
7307 force_next(off ? PRIVATEREF : WORD);
7308 if (!PL_lex_allbrackets &&
7309 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7310 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7314 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7316 force_next(off ? PRIVATEREF : WORD);
7317 if (!PL_lex_allbrackets &&
7318 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7319 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7324 /* Call it a bare word */
7326 if (PL_hints & HINT_STRICT_SUBS)
7327 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7330 /* after "print" and similar functions (corresponding to
7331 * "F? L" in opcode.pl), whatever wasn't already parsed as
7332 * a filehandle should be subject to "strict subs".
7333 * Likewise for the optional indirect-object argument to system
7334 * or exec, which can't be a bareword */
7335 if ((PL_last_lop_op == OP_PRINT
7336 || PL_last_lop_op == OP_PRTF
7337 || PL_last_lop_op == OP_SAY
7338 || PL_last_lop_op == OP_SYSTEM
7339 || PL_last_lop_op == OP_EXEC)
7340 && (PL_hints & HINT_STRICT_SUBS))
7341 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7342 if (lastchar != '-') {
7343 if (ckWARN(WARN_RESERVED)) {
7347 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7348 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7356 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7357 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7358 "Operator or semicolon missing before %c%"SVf,
7359 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7360 strlen(PL_tokenbuf),
7361 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
7362 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7363 "Ambiguous use of %c resolved as operator %c",
7364 lastchar, lastchar);
7371 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7376 (OP*)newSVOP(OP_CONST, 0,
7377 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7380 case KEY___PACKAGE__:
7382 (OP*)newSVOP(OP_CONST, 0,
7384 ? newSVhek(HvNAME_HEK(PL_curstash))
7391 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7392 const char *pname = "main";
7395 if (PL_tokenbuf[2] == 'D')
7398 PL_curstash ? PL_curstash : PL_defstash;
7399 pname = HvNAME_get(stash);
7400 plen = HvNAMELEN (stash);
7401 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7403 gv = gv_fetchpvn_flags(
7404 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7405 plen+6, GV_ADD|putf8, SVt_PVIO
7409 GvIOp(gv) = newIO();
7410 IoIFP(GvIOp(gv)) = PL_rsfp;
7411 #if defined(HAS_FCNTL) && defined(F_SETFD)
7413 const int fd = PerlIO_fileno(PL_rsfp);
7414 fcntl(fd,F_SETFD,fd >= 3);
7417 /* Mark this internal pseudo-handle as clean */
7418 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7419 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7420 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7422 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7423 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7424 /* if the script was opened in binmode, we need to revert
7425 * it to text mode for compatibility; but only iff it has CRs
7426 * XXX this is a questionable hack at best. */
7427 if (PL_bufend-PL_bufptr > 2
7428 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7431 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7432 loc = PerlIO_tell(PL_rsfp);
7433 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7436 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7438 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7439 #endif /* NETWARE */
7441 PerlIO_seek(PL_rsfp, loc, 0);
7445 #ifdef PERLIO_LAYERS
7448 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7449 else if (PL_encoding) {
7456 XPUSHs(PL_encoding);
7458 call_method("name", G_SCALAR);
7462 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7463 Perl_form(aTHX_ ":encoding(%"SVf")",
7472 if (PL_realtokenstart >= 0) {
7473 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7475 PL_endwhite = newSVpvs("");
7476 sv_catsv(PL_endwhite, PL_thiswhite);
7478 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7479 PL_realtokenstart = -1;
7481 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7491 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7500 if (PL_expect == XSTATE) {
7507 if (*s == ':' && s[1] == ':') {
7511 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7512 if ((*s == ':' && s[1] == ':')
7513 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7517 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7521 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7522 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7523 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
7526 else if (tmp == KEY_require || tmp == KEY_do
7528 /* that's a way to remember we saw "CORE::" */
7541 LOP(OP_ACCEPT,XTERM);
7544 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7549 LOP(OP_ATAN2,XTERM);
7555 LOP(OP_BINMODE,XTERM);
7558 LOP(OP_BLESS,XTERM);
7567 /* We have to disambiguate the two senses of
7568 "continue". If the next token is a '{' then
7569 treat it as the start of a continue block;
7570 otherwise treat it as a control operator.
7580 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7590 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7599 if (!PL_cryptseen) {
7600 PL_cryptseen = TRUE;
7604 LOP(OP_CRYPT,XTERM);
7607 LOP(OP_CHMOD,XTERM);
7610 LOP(OP_CHOWN,XTERM);
7613 LOP(OP_CONNECT,XTERM);
7633 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7635 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7638 force_ident_maybe_lex('&');
7643 if (orig_keyword == KEY_do) {
7652 PL_hints |= HINT_BLOCK_SCOPE;
7662 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7663 STR_WITH_LEN("NDBM_File::"),
7664 STR_WITH_LEN("DB_File::"),
7665 STR_WITH_LEN("GDBM_File::"),
7666 STR_WITH_LEN("SDBM_File::"),
7667 STR_WITH_LEN("ODBM_File::"),
7669 LOP(OP_DBMOPEN,XTERM);
7675 PL_expect = XOPERATOR;
7676 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7683 pl_yylval.ival = CopLINE(PL_curcop);
7687 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7701 if (*s == '{') { /* block eval */
7702 PL_expect = XTERMBLOCK;
7703 UNIBRACK(OP_ENTERTRY);
7705 else { /* string eval */
7707 UNIBRACK(OP_ENTEREVAL);
7712 UNIBRACK(-OP_ENTEREVAL);
7726 case KEY_endhostent:
7732 case KEY_endservent:
7735 case KEY_endprotoent:
7746 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7748 pl_yylval.ival = CopLINE(PL_curcop);
7750 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7753 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7756 if ((PL_bufend - p) >= 3 &&
7757 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7759 else if ((PL_bufend - p) >= 4 &&
7760 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7763 if (isIDFIRST_lazy_if(p,UTF)) {
7764 p = scan_ident(p, PL_bufend,
7765 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7769 Perl_croak(aTHX_ "Missing $ on loop variable");
7771 s = SvPVX(PL_linestr) + soff;
7777 LOP(OP_FORMLINE,XTERM);
7786 LOP(OP_FCNTL,XTERM);
7792 LOP(OP_FLOCK,XTERM);
7795 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7800 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7805 LOP(OP_GREPSTART, XREF);
7808 PL_expect = XOPERATOR;
7809 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7824 case KEY_getpriority:
7825 LOP(OP_GETPRIORITY,XTERM);
7827 case KEY_getprotobyname:
7830 case KEY_getprotobynumber:
7831 LOP(OP_GPBYNUMBER,XTERM);
7833 case KEY_getprotoent:
7845 case KEY_getpeername:
7846 UNI(OP_GETPEERNAME);
7848 case KEY_gethostbyname:
7851 case KEY_gethostbyaddr:
7852 LOP(OP_GHBYADDR,XTERM);
7854 case KEY_gethostent:
7857 case KEY_getnetbyname:
7860 case KEY_getnetbyaddr:
7861 LOP(OP_GNBYADDR,XTERM);
7866 case KEY_getservbyname:
7867 LOP(OP_GSBYNAME,XTERM);
7869 case KEY_getservbyport:
7870 LOP(OP_GSBYPORT,XTERM);
7872 case KEY_getservent:
7875 case KEY_getsockname:
7876 UNI(OP_GETSOCKNAME);
7878 case KEY_getsockopt:
7879 LOP(OP_GSOCKOPT,XTERM);
7894 pl_yylval.ival = CopLINE(PL_curcop);
7899 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7907 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7909 pl_yylval.ival = CopLINE(PL_curcop);
7913 LOP(OP_INDEX,XTERM);
7919 LOP(OP_IOCTL,XTERM);
7931 PL_expect = XOPERATOR;
7932 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7949 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7954 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7968 LOP(OP_LISTEN,XTERM);
7977 s = scan_pat(s,OP_MATCH);
7978 TERM(sublex_start());
7981 LOP(OP_MAPSTART, XREF);
7984 LOP(OP_MKDIR,XTERM);
7987 LOP(OP_MSGCTL,XTERM);
7990 LOP(OP_MSGGET,XTERM);
7993 LOP(OP_MSGRCV,XTERM);
7996 LOP(OP_MSGSND,XTERM);
8001 PL_in_my = (U16)tmp;
8003 if (isIDFIRST_lazy_if(s,UTF)) {
8007 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8008 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8010 if (!FEATURE_LEXSUBS_IS_ENABLED)
8012 "Experimental \"%s\" subs not enabled",
8013 tmp == KEY_my ? "my" :
8014 tmp == KEY_state ? "state" : "our");
8017 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8018 if (!PL_in_my_stash) {
8021 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8022 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8025 if (PL_madskills) { /* just add type to declarator token */
8026 sv_catsv(PL_thistoken, PL_nextwhite);
8028 sv_catpvn(PL_thistoken, start, s - start);
8036 PL_expect = XOPERATOR;
8037 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8041 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8046 s = tokenize_use(0, s);
8050 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8053 if (!PL_lex_allbrackets &&
8054 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8055 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8061 if (isIDFIRST_lazy_if(s,UTF)) {
8063 for (d = s; isALNUM_lazy_if(d,UTF);) {
8064 d += UTF ? UTF8SKIP(d) : 1;
8066 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
8067 d += UTF ? UTF8SKIP(d) : 1;
8071 for (t=d; isSPACE(*t);)
8073 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8075 && !(t[0] == '=' && t[1] == '>')
8076 && !(t[0] == ':' && t[1] == ':')
8077 && !keyword(s, d-s, 0)
8079 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
8080 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
8081 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8082 "Precedence problem: open %"SVf" should be open(%"SVf")",
8083 SVfARG(tmpsv), SVfARG(tmpsv));
8089 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8091 pl_yylval.ival = OP_OR;
8101 LOP(OP_OPEN_DIR,XTERM);
8104 checkcomma(s,PL_tokenbuf,"filehandle");
8108 checkcomma(s,PL_tokenbuf,"filehandle");
8127 s = force_word(s,WORD,FALSE,TRUE,FALSE);
8129 s = force_strict_version(s);
8130 PL_lex_expect = XBLOCK;
8134 LOP(OP_PIPE_OP,XTERM);
8137 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8140 pl_yylval.ival = OP_CONST;
8141 TERM(sublex_start());
8148 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8151 PL_expect = XOPERATOR;
8152 if (SvCUR(PL_lex_stuff)) {
8153 int warned_comma = !ckWARN(WARN_QW);
8154 int warned_comment = warned_comma;
8155 d = SvPV_force(PL_lex_stuff, len);
8157 for (; isSPACE(*d) && len; --len, ++d)
8162 if (!warned_comma || !warned_comment) {
8163 for (; !isSPACE(*d) && len; --len, ++d) {
8164 if (!warned_comma && *d == ',') {
8165 Perl_warner(aTHX_ packWARN(WARN_QW),
8166 "Possible attempt to separate words with commas");
8169 else if (!warned_comment && *d == '#') {
8170 Perl_warner(aTHX_ packWARN(WARN_QW),
8171 "Possible attempt to put comments in qw() list");
8177 for (; !isSPACE(*d) && len; --len, ++d)
8180 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8181 words = op_append_elem(OP_LIST, words,
8182 newSVOP(OP_CONST, 0, tokeq(sv)));
8187 words = newNULLLIST();
8189 SvREFCNT_dec(PL_lex_stuff);
8190 PL_lex_stuff = NULL;
8192 PL_expect = XOPERATOR;
8193 pl_yylval.opval = sawparens(words);
8198 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8201 pl_yylval.ival = OP_STRINGIFY;
8202 if (SvIVX(PL_lex_stuff) == '\'')
8203 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8204 TERM(sublex_start());
8207 s = scan_pat(s,OP_QR);
8208 TERM(sublex_start());
8211 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8214 readpipe_override();
8215 TERM(sublex_start());
8222 PL_expect = XOPERATOR;
8224 s = force_version(s, FALSE);
8226 else if (*s != 'v' || !isDIGIT(s[1])
8227 || (s = force_version(s, TRUE), *s == 'v'))
8229 *PL_tokenbuf = '\0';
8230 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8231 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8232 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8233 GV_ADD | (UTF ? SVf_UTF8 : 0));
8235 yyerror("<> should be quotes");
8237 if (orig_keyword == KEY_require) {
8245 PL_last_uni = PL_oldbufptr;
8246 PL_last_lop_op = OP_REQUIRE;
8248 return REPORT( (int)REQUIRE );
8254 PL_expect = XOPERATOR;
8255 s = force_word(s,WORD,TRUE,FALSE,FALSE);
8259 LOP(OP_RENAME,XTERM);
8268 LOP(OP_RINDEX,XTERM);
8277 UNIDOR(OP_READLINE);
8280 UNIDOR(OP_BACKTICK);
8289 LOP(OP_REVERSE,XTERM);
8292 UNIDOR(OP_READLINK);
8299 if (pl_yylval.opval)
8300 TERM(sublex_start());
8302 TOKEN(1); /* force error */
8305 checkcomma(s,PL_tokenbuf,"filehandle");
8315 LOP(OP_SELECT,XTERM);
8321 LOP(OP_SEMCTL,XTERM);
8324 LOP(OP_SEMGET,XTERM);
8327 LOP(OP_SEMOP,XTERM);
8333 LOP(OP_SETPGRP,XTERM);
8335 case KEY_setpriority:
8336 LOP(OP_SETPRIORITY,XTERM);
8338 case KEY_sethostent:
8344 case KEY_setservent:
8347 case KEY_setprotoent:
8357 LOP(OP_SEEKDIR,XTERM);
8359 case KEY_setsockopt:
8360 LOP(OP_SSOCKOPT,XTERM);
8366 LOP(OP_SHMCTL,XTERM);
8369 LOP(OP_SHMGET,XTERM);
8372 LOP(OP_SHMREAD,XTERM);
8375 LOP(OP_SHMWRITE,XTERM);
8378 LOP(OP_SHUTDOWN,XTERM);
8387 LOP(OP_SOCKET,XTERM);
8389 case KEY_socketpair:
8390 LOP(OP_SOCKPAIR,XTERM);
8393 checkcomma(s,PL_tokenbuf,"subroutine name");
8396 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8400 LOP(OP_SPLIT,XTERM);
8403 LOP(OP_SPRINTF,XTERM);
8406 LOP(OP_SPLICE,XTERM);
8421 LOP(OP_SUBSTR,XTERM);
8427 char * const tmpbuf = PL_tokenbuf + 1;
8428 SSize_t tboffset = 0;
8429 expectation attrful;
8430 bool have_name, have_proto;
8431 const int key = tmp;
8436 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8437 SV *subtoken = PL_madskills
8438 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8443 s = SKIPSPACE2(s,tmpwhite);
8449 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8450 (*s == ':' && s[1] == ':'))
8453 SV *nametoke = NULL;
8457 attrful = XATTRBLOCK;
8458 /* remember buffer pos'n for later force_word */
8459 tboffset = s - PL_oldbufptr;
8460 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8464 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8467 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8469 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8471 sv_setpvn(PL_subname, tmpbuf, len);
8473 sv_setsv(PL_subname,PL_curstname);
8474 sv_catpvs(PL_subname,"::");
8475 sv_catpvn(PL_subname,tmpbuf,len);
8477 if (SvUTF8(PL_linestr))
8478 SvUTF8_on(PL_subname);
8484 CURMAD('X', nametoke);
8485 CURMAD('_', tmpwhite);
8486 force_ident_maybe_lex('&');
8488 s = SKIPSPACE2(d,tmpwhite);
8494 if (key == KEY_my || key == KEY_our || key==KEY_state)
8497 /* diag_listed_as: Missing name in "%s sub" */
8499 "Missing name in \"%s\"", PL_bufptr);
8501 PL_expect = XTERMBLOCK;
8502 attrful = XATTRTERM;
8503 sv_setpvs(PL_subname,"?");
8507 if (key == KEY_format) {
8509 PL_thistoken = subtoken;
8513 (void) force_word(PL_oldbufptr + tboffset, WORD,
8519 /* Look for a prototype */
8522 bool bad_proto = FALSE;
8523 bool in_brackets = FALSE;
8524 char greedy_proto = ' ';
8525 bool proto_after_greedy_proto = FALSE;
8526 bool must_be_last = FALSE;
8527 bool underscore = FALSE;
8528 bool seen_underscore = FALSE;
8529 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8532 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8534 Perl_croak(aTHX_ "Prototype not terminated");
8535 /* strip spaces and check for bad characters */
8536 d = SvPV(PL_lex_stuff, tmplen);
8538 for (p = d; tmplen; tmplen--, ++p) {
8542 if (warnillegalproto) {
8544 proto_after_greedy_proto = TRUE;
8545 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8550 if ( !strchr(";@%", *p) )
8557 else if ( *p == ']' ) {
8558 in_brackets = FALSE;
8560 else if ( (*p == '@' || *p == '%') &&
8561 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8563 must_be_last = TRUE;
8566 else if ( *p == '_' ) {
8567 underscore = seen_underscore = TRUE;
8574 if (proto_after_greedy_proto)
8575 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8576 "Prototype after '%c' for %"SVf" : %s",
8577 greedy_proto, SVfARG(PL_subname), d);
8579 SV *dsv = newSVpvs_flags("", SVs_TEMP);
8580 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8581 "Illegal character %sin prototype for %"SVf" : %s",
8582 seen_underscore ? "after '_' " : "",
8584 SvUTF8(PL_lex_stuff)
8585 ? sv_uni_display(dsv,
8586 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8588 UNI_DISPLAY_ISPRINT)
8589 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8590 PERL_PV_ESCAPE_NONASCII));
8592 SvCUR_set(PL_lex_stuff, tmp);
8597 CURMAD('q', PL_thisopen);
8598 CURMAD('_', tmpwhite);
8599 CURMAD('=', PL_thisstuff);
8600 CURMAD('Q', PL_thisclose);
8601 NEXTVAL_NEXTTOKE.opval =
8602 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8603 PL_lex_stuff = NULL;
8606 s = SKIPSPACE2(s,tmpwhite);
8614 if (*s == ':' && s[1] != ':')
8615 PL_expect = attrful;
8616 else if (*s != '{' && key == KEY_sub) {
8618 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8619 else if (*s != ';' && *s != '}')
8620 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8627 curmad('^', newSVpvs(""));
8628 CURMAD('_', tmpwhite);
8632 PL_thistoken = subtoken;
8635 NEXTVAL_NEXTTOKE.opval =
8636 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8637 PL_lex_stuff = NULL;
8643 sv_setpvs(PL_subname, "__ANON__");
8645 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8649 force_ident_maybe_lex('&');
8655 LOP(OP_SYSTEM,XREF);
8658 LOP(OP_SYMLINK,XTERM);
8661 LOP(OP_SYSCALL,XTERM);
8664 LOP(OP_SYSOPEN,XTERM);
8667 LOP(OP_SYSSEEK,XTERM);
8670 LOP(OP_SYSREAD,XTERM);
8673 LOP(OP_SYSWRITE,XTERM);
8678 TERM(sublex_start());
8699 LOP(OP_TRUNCATE,XTERM);
8711 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8713 pl_yylval.ival = CopLINE(PL_curcop);
8717 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8719 pl_yylval.ival = CopLINE(PL_curcop);
8723 LOP(OP_UNLINK,XTERM);
8729 LOP(OP_UNPACK,XTERM);
8732 LOP(OP_UTIME,XTERM);
8738 LOP(OP_UNSHIFT,XTERM);
8741 s = tokenize_use(1, s);
8751 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8753 pl_yylval.ival = CopLINE(PL_curcop);
8757 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8759 pl_yylval.ival = CopLINE(PL_curcop);
8763 PL_hints |= HINT_BLOCK_SCOPE;
8770 LOP(OP_WAITPID,XTERM);
8779 ctl_l[0] = toCTRL('L');
8781 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8784 /* Make sure $^L is defined */
8785 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8790 if (PL_expect == XOPERATOR) {
8791 if (*s == '=' && !PL_lex_allbrackets &&
8792 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8800 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8802 pl_yylval.ival = OP_XOR;
8808 #pragma segment Main
8814 Looks up an identifier in the pad or in a package
8817 PRIVATEREF if this is a lexical name.
8818 WORD if this belongs to a package.
8821 if we're in a my declaration
8822 croak if they tried to say my($foo::bar)
8823 build the ops for a my() declaration
8824 if it's an access to a my() variable
8825 build ops for access to a my() variable
8826 if in a dq string, and they've said @foo and we can't find @foo
8828 build ops for a bareword
8832 S_pending_ident(pTHX)
8836 const char pit = (char)pl_yylval.ival;
8837 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8838 /* All routes through this function want to know if there is a colon. */
8839 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8841 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8842 "### Pending identifier '%s'\n", PL_tokenbuf); });
8844 /* if we're in a my(), we can't allow dynamics here.
8845 $foo'bar has already been turned into $foo::bar, so
8846 just check for colons.
8848 if it's a legal name, the OP is a PADANY.
8851 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8853 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8854 "variable %s in \"our\"",
8855 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8856 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8860 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8861 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8862 UTF ? SVf_UTF8 : 0);
8864 pl_yylval.opval = newOP(OP_PADANY, 0);
8865 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8866 UTF ? SVf_UTF8 : 0);
8872 build the ops for accesses to a my() variable.
8877 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8878 UTF ? SVf_UTF8 : 0);
8879 if (tmp != NOT_IN_PAD) {
8880 /* might be an "our" variable" */
8881 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8882 /* build ops for a bareword */
8883 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8884 HEK * const stashname = HvNAME_HEK(stash);
8885 SV * const sym = newSVhek(stashname);
8886 sv_catpvs(sym, "::");
8887 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8888 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8889 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8893 ? (GV_ADDMULTI | GV_ADDINEVAL)
8896 ((PL_tokenbuf[0] == '$') ? SVt_PV
8897 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8902 pl_yylval.opval = newOP(OP_PADANY, 0);
8903 pl_yylval.opval->op_targ = tmp;
8909 Whine if they've said @foo in a doublequoted string,
8910 and @foo isn't a variable we can find in the symbol
8913 if (ckWARN(WARN_AMBIGUOUS) &&
8914 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8915 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8916 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8917 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8918 /* DO NOT warn for @- and @+ */
8919 && !( PL_tokenbuf[2] == '\0' &&
8920 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8923 /* Downgraded from fatal to warning 20000522 mjd */
8924 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8925 "Possible unintended interpolation of %"SVf" in string",
8926 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8927 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8931 /* build ops for a bareword */
8932 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8933 newSVpvn_flags(PL_tokenbuf + 1,
8935 UTF ? SVf_UTF8 : 0 ));
8936 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8938 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8939 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8940 | ( UTF ? SVf_UTF8 : 0 ),
8941 ((PL_tokenbuf[0] == '$') ? SVt_PV
8942 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8948 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8952 PERL_ARGS_ASSERT_CHECKCOMMA;
8954 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8955 if (ckWARN(WARN_SYNTAX)) {
8958 for (w = s+2; *w && level; w++) {
8966 /* the list of chars below is for end of statements or
8967 * block / parens, boolean operators (&&, ||, //) and branch
8968 * constructs (or, and, if, until, unless, while, err, for).
8969 * Not a very solid hack... */
8970 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8971 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8972 "%s (...) interpreted as function",name);
8975 while (s < PL_bufend && isSPACE(*s))
8979 while (s < PL_bufend && isSPACE(*s))
8981 if (isIDFIRST_lazy_if(s,UTF)) {
8982 const char * const w = s;
8983 s += UTF ? UTF8SKIP(s) : 1;
8984 while (isALNUM_lazy_if(s,UTF))
8985 s += UTF ? UTF8SKIP(s) : 1;
8986 while (s < PL_bufend && isSPACE(*s))
8990 if (keyword(w, s - w, 0))
8993 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8994 if (gv && GvCVu(gv))
8996 Perl_croak(aTHX_ "No comma allowed after %s", what);
9001 /* Either returns sv, or mortalizes sv and returns a new SV*.
9002 Best used as sv=new_constant(..., sv, ...).
9003 If s, pv are NULL, calls subroutine with one argument,
9004 and <type> is used with error messages only.
9005 <type> is assumed to be well formed UTF-8 */
9008 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9009 SV *sv, SV *pv, const char *type, STRLEN typelen)
9012 HV * table = GvHV(PL_hintgv); /* ^H */
9016 const char *why1 = "", *why2 = "", *why3 = "";
9018 PERL_ARGS_ASSERT_NEW_CONSTANT;
9020 /* charnames doesn't work well if there have been errors found */
9021 if (PL_error_count > 0 && strEQ(key,"charnames"))
9022 return &PL_sv_undef;
9025 || ! (PL_hints & HINT_LOCALIZE_HH)
9026 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9031 /* Here haven't found what we're looking for. If it is charnames,
9032 * perhaps it needs to be loaded. Try doing that before giving up */
9033 if (strEQ(key,"charnames")) {
9034 Perl_load_module(aTHX_
9036 newSVpvs("_charnames"),
9037 /* version parameter; no need to specify it, as if
9038 * we get too early a version, will fail anyway,
9039 * not being able to find '_charnames' */
9045 table = GvHV(PL_hintgv);
9047 && (PL_hints & HINT_LOCALIZE_HH)
9048 && (cvp = hv_fetch(table, key, keylen, FALSE))
9054 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9055 msg = Perl_newSVpvf(aTHX_
9056 "Constant(%s) unknown", (type ? type: "undef"));
9061 why3 = "} is not defined";
9063 if (strEQ(key,"charnames")) {
9064 yyerror_pv(Perl_form(aTHX_
9065 /* The +3 is for '\N{'; -4 for that, plus '}' */
9066 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9068 UTF ? SVf_UTF8 : 0);
9072 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
9073 (type ? type: "undef"), why1, why2, why3);
9076 yyerror(SvPVX_const(msg));
9081 sv_2mortal(sv); /* Parent created it permanently */
9084 pv = newSVpvn_flags(s, len, SVs_TEMP);
9086 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9088 typesv = &PL_sv_undef;
9090 PUSHSTACKi(PERLSI_OVERLOAD);
9102 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9106 /* Check the eval first */
9107 if (!PL_in_eval && SvTRUE(ERRSV)) {
9109 const char * errstr;
9110 sv_catpvs(ERRSV, "Propagated");
9111 errstr = SvPV_const(ERRSV, errlen);
9112 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9114 res = SvREFCNT_inc_simple(sv);
9118 SvREFCNT_inc_simple_void(res);
9127 why1 = "Call to &{$^H{";
9129 why3 = "}} did not return a defined value";
9137 /* Returns a NUL terminated string, with the length of the string written to
9141 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9145 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9147 PERL_ARGS_ASSERT_SCAN_WORD;
9151 Perl_croak(aTHX_ ident_too_long);
9152 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
9154 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
9159 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
9163 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9164 char *t = s + UTF8SKIP(s);
9166 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9170 Perl_croak(aTHX_ ident_too_long);
9171 Copy(s, d, len, char);
9184 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9187 char *bracket = NULL;
9190 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9192 PERL_ARGS_ASSERT_SCAN_IDENT;
9197 while (isDIGIT(*s)) {
9199 Perl_croak(aTHX_ ident_too_long);
9206 Perl_croak(aTHX_ ident_too_long);
9207 if (isALNUM(*s)) /* UTF handled below */
9209 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
9214 else if (*s == ':' && s[1] == ':') {
9218 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
9219 char *t = s + UTF8SKIP(s);
9220 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
9222 if (d + (t - s) > e)
9223 Perl_croak(aTHX_ ident_too_long);
9224 Copy(s, d, t - s, char);
9235 if (PL_lex_state != LEX_NORMAL)
9236 PL_lex_state = LEX_INTERPENDMAYBE;
9239 if (*s == '$' && s[1] &&
9240 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
9250 const STRLEN skip = UTF8SKIP(s);
9253 for ( i = 0; i < skip; i++ )
9261 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9265 else if (ck_uni && !bracket)
9268 if (isSPACE(s[-1])) {
9270 const char ch = *s++;
9271 if (!SPACE_OR_TAB(ch)) {
9277 if (isIDFIRST_lazy_if(d,UTF)) {
9281 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9282 end += UTF8SKIP(end);
9283 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9284 end += UTF8SKIP(end);
9286 Copy(s, d, end - s, char);
9291 while ((isALNUM(*s) || *s == ':') && d < e)
9294 Perl_croak(aTHX_ ident_too_long);
9297 while (s < send && SPACE_OR_TAB(*s))
9299 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9300 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9301 const char * const brack =
9303 ((*s == '[') ? "[...]" : "{...}");
9304 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9305 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9306 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9307 funny, dest, brack, funny, dest, brack);
9310 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9311 PL_lex_allbrackets++;
9315 /* Handle extended ${^Foo} variables
9316 * 1999-02-27 mjd-perl-patch@plover.com */
9317 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9321 while (isALNUM(*s) && d < e) {
9325 Perl_croak(aTHX_ ident_too_long);
9330 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9331 PL_lex_state = LEX_INTERPEND;
9334 if (PL_lex_state == LEX_NORMAL) {
9335 if (ckWARN(WARN_AMBIGUOUS) &&
9336 (keyword(dest, d - dest, 0)
9337 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
9339 SV *tmp = newSVpvn_flags( dest, d - dest,
9340 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
9343 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9344 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9345 funny, tmp, funny, tmp);
9350 s = bracket; /* let the parser handle it */
9354 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9355 PL_lex_state = LEX_INTERPEND;
9360 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9362 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9363 * the parse starting at 's', based on the subset that are valid in this
9364 * context input to this routine in 'valid_flags'. Advances s. Returns
9365 * TRUE if the input should be treated as a valid flag, so the next char
9366 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9367 * first call on the current regex. This routine will set it to any
9368 * charset modifier found. The caller shouldn't change it. This way,
9369 * another charset modifier encountered in the parse can be detected as an
9370 * error, as we have decided to allow only one */
9373 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9375 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9376 if (isALNUM_lazy_if(*s, UTF)) {
9377 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9378 UTF ? SVf_UTF8 : 0);
9380 /* Pretend that it worked, so will continue processing before
9389 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9390 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9391 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9392 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9393 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9394 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9395 case LOCALE_PAT_MOD:
9397 goto multiple_charsets;
9399 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9402 case UNICODE_PAT_MOD:
9404 goto multiple_charsets;
9406 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9409 case ASCII_RESTRICT_PAT_MOD:
9411 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9415 /* Error if previous modifier wasn't an 'a', but if it was, see
9416 * if, and accept, a second occurrence (only) */
9418 || get_regex_charset(*pmfl)
9419 != REGEX_ASCII_RESTRICTED_CHARSET)
9421 goto multiple_charsets;
9423 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9427 case DEPENDS_PAT_MOD:
9429 goto multiple_charsets;
9431 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9440 if (*charset != c) {
9441 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9443 else if (c == 'a') {
9444 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9447 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9450 /* Pretend that it worked, so will continue processing before dieing */
9456 S_scan_pat(pTHX_ char *start, I32 type)
9460 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
9461 const char * const valid_flags =
9462 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9463 char charset = '\0'; /* character set modifier */
9468 PERL_ARGS_ASSERT_SCAN_PAT;
9470 /* this was only needed for the initial scan_str; set it to false
9471 * so that any (?{}) code blocks etc are parsed normally */
9472 PL_reg_state.re_reparsing = FALSE;
9474 const char * const delimiter = skipspace(start);
9478 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9479 : "Search pattern not terminated" ));
9482 pm = (PMOP*)newPMOP(type, 0);
9483 if (PL_multi_open == '?') {
9484 /* This is the only point in the code that sets PMf_ONCE: */
9485 pm->op_pmflags |= PMf_ONCE;
9487 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9488 allows us to restrict the list needed by reset to just the ??
9490 assert(type != OP_TRANS);
9492 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9495 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9498 elements = mg->mg_len / sizeof(PMOP**);
9499 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9500 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9501 mg->mg_len = elements * sizeof(PMOP**);
9502 PmopSTASH_set(pm,PL_curstash);
9509 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9510 * anon CV. False positives like qr/[(?{]/ are harmless */
9512 if (type == OP_QR) {
9514 char *e, *p = SvPV(PL_lex_stuff, len);
9516 for (; p < e; p++) {
9517 if (p[0] == '(' && p[1] == '?'
9518 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9520 pm->op_pmflags |= PMf_HAS_CV;
9524 pm->op_pmflags |= PMf_IS_QR;
9527 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9529 if (PL_madskills && modstart != s) {
9530 SV* tmptoken = newSVpvn(modstart, s - modstart);
9531 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9534 /* issue a warning if /c is specified,but /g is not */
9535 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9537 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9538 "Use of /c modifier is meaningless without /g" );
9541 PL_lex_op = (OP*)pm;
9542 pl_yylval.ival = OP_MATCH;
9547 S_scan_subst(pTHX_ char *start)
9554 char charset = '\0'; /* character set modifier */
9559 PERL_ARGS_ASSERT_SCAN_SUBST;
9561 pl_yylval.ival = OP_NULL;
9563 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9566 Perl_croak(aTHX_ "Substitution pattern not terminated");
9568 if (s[-1] == PL_multi_open)
9572 CURMAD('q', PL_thisopen);
9573 CURMAD('_', PL_thiswhite);
9574 CURMAD('E', PL_thisstuff);
9575 CURMAD('Q', PL_thisclose);
9576 PL_realtokenstart = s - SvPVX(PL_linestr);
9580 first_start = PL_multi_start;
9581 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9584 SvREFCNT_dec(PL_lex_stuff);
9585 PL_lex_stuff = NULL;
9587 Perl_croak(aTHX_ "Substitution replacement not terminated");
9589 PL_multi_start = first_start; /* so whole substitution is taken together */
9591 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9595 CURMAD('z', PL_thisopen);
9596 CURMAD('R', PL_thisstuff);
9597 CURMAD('Z', PL_thisclose);
9603 if (*s == EXEC_PAT_MOD) {
9607 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9616 curmad('m', newSVpvn(modstart, s - modstart));
9617 append_madprops(PL_thismad, (OP*)pm, 0);
9621 if ((pm->op_pmflags & PMf_CONTINUE)) {
9622 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9626 SV * const repl = newSVpvs("");
9629 pm->op_pmflags |= PMf_EVAL;
9632 sv_catpvs(repl, "eval ");
9634 sv_catpvs(repl, "do ");
9636 sv_catpvs(repl, "{");
9637 sv_catsv(repl, PL_sublex_info.repl);
9638 sv_catpvs(repl, "}");
9640 SvREFCNT_dec(PL_sublex_info.repl);
9641 PL_sublex_info.repl = repl;
9644 PL_lex_op = (OP*)pm;
9645 pl_yylval.ival = OP_SUBST;
9650 S_scan_trans(pTHX_ char *start)
9658 bool nondestruct = 0;
9663 PERL_ARGS_ASSERT_SCAN_TRANS;
9665 pl_yylval.ival = OP_NULL;
9667 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
9669 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9671 if (s[-1] == PL_multi_open)
9675 CURMAD('q', PL_thisopen);
9676 CURMAD('_', PL_thiswhite);
9677 CURMAD('E', PL_thisstuff);
9678 CURMAD('Q', PL_thisclose);
9679 PL_realtokenstart = s - SvPVX(PL_linestr);
9683 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
9686 SvREFCNT_dec(PL_lex_stuff);
9687 PL_lex_stuff = NULL;
9689 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9692 CURMAD('z', PL_thisopen);
9693 CURMAD('R', PL_thisstuff);
9694 CURMAD('Z', PL_thisclose);
9697 complement = del = squash = 0;
9704 complement = OPpTRANS_COMPLEMENT;
9707 del = OPpTRANS_DELETE;
9710 squash = OPpTRANS_SQUASH;
9722 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9723 o->op_private &= ~OPpTRANS_ALL;
9724 o->op_private |= del|squash|complement|
9725 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9726 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9729 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9734 curmad('m', newSVpvn(modstart, s - modstart));
9735 append_madprops(PL_thismad, o, 0);
9744 Takes a pointer to the first < in <<FOO.
9745 Returns a pointer to the byte following <<FOO.
9747 This function scans a heredoc, which involves different methods
9748 depending on whether we are in a string eval, quoted construct, etc.
9749 This is because PL_linestr could containing a single line of input, or
9750 a whole string being evalled, or the contents of the current quote-
9753 The two basic methods are:
9754 - Steal lines from the input stream
9755 - Scan the heredoc in PL_linestr and remove it therefrom
9757 In a file scope or filtered eval, the first method is used; in a
9758 string eval, the second.
9760 In a quote-like operator, we have to choose between the two,
9761 depending on where we can find a newline. We peek into outer lex-
9762 ing scopes until we find one with a newline in it. If we reach the
9763 outermost lexing scope and it is a file, we use the stream method.
9764 Otherwise it is treated as an eval.
9768 S_scan_heredoc(pTHX_ register char *s)
9771 I32 op_type = OP_SCALAR;
9778 const bool infile = PL_rsfp || PL_parser->filtered;
9779 LEXSHARED *shared = PL_parser->lex_shared;
9781 I32 stuffstart = s - SvPVX(PL_linestr);
9784 PL_realtokenstart = -1;
9787 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9790 d = PL_tokenbuf + 1;
9791 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9792 *PL_tokenbuf = '\n';
9794 while (SPACE_OR_TAB(*peek))
9796 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9799 s = delimcpy(d, e, s, PL_bufend, term, &len);
9801 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9807 /* <<\FOO is equivalent to <<'FOO' */
9811 if (!isALNUM_lazy_if(s,UTF))
9812 deprecate("bare << to mean <<\"\"");
9813 for (; isALNUM_lazy_if(s,UTF); s++) {
9818 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9819 Perl_croak(aTHX_ "Delimiter for here document is too long");
9822 len = d - PL_tokenbuf;
9826 tstart = PL_tokenbuf + 1;
9827 PL_thisclose = newSVpvn(tstart, len - 1);
9828 tstart = SvPVX(PL_linestr) + stuffstart;
9829 PL_thisopen = newSVpvn(tstart, s - tstart);
9830 stuffstart = s - SvPVX(PL_linestr);
9833 #ifndef PERL_STRICT_CR
9834 d = strchr(s, '\r');
9836 char * const olds = s;
9838 while (s < PL_bufend) {
9844 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9853 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9859 tstart = SvPVX(PL_linestr) + stuffstart;
9861 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9863 PL_thisstuff = newSVpvn(tstart, s - tstart);
9866 stuffstart = s - SvPVX(PL_linestr);
9869 tmpstr = newSV_type(SVt_PVIV);
9873 SvIV_set(tmpstr, -1);
9875 else if (term == '`') {
9876 op_type = OP_BACKTICK;
9877 SvIV_set(tmpstr, '\\');
9880 PL_multi_start = CopLINE(PL_curcop) + 1;
9881 PL_multi_open = PL_multi_close = '<';
9882 /* inside a string eval or quote-like operator */
9883 if (!infile || PL_lex_inwhat) {
9886 char * const olds = s;
9887 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
9888 /* These two fields are not set until an inner lexing scope is
9889 entered. But we need them set here. */
9890 shared->ls_bufptr = s;
9891 shared->ls_linestr = PL_linestr;
9893 /* Look for a newline. If the current buffer does not have one,
9894 peek into the line buffer of the parent lexing scope, going
9895 up as many levels as necessary to find one with a newline
9898 while (!(s = (char *)memchr(
9899 (void *)shared->ls_bufptr, '\n',
9900 SvEND(shared->ls_linestr)-shared->ls_bufptr
9902 shared = shared->ls_prev;
9903 /* shared is only null if we have gone beyond the outermost
9904 lexing scope. In a file, we will have broken out of the
9905 loop in the previous iteration. In an eval, the string buf-
9906 fer ends with "\n;", so the while condition below will have
9907 evaluated to false. So shared can never be null. */
9909 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9910 most lexing scope. In a file, shared->ls_linestr at that
9911 level is just one line, so there is no body to steal. */
9912 if (infile && !shared->ls_prev) {
9918 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9921 linestr = shared->ls_linestr;
9922 bufend = SvEND(linestr);
9924 while (s < bufend &&
9925 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
9927 ++shared->herelines;
9932 sv_setpvn(tmpstr,d+1,s-d);
9936 sv_catpvn(PL_thisstuff, d + 1, s - d);
9938 PL_thisstuff = newSVpvn(d + 1, s - d);
9939 stuffstart = s - SvPVX(PL_linestr);
9943 /* the preceding stmt passes a newline */
9944 shared->herelines++;
9946 /* s now points to the newline after the heredoc terminator.
9947 d points to the newline before the body of the heredoc.
9950 /* We are going to modify linestr in place here, so set
9951 aside copies of the string if necessary for re-evals or
9953 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9954 check shared->re_eval_str. */
9955 if (shared->re_eval_start || shared->re_eval_str) {
9956 /* Set aside the rest of the regexp */
9957 if (!shared->re_eval_str)
9958 shared->re_eval_str =
9959 newSVpvn(shared->re_eval_start,
9960 bufend - shared->re_eval_start);
9961 shared->re_eval_start -= s-d;
9963 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9964 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9965 cx->blk_eval.cur_text == linestr)
9967 cx->blk_eval.cur_text = newSVsv(linestr);
9968 SvSCREAM_on(cx->blk_eval.cur_text);
9970 /* Copy everything from s onwards back to d. */
9971 Move(s,d,bufend-s + 1,char);
9972 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9973 /* Setting PL_bufend only applies when we have not dug deeper
9974 into other scopes, because sublex_done sets PL_bufend to
9975 SvEND(PL_linestr). */
9976 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9983 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9984 term = PL_tokenbuf[1];
9986 linestr_save = PL_linestr; /* must restore this afterwards */
9987 d = s; /* and this */
9988 PL_linestr = newSVpvs("");
9989 PL_bufend = SvPVX(PL_linestr);
9993 tstart = SvPVX(PL_linestr) + stuffstart;
9995 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9997 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10000 PL_bufptr = PL_bufend;
10001 CopLINE_set(PL_curcop,
10002 PL_multi_start + shared->herelines);
10003 if (!lex_next_chunk(LEX_NO_TERM)
10004 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10005 SvREFCNT_dec(linestr_save);
10008 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10009 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10010 lex_grow_linestr(SvCUR(PL_linestr) + 2);
10011 sv_catpvs(PL_linestr, "\n\0");
10015 stuffstart = s - SvPVX(PL_linestr);
10017 shared->herelines++;
10018 PL_last_lop = PL_last_uni = NULL;
10019 #ifndef PERL_STRICT_CR
10020 if (PL_bufend - PL_linestart >= 2) {
10021 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10022 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10024 PL_bufend[-2] = '\n';
10026 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10028 else if (PL_bufend[-1] == '\r')
10029 PL_bufend[-1] = '\n';
10031 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10032 PL_bufend[-1] = '\n';
10034 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10035 SvREFCNT_dec(PL_linestr);
10036 PL_linestr = linestr_save;
10037 PL_linestart = SvPVX(linestr_save);
10038 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10043 sv_catsv(tmpstr,PL_linestr);
10047 PL_multi_end = CopLINE(PL_curcop);
10048 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10049 SvPV_shrink_to_cur(tmpstr);
10052 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10054 else if (PL_encoding)
10055 sv_recode_to_utf8(tmpstr, PL_encoding);
10057 PL_lex_stuff = tmpstr;
10058 pl_yylval.ival = op_type;
10062 SvREFCNT_dec(tmpstr);
10063 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10064 missingterm(PL_tokenbuf + 1);
10067 /* scan_inputsymbol
10068 takes: current position in input buffer
10069 returns: new position in input buffer
10070 side-effects: pl_yylval and lex_op are set.
10075 <FH> read from filehandle
10076 <pkg::FH> read from package qualified filehandle
10077 <pkg'FH> read from package qualified filehandle
10078 <$fh> read from filehandle in $fh
10079 <*.h> filename glob
10084 S_scan_inputsymbol(pTHX_ char *start)
10087 char *s = start; /* current position in buffer */
10090 char *d = PL_tokenbuf; /* start of temp holding space */
10091 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10093 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10095 end = strchr(s, '\n');
10098 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10100 /* die if we didn't have space for the contents of the <>,
10101 or if it didn't end, or if we see a newline
10104 if (len >= (I32)sizeof PL_tokenbuf)
10105 Perl_croak(aTHX_ "Excessively long <> operator");
10107 Perl_croak(aTHX_ "Unterminated <> operator");
10112 Remember, only scalar variables are interpreted as filehandles by
10113 this code. Anything more complex (e.g., <$fh{$num}>) will be
10114 treated as a glob() call.
10115 This code makes use of the fact that except for the $ at the front,
10116 a scalar variable and a filehandle look the same.
10118 if (*d == '$' && d[1]) d++;
10120 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10121 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10122 d += UTF ? UTF8SKIP(d) : 1;
10124 /* If we've tried to read what we allow filehandles to look like, and
10125 there's still text left, then it must be a glob() and not a getline.
10126 Use scan_str to pull out the stuff between the <> and treat it
10127 as nothing more than a string.
10130 if (d - PL_tokenbuf != len) {
10131 pl_yylval.ival = OP_GLOB;
10132 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
10134 Perl_croak(aTHX_ "Glob not terminated");
10138 bool readline_overriden = FALSE;
10141 /* we're in a filehandle read situation */
10144 /* turn <> into <ARGV> */
10146 Copy("ARGV",d,5,char);
10148 /* Check whether readline() is overriden */
10149 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10151 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10153 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10154 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10155 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10156 readline_overriden = TRUE;
10158 /* if <$fh>, create the ops to turn the variable into a
10162 /* try to find it in the pad for this block, otherwise find
10163 add symbol table ops
10165 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10166 if (tmp != NOT_IN_PAD) {
10167 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10168 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10169 HEK * const stashname = HvNAME_HEK(stash);
10170 SV * const sym = sv_2mortal(newSVhek(stashname));
10171 sv_catpvs(sym, "::");
10172 sv_catpv(sym, d+1);
10177 OP * const o = newOP(OP_PADSV, 0);
10179 PL_lex_op = readline_overriden
10180 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10181 op_append_elem(OP_LIST, o,
10182 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10183 : (OP*)newUNOP(OP_READLINE, 0, o);
10192 ? (GV_ADDMULTI | GV_ADDINEVAL)
10193 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10195 PL_lex_op = readline_overriden
10196 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10197 op_append_elem(OP_LIST,
10198 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10199 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10200 : (OP*)newUNOP(OP_READLINE, 0,
10201 newUNOP(OP_RV2SV, 0,
10202 newGVOP(OP_GV, 0, gv)));
10204 if (!readline_overriden)
10205 PL_lex_op->op_flags |= OPf_SPECIAL;
10206 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10207 pl_yylval.ival = OP_NULL;
10210 /* If it's none of the above, it must be a literal filehandle
10211 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10213 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10214 PL_lex_op = readline_overriden
10215 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10216 op_append_elem(OP_LIST,
10217 newGVOP(OP_GV, 0, gv),
10218 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10219 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10220 pl_yylval.ival = OP_NULL;
10229 takes: start position in buffer
10230 keep_quoted preserve \ on the embedded delimiter(s)
10231 keep_delims preserve the delimiters around the string
10232 re_reparse compiling a run-time /(?{})/:
10233 collapse // to /, and skip encoding src
10234 returns: position to continue reading from buffer
10235 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10236 updates the read buffer.
10238 This subroutine pulls a string out of the input. It is called for:
10239 q single quotes q(literal text)
10240 ' single quotes 'literal text'
10241 qq double quotes qq(interpolate $here please)
10242 " double quotes "interpolate $here please"
10243 qx backticks qx(/bin/ls -l)
10244 ` backticks `/bin/ls -l`
10245 qw quote words @EXPORT_OK = qw( func() $spam )
10246 m// regexp match m/this/
10247 s/// regexp substitute s/this/that/
10248 tr/// string transliterate tr/this/that/
10249 y/// string transliterate y/this/that/
10250 ($*@) sub prototypes sub foo ($)
10251 (stuff) sub attr parameters sub foo : attr(stuff)
10252 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10254 In most of these cases (all but <>, patterns and transliterate)
10255 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10256 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10257 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10260 It skips whitespace before the string starts, and treats the first
10261 character as the delimiter. If the delimiter is one of ([{< then
10262 the corresponding "close" character )]}> is used as the closing
10263 delimiter. It allows quoting of delimiters, and if the string has
10264 balanced delimiters ([{<>}]) it allows nesting.
10266 On success, the SV with the resulting string is put into lex_stuff or,
10267 if that is already non-NULL, into lex_repl. The second case occurs only
10268 when parsing the RHS of the special constructs s/// and tr/// (y///).
10269 For convenience, the terminating delimiter character is stuffed into
10274 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
10277 SV *sv; /* scalar value: string */
10278 const char *tmps; /* temp string, used for delimiter matching */
10279 char *s = start; /* current position in the buffer */
10280 char term; /* terminating character */
10281 char *to; /* current position in the sv's data */
10282 I32 brackets = 1; /* bracket nesting level */
10283 bool has_utf8 = FALSE; /* is there any utf8 content? */
10284 I32 termcode; /* terminating char. code */
10285 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10286 STRLEN termlen; /* length of terminating string */
10287 int last_off = 0; /* last position for nesting bracket */
10293 PERL_ARGS_ASSERT_SCAN_STR;
10295 /* skip space before the delimiter */
10301 if (PL_realtokenstart >= 0) {
10302 stuffstart = PL_realtokenstart;
10303 PL_realtokenstart = -1;
10306 stuffstart = start - SvPVX(PL_linestr);
10308 /* mark where we are, in case we need to report errors */
10311 /* after skipping whitespace, the next character is the terminator */
10314 termcode = termstr[0] = term;
10318 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10319 Copy(s, termstr, termlen, U8);
10320 if (!UTF8_IS_INVARIANT(term))
10324 /* mark where we are */
10325 PL_multi_start = CopLINE(PL_curcop);
10326 PL_multi_open = term;
10328 /* find corresponding closing delimiter */
10329 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10330 termcode = termstr[0] = term = tmps[5];
10332 PL_multi_close = term;
10334 /* create a new SV to hold the contents. 79 is the SV's initial length.
10335 What a random number. */
10336 sv = newSV_type(SVt_PVIV);
10338 SvIV_set(sv, termcode);
10339 (void)SvPOK_only(sv); /* validate pointer */
10341 /* move past delimiter and try to read a complete string */
10343 sv_catpvn(sv, s, termlen);
10346 tstart = SvPVX(PL_linestr) + stuffstart;
10347 if (PL_madskills && !PL_thisopen && !keep_delims) {
10348 PL_thisopen = newSVpvn(tstart, s - tstart);
10349 stuffstart = s - SvPVX(PL_linestr);
10353 if (PL_encoding && !UTF && !re_reparse) {
10357 int offset = s - SvPVX_const(PL_linestr);
10358 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10359 &offset, (char*)termstr, termlen);
10360 const char * const ns = SvPVX_const(PL_linestr) + offset;
10361 char * const svlast = SvEND(sv) - 1;
10363 for (; s < ns; s++) {
10364 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10365 COPLINE_INC_WITH_HERELINES;
10368 goto read_more_line;
10370 /* handle quoted delimiters */
10371 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10373 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10375 if ((svlast-1 - t) % 2) {
10376 if (!keep_quoted) {
10377 *(svlast-1) = term;
10379 SvCUR_set(sv, SvCUR(sv) - 1);
10384 if (PL_multi_open == PL_multi_close) {
10390 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10391 /* At here, all closes are "was quoted" one,
10392 so we don't check PL_multi_close. */
10394 if (!keep_quoted && *(t+1) == PL_multi_open)
10399 else if (*t == PL_multi_open)
10407 SvCUR_set(sv, w - SvPVX_const(sv));
10409 last_off = w - SvPVX(sv);
10410 if (--brackets <= 0)
10415 if (!keep_delims) {
10416 SvCUR_set(sv, SvCUR(sv) - 1);
10422 /* extend sv if need be */
10423 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10424 /* set 'to' to the next character in the sv's string */
10425 to = SvPVX(sv)+SvCUR(sv);
10427 /* if open delimiter is the close delimiter read unbridle */
10428 if (PL_multi_open == PL_multi_close) {
10429 for (; s < PL_bufend; s++,to++) {
10430 /* embedded newlines increment the current line number */
10431 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10432 COPLINE_INC_WITH_HERELINES;
10433 /* handle quoted delimiters */
10434 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10437 || (re_reparse && s[1] == '\\'))
10440 /* any other quotes are simply copied straight through */
10444 /* terminate when run out of buffer (the for() condition), or
10445 have found the terminator */
10446 else if (*s == term) {
10449 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10452 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10458 /* if the terminator isn't the same as the start character (e.g.,
10459 matched brackets), we have to allow more in the quoting, and
10460 be prepared for nested brackets.
10463 /* read until we run out of string, or we find the terminator */
10464 for (; s < PL_bufend; s++,to++) {
10465 /* embedded newlines increment the line count */
10466 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10467 COPLINE_INC_WITH_HERELINES;
10468 /* backslashes can escape the open or closing characters */
10469 if (*s == '\\' && s+1 < PL_bufend) {
10470 if (!keep_quoted &&
10471 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10476 /* allow nested opens and closes */
10477 else if (*s == PL_multi_close && --brackets <= 0)
10479 else if (*s == PL_multi_open)
10481 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10486 /* terminate the copied string and update the sv's end-of-string */
10488 SvCUR_set(sv, to - SvPVX_const(sv));
10491 * this next chunk reads more into the buffer if we're not done yet
10495 break; /* handle case where we are done yet :-) */
10497 #ifndef PERL_STRICT_CR
10498 if (to - SvPVX_const(sv) >= 2) {
10499 if ((to[-2] == '\r' && to[-1] == '\n') ||
10500 (to[-2] == '\n' && to[-1] == '\r'))
10504 SvCUR_set(sv, to - SvPVX_const(sv));
10506 else if (to[-1] == '\r')
10509 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10514 /* if we're out of file, or a read fails, bail and reset the current
10515 line marker so we can report where the unterminated string began
10518 if (PL_madskills) {
10519 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10521 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10523 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10526 COPLINE_INC_WITH_HERELINES;
10527 PL_bufptr = PL_bufend;
10528 if (!lex_next_chunk(0)) {
10530 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10539 /* at this point, we have successfully read the delimited string */
10541 if (!PL_encoding || UTF || re_reparse) {
10543 if (PL_madskills) {
10544 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10545 const int len = s - tstart;
10547 sv_catpvn(PL_thisstuff, tstart, len);
10549 PL_thisstuff = newSVpvn(tstart, len);
10550 if (!PL_thisclose && !keep_delims)
10551 PL_thisclose = newSVpvn(s,termlen);
10556 sv_catpvn(sv, s, termlen);
10561 if (PL_madskills) {
10562 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10563 const int len = s - tstart - termlen;
10565 sv_catpvn(PL_thisstuff, tstart, len);
10567 PL_thisstuff = newSVpvn(tstart, len);
10568 if (!PL_thisclose && !keep_delims)
10569 PL_thisclose = newSVpvn(s - termlen,termlen);
10573 if (has_utf8 || (PL_encoding && !re_reparse))
10576 PL_multi_end = CopLINE(PL_curcop);
10578 /* if we allocated too much space, give some back */
10579 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10580 SvLEN_set(sv, SvCUR(sv) + 1);
10581 SvPV_renew(sv, SvLEN(sv));
10584 /* decide whether this is the first or second quoted string we've read
10589 PL_sublex_info.repl = sv;
10597 takes: pointer to position in buffer
10598 returns: pointer to new position in buffer
10599 side-effects: builds ops for the constant in pl_yylval.op
10601 Read a number in any of the formats that Perl accepts:
10603 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10604 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10607 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10609 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10612 If it reads a number without a decimal point or an exponent, it will
10613 try converting the number to an integer and see if it can do so
10614 without loss of precision.
10618 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10621 const char *s = start; /* current position in buffer */
10622 char *d; /* destination in temp buffer */
10623 char *e; /* end of temp buffer */
10624 NV nv; /* number read, as a double */
10625 SV *sv = NULL; /* place to put the converted number */
10626 bool floatit; /* boolean: int or float? */
10627 const char *lastub = NULL; /* position of last underbar */
10628 static char const number_too_long[] = "Number too long";
10630 PERL_ARGS_ASSERT_SCAN_NUM;
10632 /* We use the first character to decide what type of number this is */
10636 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10638 /* if it starts with a 0, it could be an octal number, a decimal in
10639 0.13 disguise, or a hexadecimal number, or a binary number. */
10643 u holds the "number so far"
10644 shift the power of 2 of the base
10645 (hex == 4, octal == 3, binary == 1)
10646 overflowed was the number more than we can hold?
10648 Shift is used when we add a digit. It also serves as an "are
10649 we in octal/hex/binary?" indicator to disallow hex characters
10650 when in octal mode.
10655 bool overflowed = FALSE;
10656 bool just_zero = TRUE; /* just plain 0 or binary number? */
10657 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10658 static const char* const bases[5] =
10659 { "", "binary", "", "octal", "hexadecimal" };
10660 static const char* const Bases[5] =
10661 { "", "Binary", "", "Octal", "Hexadecimal" };
10662 static const char* const maxima[5] =
10664 "0b11111111111111111111111111111111",
10668 const char *base, *Base, *max;
10670 /* check for hex */
10671 if (s[1] == 'x' || s[1] == 'X') {
10675 } else if (s[1] == 'b' || s[1] == 'B') {
10680 /* check for a decimal in disguise */
10681 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10683 /* so it must be octal */
10690 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10691 "Misplaced _ in number");
10695 base = bases[shift];
10696 Base = Bases[shift];
10697 max = maxima[shift];
10699 /* read the rest of the number */
10701 /* x is used in the overflow test,
10702 b is the digit we're adding on. */
10707 /* if we don't mention it, we're done */
10711 /* _ are ignored -- but warned about if consecutive */
10713 if (lastub && s == lastub + 1)
10714 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10715 "Misplaced _ in number");
10719 /* 8 and 9 are not octal */
10720 case '8': case '9':
10722 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10726 case '2': case '3': case '4':
10727 case '5': case '6': case '7':
10729 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10732 case '0': case '1':
10733 b = *s++ & 15; /* ASCII digit -> value of digit */
10737 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10738 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10739 /* make sure they said 0x */
10742 b = (*s++ & 7) + 9;
10744 /* Prepare to put the digit we have onto the end
10745 of the number so far. We check for overflows.
10751 x = u << shift; /* make room for the digit */
10753 if ((x >> shift) != u
10754 && !(PL_hints & HINT_NEW_BINARY)) {
10757 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10758 "Integer overflow in %s number",
10761 u = x | b; /* add the digit to the end */
10764 n *= nvshift[shift];
10765 /* If an NV has not enough bits in its
10766 * mantissa to represent an UV this summing of
10767 * small low-order numbers is a waste of time
10768 * (because the NV cannot preserve the
10769 * low-order bits anyway): we could just
10770 * remember when did we overflow and in the
10771 * end just multiply n by the right
10779 /* if we get here, we had success: make a scalar value from
10784 /* final misplaced underbar check */
10785 if (s[-1] == '_') {
10786 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10790 if (n > 4294967295.0)
10791 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10792 "%s number > %s non-portable",
10798 if (u > 0xffffffff)
10799 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10800 "%s number > %s non-portable",
10805 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10806 sv = new_constant(start, s - start, "integer",
10807 sv, NULL, NULL, 0);
10808 else if (PL_hints & HINT_NEW_BINARY)
10809 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10814 handle decimal numbers.
10815 we're also sent here when we read a 0 as the first digit
10817 case '1': case '2': case '3': case '4': case '5':
10818 case '6': case '7': case '8': case '9': case '.':
10821 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10824 /* read next group of digits and _ and copy into d */
10825 while (isDIGIT(*s) || *s == '_') {
10826 /* skip underscores, checking for misplaced ones
10830 if (lastub && s == lastub + 1)
10831 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10832 "Misplaced _ in number");
10836 /* check for end of fixed-length buffer */
10838 Perl_croak(aTHX_ number_too_long);
10839 /* if we're ok, copy the character */
10844 /* final misplaced underbar check */
10845 if (lastub && s == lastub + 1) {
10846 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10849 /* read a decimal portion if there is one. avoid
10850 3..5 being interpreted as the number 3. followed
10853 if (*s == '.' && s[1] != '.') {
10858 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10859 "Misplaced _ in number");
10863 /* copy, ignoring underbars, until we run out of digits.
10865 for (; isDIGIT(*s) || *s == '_'; s++) {
10866 /* fixed length buffer check */
10868 Perl_croak(aTHX_ number_too_long);
10870 if (lastub && s == lastub + 1)
10871 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10872 "Misplaced _ in number");
10878 /* fractional part ending in underbar? */
10879 if (s[-1] == '_') {
10880 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10881 "Misplaced _ in number");
10883 if (*s == '.' && isDIGIT(s[1])) {
10884 /* oops, it's really a v-string, but without the "v" */
10890 /* read exponent part, if present */
10891 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10895 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10896 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10898 /* stray preinitial _ */
10900 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10901 "Misplaced _ in number");
10905 /* allow positive or negative exponent */
10906 if (*s == '+' || *s == '-')
10909 /* stray initial _ */
10911 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10912 "Misplaced _ in number");
10916 /* read digits of exponent */
10917 while (isDIGIT(*s) || *s == '_') {
10920 Perl_croak(aTHX_ number_too_long);
10924 if (((lastub && s == lastub + 1) ||
10925 (!isDIGIT(s[1]) && s[1] != '_')))
10926 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10927 "Misplaced _ in number");
10935 We try to do an integer conversion first if no characters
10936 indicating "float" have been found.
10941 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10943 if (flags == IS_NUMBER_IN_UV) {
10945 sv = newSViv(uv); /* Prefer IVs over UVs. */
10948 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10949 if (uv <= (UV) IV_MIN)
10950 sv = newSViv(-(IV)uv);
10957 /* terminate the string */
10959 nv = Atof(PL_tokenbuf);
10964 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10965 const char *const key = floatit ? "float" : "integer";
10966 const STRLEN keylen = floatit ? 5 : 7;
10967 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10968 key, keylen, sv, NULL, NULL, 0);
10972 /* if it starts with a v, it could be a v-string */
10975 sv = newSV(5); /* preallocate storage space */
10976 s = scan_vstring(s, PL_bufend, sv);
10980 /* make the op for the constant and return */
10983 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10985 lvalp->opval = NULL;
10991 S_scan_formline(pTHX_ register char *s)
10996 SV * const stuff = newSVpvs("");
10997 bool needargs = FALSE;
10998 bool eofmt = FALSE;
11000 char *tokenstart = s;
11001 SV* savewhite = NULL;
11003 if (PL_madskills) {
11004 savewhite = PL_thiswhite;
11009 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11011 while (!needargs) {
11014 #ifdef PERL_STRICT_CR
11015 while (SPACE_OR_TAB(*t))
11018 while (SPACE_OR_TAB(*t) || *t == '\r')
11021 if (*t == '\n' || t == PL_bufend) {
11026 eol = (char *) memchr(s,'\n',PL_bufend-s);
11030 for (t = s; t < eol; t++) {
11031 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11033 goto enough; /* ~~ must be first line in formline */
11035 if (*t == '@' || *t == '^')
11039 sv_catpvn(stuff, s, eol-s);
11040 #ifndef PERL_STRICT_CR
11041 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11042 char *end = SvPVX(stuff) + SvCUR(stuff);
11045 SvCUR_set(stuff, SvCUR(stuff) - 1);
11053 if ((PL_rsfp || PL_parser->filtered)
11054 && PL_parser->form_lex_state == LEX_NORMAL) {
11057 if (PL_madskills) {
11059 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11061 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11064 PL_bufptr = PL_bufend;
11065 COPLINE_INC_WITH_HERELINES;
11066 got_some = lex_next_chunk(0);
11067 CopLINE_dec(PL_curcop);
11070 tokenstart = PL_bufptr;
11078 if (!SvCUR(stuff) || needargs)
11079 PL_lex_state = PL_parser->form_lex_state;
11080 if (SvCUR(stuff)) {
11081 PL_expect = XSTATE;
11083 start_force(PL_curforce);
11084 NEXTVAL_NEXTTOKE.ival = 0;
11085 force_next(FORMLBRACK);
11088 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11090 else if (PL_encoding)
11091 sv_recode_to_utf8(stuff, PL_encoding);
11093 start_force(PL_curforce);
11094 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11098 SvREFCNT_dec(stuff);
11100 PL_lex_formbrack = 0;
11103 if (PL_madskills) {
11105 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11107 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11108 PL_thiswhite = savewhite;
11115 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11118 const I32 oldsavestack_ix = PL_savestack_ix;
11119 CV* const outsidecv = PL_compcv;
11121 SAVEI32(PL_subline);
11122 save_item(PL_subname);
11123 SAVESPTR(PL_compcv);
11125 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11126 CvFLAGS(PL_compcv) |= flags;
11128 PL_subline = CopLINE(PL_curcop);
11129 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11130 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11131 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11132 if (outsidecv && CvPADLIST(outsidecv))
11133 CvPADLIST(PL_compcv)->xpadl_outid =
11134 PadlistNAMES(CvPADLIST(outsidecv));
11136 return oldsavestack_ix;
11140 #pragma segment Perl_yylex
11143 S_yywarn(pTHX_ const char *const s, U32 flags)
11147 PERL_ARGS_ASSERT_YYWARN;
11149 PL_in_eval |= EVAL_WARNONLY;
11150 yyerror_pv(s, flags);
11151 PL_in_eval &= ~EVAL_WARNONLY;
11156 Perl_yyerror(pTHX_ const char *const s)
11158 PERL_ARGS_ASSERT_YYERROR;
11159 return yyerror_pvn(s, strlen(s), 0);
11163 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11165 PERL_ARGS_ASSERT_YYERROR_PV;
11166 return yyerror_pvn(s, strlen(s), flags);
11170 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11173 const char *context = NULL;
11176 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11177 int yychar = PL_parser->yychar;
11179 PERL_ARGS_ASSERT_YYERROR_PVN;
11181 if (!yychar || (yychar == ';' && !PL_rsfp))
11182 sv_catpvs(where_sv, "at EOF");
11183 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11184 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11185 PL_oldbufptr != PL_bufptr) {
11188 The code below is removed for NetWare because it abends/crashes on NetWare
11189 when the script has error such as not having the closing quotes like:
11190 if ($var eq "value)
11191 Checking of white spaces is anyway done in NetWare code.
11194 while (isSPACE(*PL_oldoldbufptr))
11197 context = PL_oldoldbufptr;
11198 contlen = PL_bufptr - PL_oldoldbufptr;
11200 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11201 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11204 The code below is removed for NetWare because it abends/crashes on NetWare
11205 when the script has error such as not having the closing quotes like:
11206 if ($var eq "value)
11207 Checking of white spaces is anyway done in NetWare code.
11210 while (isSPACE(*PL_oldbufptr))
11213 context = PL_oldbufptr;
11214 contlen = PL_bufptr - PL_oldbufptr;
11216 else if (yychar > 255)
11217 sv_catpvs(where_sv, "next token ???");
11218 else if (yychar == -2) { /* YYEMPTY */
11219 if (PL_lex_state == LEX_NORMAL ||
11220 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11221 sv_catpvs(where_sv, "at end of line");
11222 else if (PL_lex_inpat)
11223 sv_catpvs(where_sv, "within pattern");
11225 sv_catpvs(where_sv, "within string");
11228 sv_catpvs(where_sv, "next char ");
11230 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11231 else if (isPRINT_LC(yychar)) {
11232 const char string = yychar;
11233 sv_catpvn(where_sv, &string, 1);
11236 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11238 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11239 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11240 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
11242 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11243 SVfARG(newSVpvn_flags(context, contlen,
11244 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
11246 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11247 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11248 Perl_sv_catpvf(aTHX_ msg,
11249 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11250 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11253 if (PL_in_eval & EVAL_WARNONLY) {
11254 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11258 if (PL_error_count >= 10) {
11259 if (PL_in_eval && SvCUR(ERRSV))
11260 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11261 SVfARG(ERRSV), OutCopFILE(PL_curcop));
11263 Perl_croak(aTHX_ "%s has too many errors.\n",
11264 OutCopFILE(PL_curcop));
11267 PL_in_my_stash = NULL;
11271 #pragma segment Main
11275 S_swallow_bom(pTHX_ U8 *s)
11278 const STRLEN slen = SvCUR(PL_linestr);
11280 PERL_ARGS_ASSERT_SWALLOW_BOM;
11284 if (s[1] == 0xFE) {
11285 /* UTF-16 little-endian? (or UTF-32LE?) */
11286 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11287 /* diag_listed_as: Unsupported script encoding %s */
11288 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11289 #ifndef PERL_NO_UTF16_FILTER
11290 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11292 if (PL_bufend > (char*)s) {
11293 s = add_utf16_textfilter(s, TRUE);
11296 /* diag_listed_as: Unsupported script encoding %s */
11297 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11302 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11303 #ifndef PERL_NO_UTF16_FILTER
11304 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11306 if (PL_bufend > (char *)s) {
11307 s = add_utf16_textfilter(s, FALSE);
11310 /* diag_listed_as: Unsupported script encoding %s */
11311 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11316 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
11317 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11318 s += 3; /* UTF-8 */
11324 if (s[2] == 0xFE && s[3] == 0xFF) {
11325 /* UTF-32 big-endian */
11326 /* diag_listed_as: Unsupported script encoding %s */
11327 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11330 else if (s[2] == 0 && s[3] != 0) {
11333 * are a good indicator of UTF-16BE. */
11334 #ifndef PERL_NO_UTF16_FILTER
11335 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11336 s = add_utf16_textfilter(s, FALSE);
11338 /* diag_listed_as: Unsupported script encoding %s */
11339 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11345 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11346 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11347 s += 4; /* UTF-8 */
11353 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11356 * are a good indicator of UTF-16LE. */
11357 #ifndef PERL_NO_UTF16_FILTER
11358 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11359 s = add_utf16_textfilter(s, TRUE);
11361 /* diag_listed_as: Unsupported script encoding %s */
11362 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11370 #ifndef PERL_NO_UTF16_FILTER
11372 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11375 SV *const filter = FILTER_DATA(idx);
11376 /* We re-use this each time round, throwing the contents away before we
11378 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11379 SV *const utf8_buffer = filter;
11380 IV status = IoPAGE(filter);
11381 const bool reverse = cBOOL(IoLINES(filter));
11384 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11386 /* As we're automatically added, at the lowest level, and hence only called
11387 from this file, we can be sure that we're not called in block mode. Hence
11388 don't bother writing code to deal with block mode. */
11390 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11393 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11395 DEBUG_P(PerlIO_printf(Perl_debug_log,
11396 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11397 FPTR2DPTR(void *, S_utf16_textfilter),
11398 reverse ? 'l' : 'b', idx, maxlen, status,
11399 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11406 /* First, look in our buffer of existing UTF-8 data: */
11407 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11411 } else if (status == 0) {
11413 IoPAGE(filter) = 0;
11414 nl = SvEND(utf8_buffer);
11417 STRLEN got = nl - SvPVX(utf8_buffer);
11418 /* Did we have anything to append? */
11420 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11421 /* Everything else in this code works just fine if SVp_POK isn't
11422 set. This, however, needs it, and we need it to work, else
11423 we loop infinitely because the buffer is never consumed. */
11424 sv_chop(utf8_buffer, nl);
11428 /* OK, not a complete line there, so need to read some more UTF-16.
11429 Read an extra octect if the buffer currently has an odd number. */
11433 if (SvCUR(utf16_buffer) >= 2) {
11434 /* Location of the high octet of the last complete code point.
11435 Gosh, UTF-16 is a pain. All the benefits of variable length,
11436 *coupled* with all the benefits of partial reads and
11438 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11439 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11441 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11445 /* We have the first half of a surrogate. Read more. */
11446 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11449 status = FILTER_READ(idx + 1, utf16_buffer,
11450 160 + (SvCUR(utf16_buffer) & 1));
11451 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11452 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11455 IoPAGE(filter) = status;
11460 chars = SvCUR(utf16_buffer) >> 1;
11461 have = SvCUR(utf8_buffer);
11462 SvGROW(utf8_buffer, have + chars * 3 + 1);
11465 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11466 (U8*)SvPVX_const(utf8_buffer) + have,
11467 chars * 2, &newlen);
11469 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11470 (U8*)SvPVX_const(utf8_buffer) + have,
11471 chars * 2, &newlen);
11473 SvCUR_set(utf8_buffer, have + newlen);
11476 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11477 it's private to us, and utf16_to_utf8{,reversed} take a
11478 (pointer,length) pair, rather than a NUL-terminated string. */
11479 if(SvCUR(utf16_buffer) & 1) {
11480 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11481 SvCUR_set(utf16_buffer, 1);
11483 SvCUR_set(utf16_buffer, 0);
11486 DEBUG_P(PerlIO_printf(Perl_debug_log,
11487 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11489 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11490 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11495 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11497 SV *filter = filter_add(S_utf16_textfilter, NULL);
11499 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11501 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11502 sv_setpvs(filter, "");
11503 IoLINES(filter) = reversed;
11504 IoPAGE(filter) = 1; /* Not EOF */
11506 /* Sadly, we have to return a valid pointer, come what may, so we have to
11507 ignore any error return from this. */
11508 SvCUR_set(PL_linestr, 0);
11509 if (FILTER_READ(0, PL_linestr, 0)) {
11510 SvUTF8_on(PL_linestr);
11512 SvUTF8_on(PL_linestr);
11514 PL_bufend = SvEND(PL_linestr);
11515 return (U8*)SvPVX(PL_linestr);
11520 Returns a pointer to the next character after the parsed
11521 vstring, as well as updating the passed in sv.
11523 Function must be called like
11526 s = scan_vstring(s,e,sv);
11528 where s and e are the start and end of the string.
11529 The sv should already be large enough to store the vstring
11530 passed in, for performance reasons.
11535 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11538 const char *pos = s;
11539 const char *start = s;
11541 PERL_ARGS_ASSERT_SCAN_VSTRING;
11543 if (*pos == 'v') pos++; /* get past 'v' */
11544 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11546 if ( *pos != '.') {
11547 /* this may not be a v-string if followed by => */
11548 const char *next = pos;
11549 while (next < e && isSPACE(*next))
11551 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11552 /* return string not v-string */
11553 sv_setpvn(sv,(char *)s,pos-s);
11554 return (char *)pos;
11558 if (!isALPHA(*pos)) {
11559 U8 tmpbuf[UTF8_MAXBYTES+1];
11562 s++; /* get past 'v' */
11567 /* this is atoi() that tolerates underscores */
11570 const char *end = pos;
11572 while (--end >= s) {
11574 const UV orev = rev;
11575 rev += (*end - '0') * mult;
11578 /* diag_listed_as: Integer overflow in %s number */
11579 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11580 "Integer overflow in decimal number");
11584 if (rev > 0x7FFFFFFF)
11585 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11587 /* Append native character for the rev point */
11588 tmpend = uvchr_to_utf8(tmpbuf, rev);
11589 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11590 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11592 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11598 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11602 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11609 Perl_keyword_plugin_standard(pTHX_
11610 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11612 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11613 PERL_UNUSED_CONTEXT;
11614 PERL_UNUSED_ARG(keyword_ptr);
11615 PERL_UNUSED_ARG(keyword_len);
11616 PERL_UNUSED_ARG(op_ptr);
11617 return KEYWORD_PLUGIN_DECLINE;
11620 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11622 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11624 SAVEI32(PL_lex_brackets);
11625 if (PL_lex_brackets > 100)
11626 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11627 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11628 SAVEI32(PL_lex_allbrackets);
11629 PL_lex_allbrackets = 0;
11630 SAVEI8(PL_lex_fakeeof);
11631 PL_lex_fakeeof = (U8)fakeeof;
11632 if(yyparse(gramtype) && !PL_parser->error_count)
11633 qerror(Perl_mess(aTHX_ "Parse error"));
11636 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11638 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11642 SAVEVPTR(PL_eval_root);
11643 PL_eval_root = NULL;
11644 parse_recdescent(gramtype, fakeeof);
11650 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11652 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11655 if (flags & ~PARSE_OPTIONAL)
11656 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11657 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11658 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11659 if (!PL_parser->error_count)
11660 qerror(Perl_mess(aTHX_ "Parse error"));
11661 exprop = newOP(OP_NULL, 0);
11667 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11669 Parse a Perl arithmetic expression. This may contain operators of precedence
11670 down to the bit shift operators. The expression must be followed (and thus
11671 terminated) either by a comparison or lower-precedence operator or by
11672 something that would normally terminate an expression such as semicolon.
11673 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11674 otherwise it is mandatory. It is up to the caller to ensure that the
11675 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11676 the source of the code to be parsed and the lexical context for the
11679 The op tree representing the expression is returned. If an optional
11680 expression is absent, a null pointer is returned, otherwise the pointer
11683 If an error occurs in parsing or compilation, in most cases a valid op
11684 tree is returned anyway. The error is reflected in the parser state,
11685 normally resulting in a single exception at the top level of parsing
11686 which covers all the compilation errors that occurred. Some compilation
11687 errors, however, will throw an exception immediately.
11693 Perl_parse_arithexpr(pTHX_ U32 flags)
11695 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11699 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11701 Parse a Perl term expression. This may contain operators of precedence
11702 down to the assignment operators. The expression must be followed (and thus
11703 terminated) either by a comma or lower-precedence operator or by
11704 something that would normally terminate an expression such as semicolon.
11705 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11706 otherwise it is mandatory. It is up to the caller to ensure that the
11707 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11708 the source of the code to be parsed and the lexical context for the
11711 The op tree representing the expression is returned. If an optional
11712 expression is absent, a null pointer is returned, otherwise the pointer
11715 If an error occurs in parsing or compilation, in most cases a valid op
11716 tree is returned anyway. The error is reflected in the parser state,
11717 normally resulting in a single exception at the top level of parsing
11718 which covers all the compilation errors that occurred. Some compilation
11719 errors, however, will throw an exception immediately.
11725 Perl_parse_termexpr(pTHX_ U32 flags)
11727 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11731 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11733 Parse a Perl list expression. This may contain operators of precedence
11734 down to the comma operator. The expression must be followed (and thus
11735 terminated) either by a low-precedence logic operator such as C<or> or by
11736 something that would normally terminate an expression such as semicolon.
11737 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11738 otherwise it is mandatory. It is up to the caller to ensure that the
11739 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11740 the source of the code to be parsed and the lexical context for the
11743 The op tree representing the expression is returned. If an optional
11744 expression is absent, a null pointer is returned, otherwise the pointer
11747 If an error occurs in parsing or compilation, in most cases a valid op
11748 tree is returned anyway. The error is reflected in the parser state,
11749 normally resulting in a single exception at the top level of parsing
11750 which covers all the compilation errors that occurred. Some compilation
11751 errors, however, will throw an exception immediately.
11757 Perl_parse_listexpr(pTHX_ U32 flags)
11759 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11763 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11765 Parse a single complete Perl expression. This allows the full
11766 expression grammar, including the lowest-precedence operators such
11767 as C<or>. The expression must be followed (and thus terminated) by a
11768 token that an expression would normally be terminated by: end-of-file,
11769 closing bracketing punctuation, semicolon, or one of the keywords that
11770 signals a postfix expression-statement modifier. If I<flags> includes
11771 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11772 mandatory. It is up to the caller to ensure that the dynamic parser
11773 state (L</PL_parser> et al) is correctly set to reflect the source of
11774 the code to be parsed and the lexical context for the expression.
11776 The op tree representing the expression is returned. If an optional
11777 expression is absent, a null pointer is returned, otherwise the pointer
11780 If an error occurs in parsing or compilation, in most cases a valid op
11781 tree is returned anyway. The error is reflected in the parser state,
11782 normally resulting in a single exception at the top level of parsing
11783 which covers all the compilation errors that occurred. Some compilation
11784 errors, however, will throw an exception immediately.
11790 Perl_parse_fullexpr(pTHX_ U32 flags)
11792 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11796 =for apidoc Amx|OP *|parse_block|U32 flags
11798 Parse a single complete Perl code block. This consists of an opening
11799 brace, a sequence of statements, and a closing brace. The block
11800 constitutes a lexical scope, so C<my> variables and various compile-time
11801 effects can be contained within it. It is up to the caller to ensure
11802 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11803 reflect the source of the code to be parsed and the lexical context for
11806 The op tree representing the code block is returned. This is always a
11807 real op, never a null pointer. It will normally be a C<lineseq> list,
11808 including C<nextstate> or equivalent ops. No ops to construct any kind
11809 of runtime scope are included by virtue of it being a block.
11811 If an error occurs in parsing or compilation, in most cases a valid op
11812 tree (most likely null) is returned anyway. The error is reflected in
11813 the parser state, normally resulting in a single exception at the top
11814 level of parsing which covers all the compilation errors that occurred.
11815 Some compilation errors, however, will throw an exception immediately.
11817 The I<flags> parameter is reserved for future use, and must always
11824 Perl_parse_block(pTHX_ U32 flags)
11827 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11828 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11832 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11834 Parse a single unadorned Perl statement. This may be a normal imperative
11835 statement or a declaration that has compile-time effect. It does not
11836 include any label or other affixture. It is up to the caller to ensure
11837 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11838 reflect the source of the code to be parsed and the lexical context for
11841 The op tree representing the statement is returned. This may be a
11842 null pointer if the statement is null, for example if it was actually
11843 a subroutine definition (which has compile-time side effects). If not
11844 null, it will be ops directly implementing the statement, suitable to
11845 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11846 equivalent op (except for those embedded in a scope contained entirely
11847 within the statement).
11849 If an error occurs in parsing or compilation, in most cases a valid op
11850 tree (most likely null) is returned anyway. The error is reflected in
11851 the parser state, normally resulting in a single exception at the top
11852 level of parsing which covers all the compilation errors that occurred.
11853 Some compilation errors, however, will throw an exception immediately.
11855 The I<flags> parameter is reserved for future use, and must always
11862 Perl_parse_barestmt(pTHX_ U32 flags)
11865 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11866 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11870 =for apidoc Amx|SV *|parse_label|U32 flags
11872 Parse a single label, possibly optional, of the type that may prefix a
11873 Perl statement. It is up to the caller to ensure that the dynamic parser
11874 state (L</PL_parser> et al) is correctly set to reflect the source of
11875 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11876 label is optional, otherwise it is mandatory.
11878 The name of the label is returned in the form of a fresh scalar. If an
11879 optional label is absent, a null pointer is returned.
11881 If an error occurs in parsing, which can only occur if the label is
11882 mandatory, a valid label is returned anyway. The error is reflected in
11883 the parser state, normally resulting in a single exception at the top
11884 level of parsing which covers all the compilation errors that occurred.
11890 Perl_parse_label(pTHX_ U32 flags)
11892 if (flags & ~PARSE_OPTIONAL)
11893 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11894 if (PL_lex_state == LEX_KNOWNEXT) {
11895 PL_parser->yychar = yylex();
11896 if (PL_parser->yychar == LABEL) {
11897 char * const lpv = pl_yylval.pval;
11898 STRLEN llen = strlen(lpv);
11899 PL_parser->yychar = YYEMPTY;
11900 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11907 STRLEN wlen, bufptr_pos;
11910 if (!isIDFIRST_lazy_if(s, UTF))
11912 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11913 if (word_takes_any_delimeter(s, wlen))
11915 bufptr_pos = s - SvPVX(PL_linestr);
11917 lex_read_space(LEX_KEEP_PREVIOUS);
11919 s = SvPVX(PL_linestr) + bufptr_pos;
11920 if (t[0] == ':' && t[1] != ':') {
11921 PL_oldoldbufptr = PL_oldbufptr;
11924 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11928 if (flags & PARSE_OPTIONAL) {
11931 qerror(Perl_mess(aTHX_ "Parse error"));
11932 return newSVpvs("x");
11939 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11941 Parse a single complete Perl statement. This may be a normal imperative
11942 statement or a declaration that has compile-time effect, and may include
11943 optional labels. It is up to the caller to ensure that the dynamic
11944 parser state (L</PL_parser> et al) is correctly set to reflect the source
11945 of the code to be parsed and the lexical context for the statement.
11947 The op tree representing the statement is returned. This may be a
11948 null pointer if the statement is null, for example if it was actually
11949 a subroutine definition (which has compile-time side effects). If not
11950 null, it will be the result of a L</newSTATEOP> call, normally including
11951 a C<nextstate> or equivalent op.
11953 If an error occurs in parsing or compilation, in most cases a valid op
11954 tree (most likely null) is returned anyway. The error is reflected in
11955 the parser state, normally resulting in a single exception at the top
11956 level of parsing which covers all the compilation errors that occurred.
11957 Some compilation errors, however, will throw an exception immediately.
11959 The I<flags> parameter is reserved for future use, and must always
11966 Perl_parse_fullstmt(pTHX_ U32 flags)
11969 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11970 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11974 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11976 Parse a sequence of zero or more Perl statements. These may be normal
11977 imperative statements, including optional labels, or declarations
11978 that have compile-time effect, or any mixture thereof. The statement
11979 sequence ends when a closing brace or end-of-file is encountered in a
11980 place where a new statement could have validly started. It is up to
11981 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11982 is correctly set to reflect the source of the code to be parsed and the
11983 lexical context for the statements.
11985 The op tree representing the statement sequence is returned. This may
11986 be a null pointer if the statements were all null, for example if there
11987 were no statements or if there were only subroutine definitions (which
11988 have compile-time side effects). If not null, it will be a C<lineseq>
11989 list, normally including C<nextstate> or equivalent ops.
11991 If an error occurs in parsing or compilation, in most cases a valid op
11992 tree is returned anyway. The error is reflected in the parser state,
11993 normally resulting in a single exception at the top level of parsing
11994 which covers all the compilation errors that occurred. Some compilation
11995 errors, however, will throw an exception immediately.
11997 The I<flags> parameter is reserved for future use, and must always
12004 Perl_parse_stmtseq(pTHX_ U32 flags)
12009 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12010 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12011 c = lex_peek_unichar(0);
12012 if (c != -1 && c != /*{*/'}')
12013 qerror(Perl_mess(aTHX_ "Parse error"));
12019 * c-indentation-style: bsd
12020 * c-basic-offset: 4
12021 * indent-tabs-mode: nil
12024 * ex: set ts=8 sts=4 sw=4 et: