3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
189 #include "keywords.h"
191 /* CLINE is a macro that ensures PL_copline has a sane value */
196 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
199 # define SKIPSPACE0(s) skipspace0(s)
200 # define SKIPSPACE1(s) skipspace1(s)
201 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202 # define PEEKSPACE(s) skipspace2(s,0)
204 # define SKIPSPACE0(s) skipspace(s)
205 # define SKIPSPACE1(s) skipspace(s)
206 # define SKIPSPACE2(s,tsv) skipspace(s)
207 # define PEEKSPACE(s) skipspace(s)
211 * Convenience functions to return different tokens and prime the
212 * lexer for the next token. They all take an argument.
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
224 * FUN0OP : zero-argument function, with its op created in this file
225 * FUN1 : not used, except for not, which isn't a UNIOP
226 * BOop : bitwise or or xor
228 * SHop : shift operator
229 * PWop : power operator
230 * PMop : pattern-matching operator
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
234 * Rop : relational operator <= != gt
236 * Also see LOP and lop() below.
239 #ifdef DEBUGGING /* Serve -DT. */
240 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
242 # define REPORT(retval) (retval)
245 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
252 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
256 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
267 /* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
272 #define UNI3(f,x,have_x) { \
273 pl_yylval.ival = f; \
274 if (have_x) PL_expect = x; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
279 return REPORT( (int)FUNC1 ); \
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 #define UNI(f) UNI3(f,XTERM,1)
284 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
285 #define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
290 #define UNIBRACK(f) UNI3(f,0,0)
292 /* grandfather return to old style */
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
303 #define COPLINE_INC_WITH_HERELINES \
305 CopLINE_inc(PL_curcop); \
306 if (PL_parser->herelines) \
307 CopLINE(PL_curcop) += PL_parser->herelines, \
308 PL_parser->herelines = 0; \
310 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
311 * is no sublex_push to follow. */
312 #define COPLINE_SET_FROM_MULTI_END \
314 CopLINE_set(PL_curcop, PL_multi_end); \
315 if (PL_multi_end != PL_multi_start) \
316 PL_parser->herelines = 0; \
322 /* how to interpret the pl_yylval associated with the token */
326 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
331 static struct debug_tokens {
333 enum token_type type;
335 } const debug_tokens[] =
337 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
338 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
339 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
340 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
341 { ARROW, TOKENTYPE_NONE, "ARROW" },
342 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
343 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
344 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
345 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
346 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
347 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
348 { DO, TOKENTYPE_NONE, "DO" },
349 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
350 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
351 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
352 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
353 { ELSE, TOKENTYPE_NONE, "ELSE" },
354 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
355 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
356 { FOR, TOKENTYPE_IVAL, "FOR" },
357 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
358 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
359 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
360 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
361 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
362 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
363 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
364 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
365 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
366 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
367 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
368 { IF, TOKENTYPE_IVAL, "IF" },
369 { LABEL, TOKENTYPE_PVAL, "LABEL" },
370 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
371 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
372 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
373 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
374 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
375 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
376 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
377 { MY, TOKENTYPE_IVAL, "MY" },
378 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
379 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
380 { OROP, TOKENTYPE_IVAL, "OROP" },
381 { OROR, TOKENTYPE_NONE, "OROR" },
382 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
383 { PEG, TOKENTYPE_NONE, "PEG" },
384 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
385 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
386 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
387 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
388 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
389 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
390 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
391 { PREINC, TOKENTYPE_NONE, "PREINC" },
392 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
393 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
394 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
395 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
396 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
397 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
398 { SUB, TOKENTYPE_NONE, "SUB" },
399 { THING, TOKENTYPE_OPVAL, "THING" },
400 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
401 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
402 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
403 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
404 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
405 { USE, TOKENTYPE_IVAL, "USE" },
406 { WHEN, TOKENTYPE_IVAL, "WHEN" },
407 { WHILE, TOKENTYPE_IVAL, "WHILE" },
408 { WORD, TOKENTYPE_OPVAL, "WORD" },
409 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
410 { 0, TOKENTYPE_NONE, NULL }
413 /* dump the returned token in rv, plus any optional arg in pl_yylval */
416 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
420 PERL_ARGS_ASSERT_TOKEREPORT;
423 const char *name = NULL;
424 enum token_type type = TOKENTYPE_NONE;
425 const struct debug_tokens *p;
426 SV* const report = newSVpvs("<== ");
428 for (p = debug_tokens; p->token; p++) {
429 if (p->token == (int)rv) {
436 Perl_sv_catpv(aTHX_ report, name);
437 else if ((char)rv > ' ' && (char)rv <= '~')
439 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
441 sv_catpvs(report, " (pending identifier)");
444 sv_catpvs(report, "EOF");
446 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
451 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
453 case TOKENTYPE_OPNUM:
454 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
455 PL_op_name[lvalp->ival]);
458 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
460 case TOKENTYPE_OPVAL:
462 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
463 PL_op_name[lvalp->opval->op_type]);
464 if (lvalp->opval->op_type == OP_CONST) {
465 Perl_sv_catpvf(aTHX_ report, " %s",
466 SvPEEK(cSVOPx_sv(lvalp->opval)));
471 sv_catpvs(report, "(opval=null)");
474 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
480 /* print the buffer with suitable escapes */
483 S_printbuf(pTHX_ const char *const fmt, const char *const s)
485 SV* const tmp = newSVpvs("");
487 PERL_ARGS_ASSERT_PRINTBUF;
489 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
496 S_deprecate_commaless_var_list(pTHX) {
498 deprecate("comma-less variable list");
499 return REPORT(','); /* grandfather non-comma-format format */
505 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
506 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
510 S_ao(pTHX_ int toketype)
513 if (*PL_bufptr == '=') {
515 if (toketype == ANDAND)
516 pl_yylval.ival = OP_ANDASSIGN;
517 else if (toketype == OROR)
518 pl_yylval.ival = OP_ORASSIGN;
519 else if (toketype == DORDOR)
520 pl_yylval.ival = OP_DORASSIGN;
528 * When Perl expects an operator and finds something else, no_op
529 * prints the warning. It always prints "<something> found where
530 * operator expected. It prints "Missing semicolon on previous line?"
531 * if the surprise occurs at the start of the line. "do you need to
532 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
533 * where the compiler doesn't know if foo is a method call or a function.
534 * It prints "Missing operator before end of line" if there's nothing
535 * after the missing operator, or "... before <...>" if there is something
536 * after the missing operator.
540 S_no_op(pTHX_ const char *const what, char *s)
543 char * const oldbp = PL_bufptr;
544 const bool is_first = (PL_oldbufptr == PL_linestart);
546 PERL_ARGS_ASSERT_NO_OP;
552 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
553 if (ckWARN_d(WARN_SYNTAX)) {
555 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
556 "\t(Missing semicolon on previous line?)\n");
557 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
559 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
560 t += UTF ? UTF8SKIP(t) : 1)
562 if (t < PL_bufptr && isSPACE(*t))
563 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
564 "\t(Do you need to predeclare %"UTF8f"?)\n",
565 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
569 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
570 "\t(Missing operator before %"UTF8f"?)\n",
571 UTF8fARG(UTF, s - oldbp, oldbp));
579 * Complain about missing quote/regexp/heredoc terminator.
580 * If it's called with NULL then it cauterizes the line buffer.
581 * If we're in a delimited string and the delimiter is a control
582 * character, it's reformatted into a two-char sequence like ^C.
587 S_missingterm(pTHX_ char *s)
593 char * const nl = strrchr(s,'\n');
597 else if ((U8) PL_multi_close < 32) {
599 tmpbuf[1] = (char)toCTRL(PL_multi_close);
604 *tmpbuf = (char)PL_multi_close;
608 q = strchr(s,'"') ? '\'' : '"';
609 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
615 * Check whether the named feature is enabled.
618 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
621 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
623 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
625 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
627 if (namelen > MAX_FEATURE_LEN)
629 memcpy(&he_name[8], name, namelen);
631 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
632 REFCOUNTED_HE_EXISTS));
636 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
637 * utf16-to-utf8-reversed.
640 #ifdef PERL_CR_FILTER
644 const char *s = SvPVX_const(sv);
645 const char * const e = s + SvCUR(sv);
647 PERL_ARGS_ASSERT_STRIP_RETURN;
649 /* outer loop optimized to do nothing if there are no CR-LFs */
651 if (*s++ == '\r' && *s == '\n') {
652 /* hit a CR-LF, need to copy the rest */
656 if (*s == '\r' && s[1] == '\n')
667 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
669 const I32 count = FILTER_READ(idx+1, sv, maxlen);
670 if (count > 0 && !maxlen)
677 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
679 Creates and initialises a new lexer/parser state object, supplying
680 a context in which to lex and parse from a new source of Perl code.
681 A pointer to the new state object is placed in L</PL_parser>. An entry
682 is made on the save stack so that upon unwinding the new state object
683 will be destroyed and the former value of L</PL_parser> will be restored.
684 Nothing else need be done to clean up the parsing context.
686 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
687 non-null, provides a string (in SV form) containing code to be parsed.
688 A copy of the string is made, so subsequent modification of I<line>
689 does not affect parsing. I<rsfp>, if non-null, provides an input stream
690 from which code will be read to be parsed. If both are non-null, the
691 code in I<line> comes first and must consist of complete lines of input,
692 and I<rsfp> supplies the remainder of the source.
694 The I<flags> parameter is reserved for future use. Currently it is only
695 used by perl internally, so extensions should always pass zero.
700 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
701 can share filters with the current parser.
702 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
703 caller, hence isn't owned by the parser, so shouldn't be closed on parser
704 destruction. This is used to handle the case of defaulting to reading the
705 script from the standard input because no filename was given on the command
706 line (without getting confused by situation where STDIN has been closed, so
707 the script handle is opened on fd 0) */
710 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
713 const char *s = NULL;
714 yy_parser *parser, *oparser;
715 if (flags && flags & ~LEX_START_FLAGS)
716 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
718 /* create and initialise a parser */
720 Newxz(parser, 1, yy_parser);
721 parser->old_parser = oparser = PL_parser;
724 parser->stack = NULL;
726 parser->stack_size = 0;
728 /* on scope exit, free this parser and restore any outer one */
730 parser->saved_curcop = PL_curcop;
732 /* initialise lexer state */
735 parser->curforce = -1;
737 parser->nexttoke = 0;
739 parser->error_count = oparser ? oparser->error_count : 0;
740 parser->copline = parser->preambling = NOLINE;
741 parser->lex_state = LEX_NORMAL;
742 parser->expect = XSTATE;
744 parser->rsfp_filters =
745 !(flags & LEX_START_SAME_FILTER) || !oparser
747 : MUTABLE_AV(SvREFCNT_inc(
748 oparser->rsfp_filters
749 ? oparser->rsfp_filters
750 : (oparser->rsfp_filters = newAV())
753 Newx(parser->lex_brackstack, 120, char);
754 Newx(parser->lex_casestack, 12, char);
755 *parser->lex_casestack = '\0';
756 Newxz(parser->lex_shared, 1, LEXSHARED);
760 s = SvPV_const(line, len);
761 parser->linestr = flags & LEX_START_COPIED
762 ? SvREFCNT_inc_simple_NN(line)
763 : newSVpvn_flags(s, len, SvUTF8(line));
764 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
766 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
768 parser->oldoldbufptr =
771 parser->linestart = SvPVX(parser->linestr);
772 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
773 parser->last_lop = parser->last_uni = NULL;
774 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
775 |LEX_DONT_CLOSE_RSFP);
777 parser->in_pod = parser->filtered = 0;
781 /* delete a parser object */
784 Perl_parser_free(pTHX_ const yy_parser *parser)
786 PERL_ARGS_ASSERT_PARSER_FREE;
788 PL_curcop = parser->saved_curcop;
789 SvREFCNT_dec(parser->linestr);
791 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
792 PerlIO_clearerr(parser->rsfp);
793 else if (parser->rsfp && (!parser->old_parser ||
794 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
795 PerlIO_close(parser->rsfp);
796 SvREFCNT_dec(parser->rsfp_filters);
797 SvREFCNT_dec(parser->lex_stuff);
798 SvREFCNT_dec(parser->sublex_info.repl);
800 Safefree(parser->lex_brackstack);
801 Safefree(parser->lex_casestack);
802 Safefree(parser->lex_shared);
803 PL_parser = parser->old_parser;
808 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
811 I32 nexttoke = parser->lasttoke;
813 I32 nexttoke = parser->nexttoke;
815 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
818 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
820 && parser->nexttoke[nexttoke].next_val.opval
821 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
822 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
823 op_free(parser->nexttoke[nexttoke].next_val.opval);
824 parser->nexttoke[nexttoke].next_val.opval = NULL;
827 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
828 && parser->nextval[nexttoke].opval
829 && parser->nextval[nexttoke].opval->op_slabbed
830 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
831 op_free(parser->nextval[nexttoke].opval);
832 parser->nextval[nexttoke].opval = NULL;
840 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
842 Buffer scalar containing the chunk currently under consideration of the
843 text currently being lexed. This is always a plain string scalar (for
844 which C<SvPOK> is true). It is not intended to be used as a scalar by
845 normal scalar means; instead refer to the buffer directly by the pointer
846 variables described below.
848 The lexer maintains various C<char*> pointers to things in the
849 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
850 reallocated, all of these pointers must be updated. Don't attempt to
851 do this manually, but rather use L</lex_grow_linestr> if you need to
852 reallocate the buffer.
854 The content of the text chunk in the buffer is commonly exactly one
855 complete line of input, up to and including a newline terminator,
856 but there are situations where it is otherwise. The octets of the
857 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
858 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
859 flag on this scalar, which may disagree with it.
861 For direct examination of the buffer, the variable
862 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
863 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
864 of these pointers is usually preferable to examination of the scalar
865 through normal scalar means.
867 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
869 Direct pointer to the end of the chunk of text currently being lexed, the
870 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
871 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
872 always located at the end of the buffer, and does not count as part of
873 the buffer's contents.
875 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
877 Points to the current position of lexing inside the lexer buffer.
878 Characters around this point may be freely examined, within
879 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
880 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
881 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
883 Lexing code (whether in the Perl core or not) moves this pointer past
884 the characters that it consumes. It is also expected to perform some
885 bookkeeping whenever a newline character is consumed. This movement
886 can be more conveniently performed by the function L</lex_read_to>,
887 which handles newlines appropriately.
889 Interpretation of the buffer's octets can be abstracted out by
890 using the slightly higher-level functions L</lex_peek_unichar> and
891 L</lex_read_unichar>.
893 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
895 Points to the start of the current line inside the lexer buffer.
896 This is useful for indicating at which column an error occurred, and
897 not much else. This must be updated by any lexing code that consumes
898 a newline; the function L</lex_read_to> handles this detail.
904 =for apidoc Amx|bool|lex_bufutf8
906 Indicates whether the octets in the lexer buffer
907 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
908 of Unicode characters. If not, they should be interpreted as Latin-1
909 characters. This is analogous to the C<SvUTF8> flag for scalars.
911 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
912 contains valid UTF-8. Lexing code must be robust in the face of invalid
915 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
916 is significant, but not the whole story regarding the input character
917 encoding. Normally, when a file is being read, the scalar contains octets
918 and its C<SvUTF8> flag is off, but the octets should be interpreted as
919 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
920 however, the scalar may have the C<SvUTF8> flag on, and in this case its
921 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
922 is in effect. This logic may change in the future; use this function
923 instead of implementing the logic yourself.
929 Perl_lex_bufutf8(pTHX)
935 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
937 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
938 at least I<len> octets (including terminating NUL). Returns a
939 pointer to the reallocated buffer. This is necessary before making
940 any direct modification of the buffer that would increase its length.
941 L</lex_stuff_pvn> provides a more convenient way to insert text into
944 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
945 this function updates all of the lexer's variables that point directly
952 Perl_lex_grow_linestr(pTHX_ STRLEN len)
956 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
957 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
958 linestr = PL_parser->linestr;
959 buf = SvPVX(linestr);
960 if (len <= SvLEN(linestr))
962 bufend_pos = PL_parser->bufend - buf;
963 bufptr_pos = PL_parser->bufptr - buf;
964 oldbufptr_pos = PL_parser->oldbufptr - buf;
965 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
966 linestart_pos = PL_parser->linestart - buf;
967 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
968 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
969 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
970 PL_parser->lex_shared->re_eval_start - buf : 0;
972 buf = sv_grow(linestr, len);
974 PL_parser->bufend = buf + bufend_pos;
975 PL_parser->bufptr = buf + bufptr_pos;
976 PL_parser->oldbufptr = buf + oldbufptr_pos;
977 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
978 PL_parser->linestart = buf + linestart_pos;
979 if (PL_parser->last_uni)
980 PL_parser->last_uni = buf + last_uni_pos;
981 if (PL_parser->last_lop)
982 PL_parser->last_lop = buf + last_lop_pos;
983 if (PL_parser->lex_shared->re_eval_start)
984 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
989 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
991 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
992 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
993 reallocating the buffer if necessary. This means that lexing code that
994 runs later will see the characters as if they had appeared in the input.
995 It is not recommended to do this as part of normal parsing, and most
996 uses of this facility run the risk of the inserted characters being
997 interpreted in an unintended manner.
999 The string to be inserted is represented by I<len> octets starting
1000 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1001 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1002 The characters are recoded for the lexer buffer, according to how the
1003 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1004 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1005 function is more convenient.
1011 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1015 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1016 if (flags & ~(LEX_STUFF_UTF8))
1017 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1019 if (flags & LEX_STUFF_UTF8) {
1022 STRLEN highhalf = 0; /* Count of variants */
1023 const char *p, *e = pv+len;
1024 for (p = pv; p != e; p++) {
1025 if (! UTF8_IS_INVARIANT(*p)) {
1031 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1032 bufptr = PL_parser->bufptr;
1033 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1034 SvCUR_set(PL_parser->linestr,
1035 SvCUR(PL_parser->linestr) + len+highhalf);
1036 PL_parser->bufend += len+highhalf;
1037 for (p = pv; p != e; p++) {
1039 if (! UTF8_IS_INVARIANT(c)) {
1040 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1041 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1043 *bufptr++ = (char)c;
1048 if (flags & LEX_STUFF_UTF8) {
1049 STRLEN highhalf = 0;
1050 const char *p, *e = pv+len;
1051 for (p = pv; p != e; p++) {
1053 if (UTF8_IS_ABOVE_LATIN1(c)) {
1054 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1055 "non-Latin-1 character into Latin-1 input");
1056 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1059 } else if (! UTF8_IS_INVARIANT(c)) {
1060 /* malformed UTF-8 */
1062 SAVESPTR(PL_warnhook);
1063 PL_warnhook = PERL_WARNHOOK_FATAL;
1064 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1070 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1071 bufptr = PL_parser->bufptr;
1072 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1073 SvCUR_set(PL_parser->linestr,
1074 SvCUR(PL_parser->linestr) + len-highhalf);
1075 PL_parser->bufend += len-highhalf;
1078 if (UTF8_IS_INVARIANT(*p)) {
1084 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1090 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1091 bufptr = PL_parser->bufptr;
1092 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1093 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1094 PL_parser->bufend += len;
1095 Copy(pv, bufptr, len, char);
1101 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1103 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1104 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1105 reallocating the buffer if necessary. This means that lexing code that
1106 runs later will see the characters as if they had appeared in the input.
1107 It is not recommended to do this as part of normal parsing, and most
1108 uses of this facility run the risk of the inserted characters being
1109 interpreted in an unintended manner.
1111 The string to be inserted is represented by octets starting at I<pv>
1112 and continuing to the first nul. These octets are interpreted as either
1113 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1114 in I<flags>. The characters are recoded for the lexer buffer, according
1115 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1116 If it is not convenient to nul-terminate a string to be inserted, the
1117 L</lex_stuff_pvn> function is more appropriate.
1123 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1125 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1126 lex_stuff_pvn(pv, strlen(pv), flags);
1130 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1132 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1133 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1134 reallocating the buffer if necessary. This means that lexing code that
1135 runs later will see the characters as if they had appeared in the input.
1136 It is not recommended to do this as part of normal parsing, and most
1137 uses of this facility run the risk of the inserted characters being
1138 interpreted in an unintended manner.
1140 The string to be inserted is the string value of I<sv>. The characters
1141 are recoded for the lexer buffer, according to how the buffer is currently
1142 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1143 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1144 need to construct a scalar.
1150 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1154 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1156 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1158 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1162 =for apidoc Amx|void|lex_unstuff|char *ptr
1164 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1165 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1166 This hides the discarded text from any lexing code that runs later,
1167 as if the text had never appeared.
1169 This is not the normal way to consume lexed text. For that, use
1176 Perl_lex_unstuff(pTHX_ char *ptr)
1180 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1181 buf = PL_parser->bufptr;
1183 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1186 bufend = PL_parser->bufend;
1188 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1189 unstuff_len = ptr - buf;
1190 Move(ptr, buf, bufend+1-ptr, char);
1191 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1192 PL_parser->bufend = bufend - unstuff_len;
1196 =for apidoc Amx|void|lex_read_to|char *ptr
1198 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1199 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1200 performing the correct bookkeeping whenever a newline character is passed.
1201 This is the normal way to consume lexed text.
1203 Interpretation of the buffer's octets can be abstracted out by
1204 using the slightly higher-level functions L</lex_peek_unichar> and
1205 L</lex_read_unichar>.
1211 Perl_lex_read_to(pTHX_ char *ptr)
1214 PERL_ARGS_ASSERT_LEX_READ_TO;
1215 s = PL_parser->bufptr;
1216 if (ptr < s || ptr > PL_parser->bufend)
1217 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1218 for (; s != ptr; s++)
1220 COPLINE_INC_WITH_HERELINES;
1221 PL_parser->linestart = s+1;
1223 PL_parser->bufptr = ptr;
1227 =for apidoc Amx|void|lex_discard_to|char *ptr
1229 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1230 up to I<ptr>. The remaining content of the buffer will be moved, and
1231 all pointers into the buffer updated appropriately. I<ptr> must not
1232 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1233 it is not permitted to discard text that has yet to be lexed.
1235 Normally it is not necessarily to do this directly, because it suffices to
1236 use the implicit discarding behaviour of L</lex_next_chunk> and things
1237 based on it. However, if a token stretches across multiple lines,
1238 and the lexing code has kept multiple lines of text in the buffer for
1239 that purpose, then after completion of the token it would be wise to
1240 explicitly discard the now-unneeded earlier lines, to avoid future
1241 multi-line tokens growing the buffer without bound.
1247 Perl_lex_discard_to(pTHX_ char *ptr)
1251 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1252 buf = SvPVX(PL_parser->linestr);
1254 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1257 if (ptr > PL_parser->bufptr)
1258 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1259 discard_len = ptr - buf;
1260 if (PL_parser->oldbufptr < ptr)
1261 PL_parser->oldbufptr = ptr;
1262 if (PL_parser->oldoldbufptr < ptr)
1263 PL_parser->oldoldbufptr = ptr;
1264 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1265 PL_parser->last_uni = NULL;
1266 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1267 PL_parser->last_lop = NULL;
1268 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1269 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1270 PL_parser->bufend -= discard_len;
1271 PL_parser->bufptr -= discard_len;
1272 PL_parser->oldbufptr -= discard_len;
1273 PL_parser->oldoldbufptr -= discard_len;
1274 if (PL_parser->last_uni)
1275 PL_parser->last_uni -= discard_len;
1276 if (PL_parser->last_lop)
1277 PL_parser->last_lop -= discard_len;
1281 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1283 Reads in the next chunk of text to be lexed, appending it to
1284 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1285 looked to the end of the current chunk and wants to know more. It is
1286 usual, but not necessary, for lexing to have consumed the entirety of
1287 the current chunk at this time.
1289 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1290 chunk (i.e., the current chunk has been entirely consumed), normally the
1291 current chunk will be discarded at the same time that the new chunk is
1292 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1293 will not be discarded. If the current chunk has not been entirely
1294 consumed, then it will not be discarded regardless of the flag.
1296 Returns true if some new text was added to the buffer, or false if the
1297 buffer has reached the end of the input text.
1302 #define LEX_FAKE_EOF 0x80000000
1303 #define LEX_NO_TERM 0x40000000
1306 Perl_lex_next_chunk(pTHX_ U32 flags)
1310 STRLEN old_bufend_pos, new_bufend_pos;
1311 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1312 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1313 bool got_some_for_debugger = 0;
1315 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1316 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1317 linestr = PL_parser->linestr;
1318 buf = SvPVX(linestr);
1319 if (!(flags & LEX_KEEP_PREVIOUS) &&
1320 PL_parser->bufptr == PL_parser->bufend) {
1321 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1323 if (PL_parser->last_uni != PL_parser->bufend)
1324 PL_parser->last_uni = NULL;
1325 if (PL_parser->last_lop != PL_parser->bufend)
1326 PL_parser->last_lop = NULL;
1327 last_uni_pos = last_lop_pos = 0;
1331 old_bufend_pos = PL_parser->bufend - buf;
1332 bufptr_pos = PL_parser->bufptr - buf;
1333 oldbufptr_pos = PL_parser->oldbufptr - buf;
1334 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1335 linestart_pos = PL_parser->linestart - buf;
1336 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1337 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1339 if (flags & LEX_FAKE_EOF) {
1341 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1343 } else if (filter_gets(linestr, old_bufend_pos)) {
1345 got_some_for_debugger = 1;
1346 } else if (flags & LEX_NO_TERM) {
1349 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1350 sv_setpvs(linestr, "");
1352 /* End of real input. Close filehandle (unless it was STDIN),
1353 * then add implicit termination.
1355 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1356 PerlIO_clearerr(PL_parser->rsfp);
1357 else if (PL_parser->rsfp)
1358 (void)PerlIO_close(PL_parser->rsfp);
1359 PL_parser->rsfp = NULL;
1360 PL_parser->in_pod = PL_parser->filtered = 0;
1362 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1365 if (!PL_in_eval && PL_minus_p) {
1367 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1368 PL_minus_n = PL_minus_p = 0;
1369 } else if (!PL_in_eval && PL_minus_n) {
1370 sv_catpvs(linestr, /*{*/";}");
1373 sv_catpvs(linestr, ";");
1376 buf = SvPVX(linestr);
1377 new_bufend_pos = SvCUR(linestr);
1378 PL_parser->bufend = buf + new_bufend_pos;
1379 PL_parser->bufptr = buf + bufptr_pos;
1380 PL_parser->oldbufptr = buf + oldbufptr_pos;
1381 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1382 PL_parser->linestart = buf + linestart_pos;
1383 if (PL_parser->last_uni)
1384 PL_parser->last_uni = buf + last_uni_pos;
1385 if (PL_parser->last_lop)
1386 PL_parser->last_lop = buf + last_lop_pos;
1387 if (PL_parser->preambling != NOLINE) {
1388 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1389 PL_parser->preambling = NOLINE;
1391 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1392 PL_curstash != PL_debstash) {
1393 /* debugger active and we're not compiling the debugger code,
1394 * so store the line into the debugger's array of lines
1396 update_debugger_info(NULL, buf+old_bufend_pos,
1397 new_bufend_pos-old_bufend_pos);
1403 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1405 Looks ahead one (Unicode) character in the text currently being lexed.
1406 Returns the codepoint (unsigned integer value) of the next character,
1407 or -1 if lexing has reached the end of the input text. To consume the
1408 peeked character, use L</lex_read_unichar>.
1410 If the next character is in (or extends into) the next chunk of input
1411 text, the next chunk will be read in. Normally the current chunk will be
1412 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1413 then the current chunk will not be discarded.
1415 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1416 is encountered, an exception is generated.
1422 Perl_lex_peek_unichar(pTHX_ U32 flags)
1426 if (flags & ~(LEX_KEEP_PREVIOUS))
1427 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1428 s = PL_parser->bufptr;
1429 bufend = PL_parser->bufend;
1435 if (!lex_next_chunk(flags))
1437 s = PL_parser->bufptr;
1438 bufend = PL_parser->bufend;
1441 if (UTF8_IS_INVARIANT(head))
1443 if (UTF8_IS_START(head)) {
1444 len = UTF8SKIP(&head);
1445 while ((STRLEN)(bufend-s) < len) {
1446 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1448 s = PL_parser->bufptr;
1449 bufend = PL_parser->bufend;
1452 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1453 if (retlen == (STRLEN)-1) {
1454 /* malformed UTF-8 */
1456 SAVESPTR(PL_warnhook);
1457 PL_warnhook = PERL_WARNHOOK_FATAL;
1458 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1464 if (!lex_next_chunk(flags))
1466 s = PL_parser->bufptr;
1473 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1475 Reads the next (Unicode) character in the text currently being lexed.
1476 Returns the codepoint (unsigned integer value) of the character read,
1477 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1478 if lexing has reached the end of the input text. To non-destructively
1479 examine the next character, use L</lex_peek_unichar> instead.
1481 If the next character is in (or extends into) the next chunk of input
1482 text, the next chunk will be read in. Normally the current chunk will be
1483 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1484 then the current chunk will not be discarded.
1486 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1487 is encountered, an exception is generated.
1493 Perl_lex_read_unichar(pTHX_ U32 flags)
1496 if (flags & ~(LEX_KEEP_PREVIOUS))
1497 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1498 c = lex_peek_unichar(flags);
1501 COPLINE_INC_WITH_HERELINES;
1503 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1505 ++(PL_parser->bufptr);
1511 =for apidoc Amx|void|lex_read_space|U32 flags
1513 Reads optional spaces, in Perl style, in the text currently being
1514 lexed. The spaces may include ordinary whitespace characters and
1515 Perl-style comments. C<#line> directives are processed if encountered.
1516 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1517 at a non-space character (or the end of the input text).
1519 If spaces extend into the next chunk of input text, the next chunk will
1520 be read in. Normally the current chunk will be discarded at the same
1521 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1522 chunk will not be discarded.
1527 #define LEX_NO_INCLINE 0x40000000
1528 #define LEX_NO_NEXT_CHUNK 0x80000000
1531 Perl_lex_read_space(pTHX_ U32 flags)
1534 const bool can_incline = !(flags & LEX_NO_INCLINE);
1535 bool need_incline = 0;
1536 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1537 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1540 sv_free(PL_skipwhite);
1541 PL_skipwhite = NULL;
1544 PL_skipwhite = newSVpvs("");
1545 #endif /* PERL_MAD */
1546 s = PL_parser->bufptr;
1547 bufend = PL_parser->bufend;
1553 } while (!(c == '\n' || (c == 0 && s == bufend)));
1554 } else if (c == '\n') {
1557 PL_parser->linestart = s;
1563 } else if (isSPACE(c)) {
1565 } else if (c == 0 && s == bufend) {
1570 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1571 #endif /* PERL_MAD */
1572 if (flags & LEX_NO_NEXT_CHUNK)
1574 PL_parser->bufptr = s;
1575 l = CopLINE(PL_curcop);
1576 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1577 got_more = lex_next_chunk(flags);
1578 CopLINE_set(PL_curcop, l);
1579 s = PL_parser->bufptr;
1580 bufend = PL_parser->bufend;
1583 if (can_incline && need_incline && PL_parser->rsfp) {
1593 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1594 #endif /* PERL_MAD */
1595 PL_parser->bufptr = s;
1600 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1602 This function performs syntax checking on a prototype, C<proto>.
1603 If C<warn> is true, any illegal characters or mismatched brackets
1604 will trigger illegalproto warnings, declaring that they were
1605 detected in the prototype for C<name>.
1607 The return value is C<true> if this is a valid prototype, and
1608 C<false> if it is not, regardless of whether C<warn> was C<true> or
1611 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1618 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1620 STRLEN len, origlen;
1621 char *p = proto ? SvPV(proto, len) : NULL;
1622 bool bad_proto = FALSE;
1623 bool in_brackets = FALSE;
1624 bool after_slash = FALSE;
1625 char greedy_proto = ' ';
1626 bool proto_after_greedy_proto = FALSE;
1627 bool must_be_last = FALSE;
1628 bool underscore = FALSE;
1629 bool bad_proto_after_underscore = FALSE;
1631 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1637 for (; len--; p++) {
1640 proto_after_greedy_proto = TRUE;
1642 if (!strchr(";@%", *p))
1643 bad_proto_after_underscore = TRUE;
1646 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1653 in_brackets = FALSE;
1654 else if ((*p == '@' || *p == '%') &&
1657 must_be_last = TRUE;
1666 after_slash = FALSE;
1671 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1674 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1675 origlen, UNI_DISPLAY_ISPRINT)
1676 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1678 if (proto_after_greedy_proto)
1679 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1680 "Prototype after '%c' for %"SVf" : %s",
1681 greedy_proto, SVfARG(name), p);
1683 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1684 "Missing ']' in prototype for %"SVf" : %s",
1687 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1688 "Illegal character in prototype for %"SVf" : %s",
1690 if (bad_proto_after_underscore)
1691 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1692 "Illegal character after '_' in prototype for %"SVf" : %s",
1696 return (! (proto_after_greedy_proto || bad_proto) );
1701 * This subroutine has nothing to do with tilting, whether at windmills
1702 * or pinball tables. Its name is short for "increment line". It
1703 * increments the current line number in CopLINE(PL_curcop) and checks
1704 * to see whether the line starts with a comment of the form
1705 * # line 500 "foo.pm"
1706 * If so, it sets the current line number and file to the values in the comment.
1710 S_incline(pTHX_ const char *s)
1718 PERL_ARGS_ASSERT_INCLINE;
1720 COPLINE_INC_WITH_HERELINES;
1721 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1722 && s+1 == PL_bufend && *s == ';') {
1723 /* fake newline in string eval */
1724 CopLINE_dec(PL_curcop);
1729 while (SPACE_OR_TAB(*s))
1731 if (strnEQ(s, "line", 4))
1735 if (SPACE_OR_TAB(*s))
1739 while (SPACE_OR_TAB(*s))
1747 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1749 while (SPACE_OR_TAB(*s))
1751 if (*s == '"' && (t = strchr(s+1, '"'))) {
1757 while (!isSPACE(*t))
1761 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1763 if (*e != '\n' && *e != '\0')
1764 return; /* false alarm */
1766 line_num = atoi(n)-1;
1769 const STRLEN len = t - s;
1771 if (!PL_rsfp && !PL_parser->filtered) {
1772 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1773 * to *{"::_<newfilename"} */
1774 /* However, the long form of evals is only turned on by the
1775 debugger - usually they're "(eval %lu)" */
1776 GV * const cfgv = CopFILEGV(PL_curcop);
1779 STRLEN tmplen2 = len;
1783 if (tmplen2 + 2 <= sizeof smallbuf)
1786 Newx(tmpbuf2, tmplen2 + 2, char);
1791 memcpy(tmpbuf2 + 2, s, tmplen2);
1794 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1796 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1797 /* adjust ${"::_<newfilename"} to store the new file name */
1798 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1799 /* The line number may differ. If that is the case,
1800 alias the saved lines that are in the array.
1801 Otherwise alias the whole array. */
1802 if (CopLINE(PL_curcop) == line_num) {
1803 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1804 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1806 else if (GvAV(cfgv)) {
1807 AV * const av = GvAV(cfgv);
1808 const I32 start = CopLINE(PL_curcop)+1;
1809 I32 items = AvFILLp(av) - start;
1811 AV * const av2 = GvAVn(gv2);
1812 SV **svp = AvARRAY(av) + start;
1813 I32 l = (I32)line_num+1;
1815 av_store(av2, l++, SvREFCNT_inc(*svp++));
1820 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1823 CopFILE_free(PL_curcop);
1824 CopFILE_setn(PL_curcop, s, len);
1826 CopLINE_set(PL_curcop, line_num);
1829 #define skipspace(s) skipspace_flags(s, 0)
1832 /* skip space before PL_thistoken */
1835 S_skipspace0(pTHX_ char *s)
1837 PERL_ARGS_ASSERT_SKIPSPACE0;
1844 PL_thiswhite = newSVpvs("");
1845 sv_catsv(PL_thiswhite, PL_skipwhite);
1846 sv_free(PL_skipwhite);
1849 PL_realtokenstart = s - SvPVX(PL_linestr);
1853 /* skip space after PL_thistoken */
1856 S_skipspace1(pTHX_ char *s)
1858 const char *start = s;
1859 I32 startoff = start - SvPVX(PL_linestr);
1861 PERL_ARGS_ASSERT_SKIPSPACE1;
1866 start = SvPVX(PL_linestr) + startoff;
1867 if (!PL_thistoken && PL_realtokenstart >= 0) {
1868 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1869 PL_thistoken = newSVpvn(tstart, start - tstart);
1871 PL_realtokenstart = -1;
1874 PL_nextwhite = newSVpvs("");
1875 sv_catsv(PL_nextwhite, PL_skipwhite);
1876 sv_free(PL_skipwhite);
1883 S_skipspace2(pTHX_ char *s, SV **svp)
1886 const I32 startoff = s - SvPVX(PL_linestr);
1888 PERL_ARGS_ASSERT_SKIPSPACE2;
1891 if (!PL_madskills || !svp)
1893 start = SvPVX(PL_linestr) + startoff;
1894 if (!PL_thistoken && PL_realtokenstart >= 0) {
1895 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1896 PL_thistoken = newSVpvn(tstart, start - tstart);
1897 PL_realtokenstart = -1;
1901 *svp = newSVpvs("");
1902 sv_setsv(*svp, PL_skipwhite);
1903 sv_free(PL_skipwhite);
1912 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1914 AV *av = CopFILEAVx(PL_curcop);
1917 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1919 sv = *av_fetch(av, 0, 1);
1920 SvUPGRADE(sv, SVt_PVMG);
1922 if (!SvPOK(sv)) sv_setpvs(sv,"");
1924 sv_catsv(sv, orig_sv);
1926 sv_catpvn(sv, buf, len);
1931 if (PL_parser->preambling == NOLINE)
1932 av_store(av, CopLINE(PL_curcop), sv);
1938 * Called to gobble the appropriate amount and type of whitespace.
1939 * Skips comments as well.
1943 S_skipspace_flags(pTHX_ char *s, U32 flags)
1947 #endif /* PERL_MAD */
1948 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1951 sv_free(PL_skipwhite);
1952 PL_skipwhite = NULL;
1954 #endif /* PERL_MAD */
1955 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1956 while (s < PL_bufend && SPACE_OR_TAB(*s))
1959 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1961 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1962 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1963 LEX_NO_NEXT_CHUNK : 0));
1965 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1966 if (PL_linestart > PL_bufptr)
1967 PL_bufptr = PL_linestart;
1972 PL_skipwhite = newSVpvn(start, s-start);
1973 #endif /* PERL_MAD */
1979 * Check the unary operators to ensure there's no ambiguity in how they're
1980 * used. An ambiguous piece of code would be:
1982 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1983 * the +5 is its argument.
1993 if (PL_oldoldbufptr != PL_last_uni)
1995 while (isSPACE(*PL_last_uni))
1998 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2000 if ((t = strchr(s, '(')) && t < PL_bufptr)
2003 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2004 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2005 (int)(s - PL_last_uni), PL_last_uni);
2009 * LOP : macro to build a list operator. Its behaviour has been replaced
2010 * with a subroutine, S_lop() for which LOP is just another name.
2013 #define LOP(f,x) return lop(f,x,s)
2017 * Build a list operator (or something that might be one). The rules:
2018 * - if we have a next token, then it's a list operator [why?]
2019 * - if the next thing is an opening paren, then it's a function
2020 * - else it's a list operator
2024 S_lop(pTHX_ I32 f, int x, char *s)
2028 PERL_ARGS_ASSERT_LOP;
2034 PL_last_lop = PL_oldbufptr;
2035 PL_last_lop_op = (OPCODE)f;
2044 return REPORT(FUNC);
2047 return REPORT(FUNC);
2050 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2051 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2052 return REPORT(LSTOP);
2059 * Sets up for an eventual force_next(). start_force(0) basically does
2060 * an unshift, while start_force(-1) does a push. yylex removes items
2065 S_start_force(pTHX_ int where)
2069 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2070 where = PL_lasttoke;
2071 assert(PL_curforce < 0 || PL_curforce == where);
2072 if (PL_curforce != where) {
2073 for (i = PL_lasttoke; i > where; --i) {
2074 PL_nexttoke[i] = PL_nexttoke[i-1];
2078 if (PL_curforce < 0) /* in case of duplicate start_force() */
2079 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2080 PL_curforce = where;
2083 curmad('^', newSVpvs(""));
2084 CURMAD('_', PL_nextwhite);
2089 S_curmad(pTHX_ char slot, SV *sv)
2095 if (PL_curforce < 0)
2096 where = &PL_thismad;
2098 where = &PL_nexttoke[PL_curforce].next_mad;
2104 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2106 else if (PL_encoding) {
2107 sv_recode_to_utf8(sv, PL_encoding);
2112 /* keep a slot open for the head of the list? */
2113 if (slot != '_' && *where && (*where)->mad_key == '^') {
2114 (*where)->mad_key = slot;
2115 sv_free(MUTABLE_SV(((*where)->mad_val)));
2116 (*where)->mad_val = (void*)sv;
2119 addmad(newMADsv(slot, sv), where, 0);
2122 # define start_force(where) NOOP
2123 # define curmad(slot, sv) NOOP
2128 * When the lexer realizes it knows the next token (for instance,
2129 * it is reordering tokens for the parser) then it can call S_force_next
2130 * to know what token to return the next time the lexer is called. Caller
2131 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2132 * and possibly PL_expect to ensure the lexer handles the token correctly.
2136 S_force_next(pTHX_ I32 type)
2141 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2142 tokereport(type, &NEXTVAL_NEXTTOKE);
2146 if (PL_curforce < 0)
2147 start_force(PL_lasttoke);
2148 PL_nexttoke[PL_curforce].next_type = type;
2149 if (PL_lex_state != LEX_KNOWNEXT)
2150 PL_lex_defer = PL_lex_state;
2151 PL_lex_state = LEX_KNOWNEXT;
2152 PL_lex_expect = PL_expect;
2155 PL_nexttype[PL_nexttoke] = type;
2157 if (PL_lex_state != LEX_KNOWNEXT) {
2158 PL_lex_defer = PL_lex_state;
2159 PL_lex_expect = PL_expect;
2160 PL_lex_state = LEX_KNOWNEXT;
2168 int yyc = PL_parser->yychar;
2169 if (yyc != YYEMPTY) {
2172 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2173 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2174 PL_lex_allbrackets--;
2176 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2177 } else if (yyc == '('/*)*/) {
2178 PL_lex_allbrackets--;
2183 PL_parser->yychar = YYEMPTY;
2188 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2191 SV * const sv = newSVpvn_utf8(start, len,
2194 && !is_ascii_string((const U8*)start, len)
2195 && is_utf8_string((const U8*)start, len));
2201 * When the lexer knows the next thing is a word (for instance, it has
2202 * just seen -> and it knows that the next char is a word char, then
2203 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2207 * char *start : buffer position (must be within PL_linestr)
2208 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2209 * int check_keyword : if true, Perl checks to make sure the word isn't
2210 * a keyword (do this if the word is a label, e.g. goto FOO)
2211 * int allow_pack : if true, : characters will also be allowed (require,
2212 * use, etc. do this)
2213 * int allow_initial_tick : used by the "sub" lexer only.
2217 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2223 PERL_ARGS_ASSERT_FORCE_WORD;
2225 start = SKIPSPACE1(start);
2227 if (isIDFIRST_lazy_if(s,UTF) ||
2228 (allow_pack && *s == ':') )
2230 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2231 if (check_keyword) {
2232 char *s2 = PL_tokenbuf;
2233 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2235 if (keyword(s2, len, 0))
2238 start_force(PL_curforce);
2240 curmad('X', newSVpvn(start,s-start));
2241 if (token == METHOD) {
2246 PL_expect = XOPERATOR;
2250 curmad('g', newSVpvs( "forced" ));
2251 NEXTVAL_NEXTTOKE.opval
2252 = (OP*)newSVOP(OP_CONST,0,
2253 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2254 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2262 * Called when the lexer wants $foo *foo &foo etc, but the program
2263 * text only contains the "foo" portion. The first argument is a pointer
2264 * to the "foo", and the second argument is the type symbol to prefix.
2265 * Forces the next token to be a "WORD".
2266 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2270 S_force_ident(pTHX_ const char *s, int kind)
2274 PERL_ARGS_ASSERT_FORCE_IDENT;
2277 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2278 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2279 UTF ? SVf_UTF8 : 0));
2280 start_force(PL_curforce);
2281 NEXTVAL_NEXTTOKE.opval = o;
2284 o->op_private = OPpCONST_ENTERED;
2285 /* XXX see note in pp_entereval() for why we forgo typo
2286 warnings if the symbol must be introduced in an eval.
2288 gv_fetchpvn_flags(s, len,
2289 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2290 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2291 kind == '$' ? SVt_PV :
2292 kind == '@' ? SVt_PVAV :
2293 kind == '%' ? SVt_PVHV :
2301 S_force_ident_maybe_lex(pTHX_ char pit)
2303 start_force(PL_curforce);
2304 NEXTVAL_NEXTTOKE.ival = pit;
2309 Perl_str_to_version(pTHX_ SV *sv)
2314 const char *start = SvPV_const(sv,len);
2315 const char * const end = start + len;
2316 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2318 PERL_ARGS_ASSERT_STR_TO_VERSION;
2320 while (start < end) {
2324 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2329 retval += ((NV)n)/nshift;
2338 * Forces the next token to be a version number.
2339 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2340 * and if "guessing" is TRUE, then no new token is created (and the caller
2341 * must use an alternative parsing method).
2345 S_force_version(pTHX_ char *s, int guessing)
2351 I32 startoff = s - SvPVX(PL_linestr);
2354 PERL_ARGS_ASSERT_FORCE_VERSION;
2362 while (isDIGIT(*d) || *d == '_' || *d == '.')
2366 start_force(PL_curforce);
2367 curmad('X', newSVpvn(s,d-s));
2370 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2372 #ifdef USE_LOCALE_NUMERIC
2373 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2374 setlocale(LC_NUMERIC, "C");
2376 s = scan_num(s, &pl_yylval);
2377 #ifdef USE_LOCALE_NUMERIC
2378 setlocale(LC_NUMERIC, loc);
2381 version = pl_yylval.opval;
2382 ver = cSVOPx(version)->op_sv;
2383 if (SvPOK(ver) && !SvNIOK(ver)) {
2384 SvUPGRADE(ver, SVt_PVNV);
2385 SvNV_set(ver, str_to_version(ver));
2386 SvNOK_on(ver); /* hint that it is a version */
2389 else if (guessing) {
2392 sv_free(PL_nextwhite); /* let next token collect whitespace */
2394 s = SvPVX(PL_linestr) + startoff;
2402 if (PL_madskills && !version) {
2403 sv_free(PL_nextwhite); /* let next token collect whitespace */
2405 s = SvPVX(PL_linestr) + startoff;
2408 /* NOTE: The parser sees the package name and the VERSION swapped */
2409 start_force(PL_curforce);
2410 NEXTVAL_NEXTTOKE.opval = version;
2417 * S_force_strict_version
2418 * Forces the next token to be a version number using strict syntax rules.
2422 S_force_strict_version(pTHX_ char *s)
2427 I32 startoff = s - SvPVX(PL_linestr);
2429 const char *errstr = NULL;
2431 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2433 while (isSPACE(*s)) /* leading whitespace */
2436 if (is_STRICT_VERSION(s,&errstr)) {
2438 s = (char *)scan_version(s, ver, 0);
2439 version = newSVOP(OP_CONST, 0, ver);
2441 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2442 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2446 yyerror(errstr); /* version required */
2451 if (PL_madskills && !version) {
2452 sv_free(PL_nextwhite); /* let next token collect whitespace */
2454 s = SvPVX(PL_linestr) + startoff;
2457 /* NOTE: The parser sees the package name and the VERSION swapped */
2458 start_force(PL_curforce);
2459 NEXTVAL_NEXTTOKE.opval = version;
2467 * Tokenize a quoted string passed in as an SV. It finds the next
2468 * chunk, up to end of string or a backslash. It may make a new
2469 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2474 S_tokeq(pTHX_ SV *sv)
2483 PERL_ARGS_ASSERT_TOKEQ;
2488 s = SvPV_force(sv, len);
2489 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2492 /* This is relying on the SV being "well formed" with a trailing '\0' */
2493 while (s < send && !(*s == '\\' && s[1] == '\\'))
2498 if ( PL_hints & HINT_NEW_STRING ) {
2499 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2503 if (s + 1 < send && (s[1] == '\\'))
2504 s++; /* all that, just for this */
2509 SvCUR_set(sv, d - SvPVX_const(sv));
2511 if ( PL_hints & HINT_NEW_STRING )
2512 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2517 * Now come three functions related to double-quote context,
2518 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2519 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2520 * interact with PL_lex_state, and create fake ( ... ) argument lists
2521 * to handle functions and concatenation.
2525 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2530 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2532 * Pattern matching will set PL_lex_op to the pattern-matching op to
2533 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2535 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2537 * Everything else becomes a FUNC.
2539 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2540 * had an OP_CONST or OP_READLINE). This just sets us up for a
2541 * call to S_sublex_push().
2545 S_sublex_start(pTHX)
2548 const I32 op_type = pl_yylval.ival;
2550 if (op_type == OP_NULL) {
2551 pl_yylval.opval = PL_lex_op;
2555 if (op_type == OP_CONST || op_type == OP_READLINE) {
2556 SV *sv = tokeq(PL_lex_stuff);
2558 if (SvTYPE(sv) == SVt_PVIV) {
2559 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2561 const char * const p = SvPV_const(sv, len);
2562 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2566 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2567 PL_lex_stuff = NULL;
2568 /* Allow <FH> // "foo" */
2569 if (op_type == OP_READLINE)
2570 PL_expect = XTERMORDORDOR;
2573 else if (op_type == OP_BACKTICK && PL_lex_op) {
2574 /* readpipe() was overridden */
2575 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2576 pl_yylval.opval = PL_lex_op;
2578 PL_lex_stuff = NULL;
2582 PL_sublex_info.super_state = PL_lex_state;
2583 PL_sublex_info.sub_inwhat = (U16)op_type;
2584 PL_sublex_info.sub_op = PL_lex_op;
2585 PL_lex_state = LEX_INTERPPUSH;
2589 pl_yylval.opval = PL_lex_op;
2599 * Create a new scope to save the lexing state. The scope will be
2600 * ended in S_sublex_done. Returns a '(', starting the function arguments
2601 * to the uc, lc, etc. found before.
2602 * Sets PL_lex_state to LEX_INTERPCONCAT.
2610 const bool is_heredoc = PL_multi_close == '<';
2613 PL_lex_state = PL_sublex_info.super_state;
2614 SAVEBOOL(PL_lex_dojoin);
2615 SAVEI32(PL_lex_brackets);
2616 SAVEI32(PL_lex_allbrackets);
2617 SAVEI32(PL_lex_formbrack);
2618 SAVEI8(PL_lex_fakeeof);
2619 SAVEI32(PL_lex_casemods);
2620 SAVEI32(PL_lex_starts);
2621 SAVEI8(PL_lex_state);
2622 SAVESPTR(PL_lex_repl);
2623 SAVEVPTR(PL_lex_inpat);
2624 SAVEI16(PL_lex_inwhat);
2627 SAVECOPLINE(PL_curcop);
2628 SAVEI32(PL_multi_end);
2629 SAVEI32(PL_parser->herelines);
2630 PL_parser->herelines = 0;
2632 SAVEI8(PL_multi_close);
2633 SAVEPPTR(PL_bufptr);
2634 SAVEPPTR(PL_bufend);
2635 SAVEPPTR(PL_oldbufptr);
2636 SAVEPPTR(PL_oldoldbufptr);
2637 SAVEPPTR(PL_last_lop);
2638 SAVEPPTR(PL_last_uni);
2639 SAVEPPTR(PL_linestart);
2640 SAVESPTR(PL_linestr);
2641 SAVEGENERICPV(PL_lex_brackstack);
2642 SAVEGENERICPV(PL_lex_casestack);
2643 SAVEGENERICPV(PL_parser->lex_shared);
2644 SAVEBOOL(PL_parser->lex_re_reparsing);
2645 SAVEI32(PL_copline);
2647 /* The here-doc parser needs to be able to peek into outer lexing
2648 scopes to find the body of the here-doc. So we put PL_linestr and
2649 PL_bufptr into lex_shared, to ‘share’ those values.
2651 PL_parser->lex_shared->ls_linestr = PL_linestr;
2652 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2654 PL_linestr = PL_lex_stuff;
2655 PL_lex_repl = PL_sublex_info.repl;
2656 PL_lex_stuff = NULL;
2657 PL_sublex_info.repl = NULL;
2659 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2660 = SvPVX(PL_linestr);
2661 PL_bufend += SvCUR(PL_linestr);
2662 PL_last_lop = PL_last_uni = NULL;
2663 SAVEFREESV(PL_linestr);
2664 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2666 PL_lex_dojoin = FALSE;
2667 PL_lex_brackets = PL_lex_formbrack = 0;
2668 PL_lex_allbrackets = 0;
2669 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2670 Newx(PL_lex_brackstack, 120, char);
2671 Newx(PL_lex_casestack, 12, char);
2672 PL_lex_casemods = 0;
2673 *PL_lex_casestack = '\0';
2675 PL_lex_state = LEX_INTERPCONCAT;
2677 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2678 PL_copline = NOLINE;
2680 Newxz(shared, 1, LEXSHARED);
2681 shared->ls_prev = PL_parser->lex_shared;
2682 PL_parser->lex_shared = shared;
2684 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2685 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2686 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2687 PL_lex_inpat = PL_sublex_info.sub_op;
2689 PL_lex_inpat = NULL;
2691 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2692 PL_in_eval &= ~EVAL_RE_REPARSING;
2699 * Restores lexer state after a S_sublex_push.
2706 if (!PL_lex_starts++) {
2707 SV * const sv = newSVpvs("");
2708 if (SvUTF8(PL_linestr))
2710 PL_expect = XOPERATOR;
2711 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2715 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2716 PL_lex_state = LEX_INTERPCASEMOD;
2720 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2721 assert(PL_lex_inwhat != OP_TRANSR);
2722 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2723 PL_linestr = PL_lex_repl;
2725 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2726 PL_bufend += SvCUR(PL_linestr);
2727 PL_last_lop = PL_last_uni = NULL;
2728 PL_lex_dojoin = FALSE;
2729 PL_lex_brackets = 0;
2730 PL_lex_allbrackets = 0;
2731 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2732 PL_lex_casemods = 0;
2733 *PL_lex_casestack = '\0';
2735 if (SvEVALED(PL_lex_repl)) {
2736 PL_lex_state = LEX_INTERPNORMAL;
2738 /* we don't clear PL_lex_repl here, so that we can check later
2739 whether this is an evalled subst; that means we rely on the
2740 logic to ensure sublex_done() is called again only via the
2741 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2744 PL_lex_state = LEX_INTERPCONCAT;
2747 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2748 CopLINE(PL_curcop) +=
2749 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2750 + PL_parser->herelines;
2751 PL_parser->herelines = 0;
2756 const line_t l = CopLINE(PL_curcop);
2761 PL_endwhite = newSVpvs("");
2762 sv_catsv(PL_endwhite, PL_thiswhite);
2766 sv_setpvs(PL_thistoken,"");
2768 PL_realtokenstart = -1;
2772 if (PL_multi_close == '<')
2773 PL_parser->herelines += l - PL_multi_end;
2774 PL_bufend = SvPVX(PL_linestr);
2775 PL_bufend += SvCUR(PL_linestr);
2776 PL_expect = XOPERATOR;
2777 PL_sublex_info.sub_inwhat = 0;
2782 PERL_STATIC_INLINE SV*
2783 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2785 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2786 * interior, hence to the "}". Finds what the name resolves to, returning
2787 * an SV* containing it; NULL if no valid one found */
2789 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2796 const U8* first_bad_char_loc;
2797 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2799 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2801 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2803 &first_bad_char_loc))
2805 /* If warnings are on, this will print a more detailed analysis of what
2806 * is wrong than the error message below */
2807 utf8n_to_uvchr(first_bad_char_loc,
2808 e - ((char *) first_bad_char_loc),
2811 /* We deliberately don't try to print the malformed character, which
2812 * might not print very well; it also may be just the first of many
2813 * malformations, so don't print what comes after it */
2814 yyerror(Perl_form(aTHX_
2815 "Malformed UTF-8 character immediately after '%.*s'",
2816 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2820 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2821 /* include the <}> */
2822 e - backslash_ptr + 1);
2824 SvREFCNT_dec_NN(res);
2828 /* See if the charnames handler is the Perl core's, and if so, we can skip
2829 * the validation needed for a user-supplied one, as Perl's does its own
2831 table = GvHV(PL_hintgv); /* ^H */
2832 cvp = hv_fetchs(table, "charnames", FALSE);
2833 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2834 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
2836 const char * const name = HvNAME(stash);
2837 if strEQ(name, "_charnames") {
2842 /* Here, it isn't Perl's charname handler. We can't rely on a
2843 * user-supplied handler to validate the input name. For non-ut8 input,
2844 * look to see that the first character is legal. Then loop through the
2845 * rest checking that each is a continuation */
2847 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2851 if (! isALPHAU(*s)) {
2856 if (! isCHARNAME_CONT(*s)) {
2859 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2860 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2861 "A sequence of multiple spaces in a charnames "
2862 "alias definition is deprecated");
2866 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2867 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2868 "Trailing white-space in a charnames alias "
2869 "definition is deprecated");
2873 /* Similarly for utf8. For invariants can check directly; for other
2874 * Latin1, can calculate their code point and check; otherwise use a
2876 if (UTF8_IS_INVARIANT(*s)) {
2877 if (! isALPHAU(*s)) {
2881 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2882 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2888 if (! PL_utf8_charname_begin) {
2889 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2890 PL_utf8_charname_begin = _core_swash_init("utf8",
2891 "_Perl_Charname_Begin",
2893 1, 0, NULL, &flags);
2895 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2902 if (UTF8_IS_INVARIANT(*s)) {
2903 if (! isCHARNAME_CONT(*s)) {
2906 if (*s == ' ' && *(s-1) == ' '
2907 && ckWARN_d(WARN_DEPRECATED)) {
2908 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2909 "A sequence of multiple spaces in a charnam"
2910 "es alias definition is deprecated");
2914 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2915 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2922 if (! PL_utf8_charname_continue) {
2923 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2924 PL_utf8_charname_continue = _core_swash_init("utf8",
2925 "_Perl_Charname_Continue",
2927 1, 0, NULL, &flags);
2929 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2935 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2936 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2937 "Trailing white-space in a charnames alias "
2938 "definition is deprecated");
2942 if (SvUTF8(res)) { /* Don't accept malformed input */
2943 const U8* first_bad_char_loc;
2945 const char* const str = SvPV_const(res, len);
2946 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2947 /* If warnings are on, this will print a more detailed analysis of
2948 * what is wrong than the error message below */
2949 utf8n_to_uvchr(first_bad_char_loc,
2950 (char *) first_bad_char_loc - str,
2953 /* We deliberately don't try to print the malformed character,
2954 * which might not print very well; it also may be just the first
2955 * of many malformations, so don't print what comes after it */
2958 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2959 (int) (e - backslash_ptr + 1), backslash_ptr,
2960 (int) ((char *) first_bad_char_loc - str), str
2970 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2972 /* The final %.*s makes sure that should the trailing NUL be missing
2973 * that this print won't run off the end of the string */
2976 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2977 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2978 (int)(e - s + bad_char_size), s + bad_char_size
2980 UTF ? SVf_UTF8 : 0);
2988 Extracts the next constant part of a pattern, double-quoted string,
2989 or transliteration. This is terrifying code.
2991 For example, in parsing the double-quoted string "ab\x63$d", it would
2992 stop at the '$' and return an OP_CONST containing 'abc'.
2994 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2995 processing a pattern (PL_lex_inpat is true), a transliteration
2996 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2998 Returns a pointer to the character scanned up to. If this is
2999 advanced from the start pointer supplied (i.e. if anything was
3000 successfully parsed), will leave an OP_CONST for the substring scanned
3001 in pl_yylval. Caller must intuit reason for not parsing further
3002 by looking at the next characters herself.
3006 \N{FOO} => \N{U+hex_for_character_FOO}
3007 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3010 all other \-char, including \N and \N{ apart from \N{ABC}
3013 @ and $ where it appears to be a var, but not for $ as tail anchor
3018 In transliterations:
3019 characters are VERY literal, except for - not at the start or end
3020 of the string, which indicates a range. If the range is in bytes,
3021 scan_const expands the range to the full set of intermediate
3022 characters. If the range is in utf8, the hyphen is replaced with
3023 a certain range mark which will be handled by pmtrans() in op.c.
3025 In double-quoted strings:
3027 double-quoted style: \r and \n
3028 constants: \x31, etc.
3029 deprecated backrefs: \1 (in substitution replacements)
3030 case and quoting: \U \Q \E
3033 scan_const does *not* construct ops to handle interpolated strings.
3034 It stops processing as soon as it finds an embedded $ or @ variable
3035 and leaves it to the caller to work out what's going on.
3037 embedded arrays (whether in pattern or not) could be:
3038 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3040 $ in double-quoted strings must be the symbol of an embedded scalar.
3042 $ in pattern could be $foo or could be tail anchor. Assumption:
3043 it's a tail anchor if $ is the last thing in the string, or if it's
3044 followed by one of "()| \r\n\t"
3046 \1 (backreferences) are turned into $1 in substitutions
3048 The structure of the code is
3049 while (there's a character to process) {
3050 handle transliteration ranges
3051 skip regexp comments /(?#comment)/ and codes /(?{code})/
3052 skip #-initiated comments in //x patterns
3053 check for embedded arrays
3054 check for embedded scalars
3056 deprecate \1 in substitution replacements
3057 handle string-changing backslashes \l \U \Q \E, etc.
3058 switch (what was escaped) {
3059 handle \- in a transliteration (becomes a literal -)
3060 if a pattern and not \N{, go treat as regular character
3061 handle \132 (octal characters)
3062 handle \x15 and \x{1234} (hex characters)
3063 handle \N{name} (named characters, also \N{3,5} in a pattern)
3064 handle \cV (control characters)
3065 handle printf-style backslashes (\f, \r, \n, etc)
3068 } (end if backslash)
3069 handle regular character
3070 } (end while character to read)
3075 S_scan_const(pTHX_ char *start)
3078 char *send = PL_bufend; /* end of the constant */
3079 SV *sv = newSV(send - start); /* sv for the constant. See
3080 note below on sizing. */
3081 char *s = start; /* start of the constant */
3082 char *d = SvPVX(sv); /* destination for copies */
3083 bool dorange = FALSE; /* are we in a translit range? */
3084 bool didrange = FALSE; /* did we just finish a range? */
3085 bool in_charclass = FALSE; /* within /[...]/ */
3086 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3087 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3088 to be UTF8? But, this can
3089 show as true when the source
3090 isn't utf8, as for example
3091 when it is entirely composed
3093 SV *res; /* result from charnames */
3095 /* Note on sizing: The scanned constant is placed into sv, which is
3096 * initialized by newSV() assuming one byte of output for every byte of
3097 * input. This routine expects newSV() to allocate an extra byte for a
3098 * trailing NUL, which this routine will append if it gets to the end of
3099 * the input. There may be more bytes of input than output (eg., \N{LATIN
3100 * CAPITAL LETTER A}), or more output than input if the constant ends up
3101 * recoded to utf8, but each time a construct is found that might increase
3102 * the needed size, SvGROW() is called. Its size parameter each time is
3103 * based on the best guess estimate at the time, namely the length used so
3104 * far, plus the length the current construct will occupy, plus room for
3105 * the trailing NUL, plus one byte for every input byte still unscanned */
3107 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3110 UV literal_endpoint = 0;
3111 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3114 PERL_ARGS_ASSERT_SCAN_CONST;
3116 assert(PL_lex_inwhat != OP_TRANSR);
3117 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3118 /* If we are doing a trans and we know we want UTF8 set expectation */
3119 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3120 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3123 /* Protect sv from errors and fatal warnings. */
3124 ENTER_with_name("scan_const");
3127 while (s < send || dorange) {
3129 /* get transliterations out of the way (they're most literal) */
3130 if (PL_lex_inwhat == OP_TRANS) {
3131 /* expand a range A-Z to the full set of characters. AIE! */
3133 I32 i; /* current expanded character */
3134 I32 min; /* first character in range */
3135 I32 max; /* last character in range */
3146 char * const c = (char*)utf8_hop((U8*)d, -1);
3150 *c = (char) ILLEGAL_UTF8_BYTE;
3151 /* mark the range as done, and continue */
3157 i = d - SvPVX_const(sv); /* remember current offset */
3160 SvLEN(sv) + (has_utf8 ?
3161 (512 - UTF_CONTINUATION_MARK +
3164 /* How many two-byte within 0..255: 128 in UTF-8,
3165 * 96 in UTF-8-mod. */
3167 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3169 d = SvPVX(sv) + i; /* refresh d after realloc */
3173 for (j = 0; j <= 1; j++) {
3174 char * const c = (char*)utf8_hop((U8*)d, -1);
3175 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3181 max = (U8)0xff; /* only to \xff */
3182 uvmax = uv; /* \x{100} to uvmax */
3184 d = c; /* eat endpoint chars */
3189 d -= 2; /* eat the first char and the - */
3190 min = (U8)*d; /* first char in range */
3191 max = (U8)d[1]; /* last char in range */
3198 "Invalid range \"%c-%c\" in transliteration operator",
3199 (char)min, (char)max);
3203 if (literal_endpoint == 2 &&
3204 ((isLOWER_A(min) && isLOWER_A(max)) ||
3205 (isUPPER_A(min) && isUPPER_A(max))))
3207 for (i = min; i <= max; i++) {
3214 for (i = min; i <= max; i++)
3217 append_utf8_from_native_byte(i, &d);
3225 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3227 *d++ = (char) ILLEGAL_UTF8_BYTE;
3229 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3233 /* mark the range as done, and continue */
3237 literal_endpoint = 0;
3242 /* range begins (ignore - as first or last char) */
3243 else if (*s == '-' && s+1 < send && s != start) {
3245 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3252 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3262 literal_endpoint = 0;
3263 native_range = TRUE;
3268 /* if we get here, we're not doing a transliteration */
3270 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3273 while (s1 >= start && *s1-- == '\\')
3276 in_charclass = TRUE;
3279 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3282 while (s1 >= start && *s1-- == '\\')
3285 in_charclass = FALSE;
3288 /* skip for regexp comments /(?#comment)/, except for the last
3289 * char, which will be done separately.
3290 * Stop on (?{..}) and friends */
3292 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3294 while (s+1 < send && *s != ')')
3297 else if (!PL_lex_casemods &&
3298 ( s[2] == '{' /* This should match regcomp.c */
3299 || (s[2] == '?' && s[3] == '{')))
3305 /* likewise skip #-initiated comments in //x patterns */
3306 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3307 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3308 while (s+1 < send && *s != '\n')
3312 /* no further processing of single-quoted regex */
3313 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3314 goto default_action;
3316 /* check for embedded arrays
3317 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3319 else if (*s == '@' && s[1]) {
3320 if (isWORDCHAR_lazy_if(s+1,UTF))
3322 if (strchr(":'{$", s[1]))
3324 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3325 break; /* in regexp, neither @+ nor @- are interpolated */
3328 /* check for embedded scalars. only stop if we're sure it's a
3331 else if (*s == '$') {
3332 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3334 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3336 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3337 "Possible unintended interpolation of $\\ in regex");
3339 break; /* in regexp, $ might be tail anchor */
3343 /* End of else if chain - OP_TRANS rejoin rest */
3346 if (*s == '\\' && s+1 < send) {
3347 char* e; /* Can be used for ending '}', etc. */
3351 /* warn on \1 - \9 in substitution replacements, but note that \11
3352 * is an octal; and \19 is \1 followed by '9' */
3353 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3354 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3356 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3361 /* string-change backslash escapes */
3362 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3366 /* In a pattern, process \N, but skip any other backslash escapes.
3367 * This is because we don't want to translate an escape sequence
3368 * into a meta symbol and have the regex compiler use the meta
3369 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3370 * in spite of this, we do have to process \N here while the proper
3371 * charnames handler is in scope. See bugs #56444 and #62056.
3372 * There is a complication because \N in a pattern may also stand
3373 * for 'match a non-nl', and not mean a charname, in which case its
3374 * processing should be deferred to the regex compiler. To be a
3375 * charname it must be followed immediately by a '{', and not look
3376 * like \N followed by a curly quantifier, i.e., not something like
3377 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3379 else if (PL_lex_inpat
3382 || regcurly(s + 1, FALSE)))
3385 goto default_action;
3390 /* quoted - in transliterations */
3392 if (PL_lex_inwhat == OP_TRANS) {
3399 if ((isALPHANUMERIC(*s)))
3400 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3401 "Unrecognized escape \\%c passed through",
3403 /* default action is to copy the quoted character */
3404 goto default_action;
3407 /* eg. \132 indicates the octal constant 0132 */
3408 case '0': case '1': case '2': case '3':
3409 case '4': case '5': case '6': case '7':
3411 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3413 uv = grok_oct(s, &len, &flags, NULL);
3415 if (len < 3 && s < send && isDIGIT(*s)
3416 && ckWARN(WARN_MISC))
3418 Perl_warner(aTHX_ packWARN(WARN_MISC),
3419 "%s", form_short_octal_warning(s, len));
3422 goto NUM_ESCAPE_INSERT;
3424 /* eg. \o{24} indicates the octal constant \024 */
3429 bool valid = grok_bslash_o(&s, &uv, &error,
3430 TRUE, /* Output warning */
3431 FALSE, /* Not strict */
3432 TRUE, /* Output warnings for
3439 goto NUM_ESCAPE_INSERT;
3442 /* eg. \x24 indicates the hex constant 0x24 */
3447 bool valid = grok_bslash_x(&s, &uv, &error,
3448 TRUE, /* Output warning */
3449 FALSE, /* Not strict */
3450 TRUE, /* Output warnings for
3460 /* Insert oct or hex escaped character. There will always be
3461 * enough room in sv since such escapes will be longer than any
3462 * UTF-8 sequence they can end up as, except if they force us
3463 * to recode the rest of the string into utf8 */
3465 /* Here uv is the ordinal of the next character being added */
3466 if (!UVCHR_IS_INVARIANT(uv)) {
3467 if (!has_utf8 && uv > 255) {
3468 /* Might need to recode whatever we have accumulated so
3469 * far if it contains any chars variant in utf8 or
3472 SvCUR_set(sv, d - SvPVX_const(sv));
3475 /* See Note on sizing above. */
3476 sv_utf8_upgrade_flags_grow(sv,
3477 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3478 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3479 d = SvPVX(sv) + SvCUR(sv);
3484 d = (char*)uvchr_to_utf8((U8*)d, uv);
3485 if (PL_lex_inwhat == OP_TRANS &&
3486 PL_sublex_info.sub_op) {
3487 PL_sublex_info.sub_op->op_private |=
3488 (PL_lex_repl ? OPpTRANS_FROM_UTF
3492 if (uv > 255 && !dorange)
3493 native_range = FALSE;
3506 /* In a non-pattern \N must be a named character, like \N{LATIN
3507 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3508 * mean to match a non-newline. For non-patterns, named
3509 * characters are converted to their string equivalents. In
3510 * patterns, named characters are not converted to their
3511 * ultimate forms for the same reasons that other escapes
3512 * aren't. Instead, they are converted to the \N{U+...} form
3513 * to get the value from the charnames that is in effect right
3514 * now, while preserving the fact that it was a named character
3515 * so that the regex compiler knows this */
3517 /* The structure of this section of code (besides checking for
3518 * errors and upgrading to utf8) is:
3519 * Further disambiguate between the two meanings of \N, and if
3520 * not a charname, go process it elsewhere
3521 * If of form \N{U+...}, pass it through if a pattern;
3522 * otherwise convert to utf8
3523 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3524 * pattern; otherwise convert to utf8 */
3526 /* Here, s points to the 'N'; the test below is guaranteed to
3527 * succeed if we are being called on a pattern as we already
3528 * know from a test above that the next character is a '{'.
3529 * On a non-pattern \N must mean 'named sequence, which
3530 * requires braces */
3533 yyerror("Missing braces on \\N{}");
3538 /* If there is no matching '}', it is an error. */
3539 if (! (e = strchr(s, '}'))) {
3540 if (! PL_lex_inpat) {
3541 yyerror("Missing right brace on \\N{}");
3543 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3548 /* Here it looks like a named character */
3550 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3551 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3552 | PERL_SCAN_DISALLOW_PREFIX;
3555 /* For \N{U+...}, the '...' is a unicode value even on
3556 * EBCDIC machines */
3557 s += 2; /* Skip to next char after the 'U+' */
3559 uv = grok_hex(s, &len, &flags, NULL);
3560 if (len == 0 || len != (STRLEN)(e - s)) {
3561 yyerror("Invalid hexadecimal number in \\N{U+...}");
3568 /* On non-EBCDIC platforms, pass through to the regex
3569 * compiler unchanged. The reason we evaluated the
3570 * number above is to make sure there wasn't a syntax
3571 * error. But on EBCDIC we convert to native so
3572 * downstream code can continue to assume it's native
3574 s -= 5; /* Include the '\N{U+' */
3576 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3579 (unsigned int) UNI_TO_NATIVE(uv));
3581 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3585 else { /* Not a pattern: convert the hex to string */
3587 /* If destination is not in utf8, unconditionally
3588 * recode it to be so. This is because \N{} implies
3589 * Unicode semantics, and scalars have to be in utf8
3590 * to guarantee those semantics */
3592 SvCUR_set(sv, d - SvPVX_const(sv));
3595 /* See Note on sizing above. */
3596 sv_utf8_upgrade_flags_grow(
3598 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3599 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3600 d = SvPVX(sv) + SvCUR(sv);
3604 /* Add the (Unicode) code point to the output. */
3605 if (UNI_IS_INVARIANT(uv)) {
3606 *d++ = (char) LATIN1_TO_NATIVE(uv);
3609 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3613 else /* Here is \N{NAME} but not \N{U+...}. */
3614 if ((res = get_and_check_backslash_N_name(s, e)))
3617 const char *str = SvPV_const(res, len);
3620 if (! len) { /* The name resolved to an empty string */
3621 Copy("\\N{}", d, 4, char);
3625 /* In order to not lose information for the regex
3626 * compiler, pass the result in the specially made
3627 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3628 * the code points in hex of each character
3629 * returned by charnames */
3631 const char *str_end = str + len;
3632 const STRLEN off = d - SvPVX_const(sv);
3634 if (! SvUTF8(res)) {
3635 /* For the non-UTF-8 case, we can determine the
3636 * exact length needed without having to parse
3637 * through the string. Each character takes up
3638 * 2 hex digits plus either a trailing dot or
3640 d = off + SvGROW(sv, off
3642 + 6 /* For the "\N{U+", and
3644 + (STRLEN)(send - e));
3645 Copy("\\N{U+", d, 5, char);
3647 while (str < str_end) {
3649 my_snprintf(hex_string, sizeof(hex_string),
3650 "%02X.", (U8) *str);
3651 Copy(hex_string, d, 3, char);
3655 d--; /* We will overwrite below the final
3656 dot with a right brace */
3659 STRLEN char_length; /* cur char's byte length */
3661 /* and the number of bytes after this is
3662 * translated into hex digits */
3663 STRLEN output_length;
3665 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3666 * for max('U+', '.'); and 1 for NUL */
3667 char hex_string[2 * UTF8_MAXBYTES + 5];
3669 /* Get the first character of the result. */
3670 U32 uv = utf8n_to_uvchr((U8 *) str,
3674 /* Convert first code point to hex, including
3675 * the boiler plate before it. */
3677 my_snprintf(hex_string, sizeof(hex_string),
3681 /* Make sure there is enough space to hold it */
3682 d = off + SvGROW(sv, off
3684 + (STRLEN)(send - e)
3685 + 2); /* '}' + NUL */
3687 Copy(hex_string, d, output_length, char);
3690 /* For each subsequent character, append dot and
3691 * its ordinal in hex */
3692 while ((str += char_length) < str_end) {
3693 const STRLEN off = d - SvPVX_const(sv);
3694 U32 uv = utf8n_to_uvchr((U8 *) str,
3699 my_snprintf(hex_string,
3704 d = off + SvGROW(sv, off
3706 + (STRLEN)(send - e)
3707 + 2); /* '}' + NUL */
3708 Copy(hex_string, d, output_length, char);
3713 *d++ = '}'; /* Done. Add the trailing brace */
3716 else { /* Here, not in a pattern. Convert the name to a
3719 /* If destination is not in utf8, unconditionally
3720 * recode it to be so. This is because \N{} implies
3721 * Unicode semantics, and scalars have to be in utf8
3722 * to guarantee those semantics */
3724 SvCUR_set(sv, d - SvPVX_const(sv));
3727 /* See Note on sizing above. */
3728 sv_utf8_upgrade_flags_grow(sv,
3729 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3730 len + (STRLEN)(send - s) + 1);
3731 d = SvPVX(sv) + SvCUR(sv);
3733 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3735 /* See Note on sizing above. (NOTE: SvCUR() is not
3736 * set correctly here). */
3737 const STRLEN off = d - SvPVX_const(sv);
3738 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3740 Copy(str, d, len, char);
3746 } /* End \N{NAME} */
3749 native_range = FALSE; /* \N{} is defined to be Unicode */
3751 s = e + 1; /* Point to just after the '}' */
3754 /* \c is a control character */
3758 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3761 yyerror("Missing control char name in \\c");
3765 /* printf-style backslashes, formfeeds, newlines, etc */
3782 *d++ = ASCII_TO_NATIVE('\033');
3791 } /* end if (backslash) */
3798 /* If we started with encoded form, or already know we want it,
3799 then encode the next character */
3800 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3804 /* One might think that it is wasted effort in the case of the
3805 * source being utf8 (this_utf8 == TRUE) to take the next character
3806 * in the source, convert it to an unsigned value, and then convert
3807 * it back again. But the source has not been validated here. The
3808 * routine that does the conversion checks for errors like
3811 const UV nextuv = (this_utf8)
3812 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3814 const STRLEN need = UNISKIP(nextuv);
3816 SvCUR_set(sv, d - SvPVX_const(sv));
3819 /* See Note on sizing above. */
3820 sv_utf8_upgrade_flags_grow(sv,
3821 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3822 need + (STRLEN)(send - s) + 1);
3823 d = SvPVX(sv) + SvCUR(sv);
3825 } else if (need > len) {
3826 /* encoded value larger than old, may need extra space (NOTE:
3827 * SvCUR() is not set correctly here). See Note on sizing
3829 const STRLEN off = d - SvPVX_const(sv);
3830 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3834 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3836 if (uv > 255 && !dorange)
3837 native_range = FALSE;
3843 } /* while loop to process each character */
3845 /* terminate the string and set up the sv */
3847 SvCUR_set(sv, d - SvPVX_const(sv));
3848 if (SvCUR(sv) >= SvLEN(sv))
3849 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3850 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3853 if (PL_encoding && !has_utf8) {
3854 sv_recode_to_utf8(sv, PL_encoding);
3860 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3861 PL_sublex_info.sub_op->op_private |=
3862 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3866 /* shrink the sv if we allocated more than we used */
3867 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3868 SvPV_shrink_to_cur(sv);
3871 /* return the substring (via pl_yylval) only if we parsed anything */
3874 for (; s2 < s; s2++) {
3876 COPLINE_INC_WITH_HERELINES;
3878 SvREFCNT_inc_simple_void_NN(sv);
3879 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3880 && ! PL_parser->lex_re_reparsing)
3882 const char *const key = PL_lex_inpat ? "qr" : "q";
3883 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3887 if (PL_lex_inwhat == OP_TRANS) {
3890 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3893 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3901 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3904 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3906 LEAVE_with_name("scan_const");
3911 * Returns TRUE if there's more to the expression (e.g., a subscript),
3914 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3916 * ->[ and ->{ return TRUE
3917 * { and [ outside a pattern are always subscripts, so return TRUE
3918 * if we're outside a pattern and it's not { or [, then return FALSE
3919 * if we're in a pattern and the first char is a {
3920 * {4,5} (any digits around the comma) returns FALSE
3921 * if we're in a pattern and the first char is a [
3923 * [SOMETHING] has a funky algorithm to decide whether it's a
3924 * character class or not. It has to deal with things like
3925 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3926 * anything else returns TRUE
3929 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3932 S_intuit_more(pTHX_ char *s)
3936 PERL_ARGS_ASSERT_INTUIT_MORE;
3938 if (PL_lex_brackets)
3940 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3942 if (*s != '{' && *s != '[')
3947 /* In a pattern, so maybe we have {n,m}. */
3949 if (regcurly(s, FALSE)) {
3955 /* On the other hand, maybe we have a character class */
3958 if (*s == ']' || *s == '^')
3961 /* this is terrifying, and it works */
3964 const char * const send = strchr(s,']');
3965 unsigned char un_char, last_un_char;
3966 char tmpbuf[sizeof PL_tokenbuf * 4];
3968 if (!send) /* has to be an expression */
3970 weight = 2; /* let's weigh the evidence */
3974 else if (isDIGIT(*s)) {
3976 if (isDIGIT(s[1]) && s[2] == ']')
3982 Zero(seen,256,char);
3984 for (; s < send; s++) {
3985 last_un_char = un_char;
3986 un_char = (unsigned char)*s;
3991 weight -= seen[un_char] * 10;
3992 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3994 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3995 len = (int)strlen(tmpbuf);
3996 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3997 UTF ? SVf_UTF8 : 0, SVt_PV))
4002 else if (*s == '$' && s[1] &&
4003 strchr("[#!%*<>()-=",s[1])) {
4004 if (/*{*/ strchr("])} =",s[2]))
4013 if (strchr("wds]",s[1]))
4015 else if (seen[(U8)'\''] || seen[(U8)'"'])
4017 else if (strchr("rnftbxcav",s[1]))
4019 else if (isDIGIT(s[1])) {
4021 while (s[1] && isDIGIT(s[1]))
4031 if (strchr("aA01! ",last_un_char))
4033 if (strchr("zZ79~",s[1]))
4035 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4036 weight -= 5; /* cope with negative subscript */
4039 if (!isWORDCHAR(last_un_char)
4040 && !(last_un_char == '$' || last_un_char == '@'
4041 || last_un_char == '&')
4042 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4047 if (keyword(tmpbuf, d - tmpbuf, 0))
4050 if (un_char == last_un_char + 1)
4052 weight -= seen[un_char];
4057 if (weight >= 0) /* probably a character class */
4067 * Does all the checking to disambiguate
4069 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4070 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4072 * First argument is the stuff after the first token, e.g. "bar".
4074 * Not a method if foo is a filehandle.
4075 * Not a method if foo is a subroutine prototyped to take a filehandle.
4076 * Not a method if it's really "Foo $bar"
4077 * Method if it's "foo $bar"
4078 * Not a method if it's really "print foo $bar"
4079 * Method if it's really "foo package::" (interpreted as package->foo)
4080 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4081 * Not a method if bar is a filehandle or package, but is quoted with
4086 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4089 char *s = start + (*start == '$');
4090 char tmpbuf[sizeof PL_tokenbuf];
4097 PERL_ARGS_ASSERT_INTUIT_METHOD;
4099 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4101 if (cv && SvPOK(cv)) {
4102 const char *proto = CvPROTO(cv);
4104 while (*proto && (isSPACE(*proto) || *proto == ';'))
4111 if (*start == '$') {
4112 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4113 isUPPER(*PL_tokenbuf))
4116 len = start - SvPVX(PL_linestr);
4120 start = SvPVX(PL_linestr) + len;
4124 return *s == '(' ? FUNCMETH : METHOD;
4127 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4128 /* start is the beginning of the possible filehandle/object,
4129 * and s is the end of it
4130 * tmpbuf is a copy of it (but with single quotes as double colons)
4133 if (!keyword(tmpbuf, len, 0)) {
4134 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4138 soff = s - SvPVX(PL_linestr);
4142 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4143 if (indirgv && GvCVu(indirgv))
4145 /* filehandle or package name makes it a method */
4146 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4148 soff = s - SvPVX(PL_linestr);
4151 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4152 return 0; /* no assumptions -- "=>" quotes bareword */
4154 start_force(PL_curforce);
4155 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4156 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4157 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4159 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4160 ( UTF ? SVf_UTF8 : 0 )));
4165 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4167 return *s == '(' ? FUNCMETH : METHOD;
4173 /* Encoded script support. filter_add() effectively inserts a
4174 * 'pre-processing' function into the current source input stream.
4175 * Note that the filter function only applies to the current source file
4176 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4178 * The datasv parameter (which may be NULL) can be used to pass
4179 * private data to this instance of the filter. The filter function
4180 * can recover the SV using the FILTER_DATA macro and use it to
4181 * store private buffers and state information.
4183 * The supplied datasv parameter is upgraded to a PVIO type
4184 * and the IoDIRP/IoANY field is used to store the function pointer,
4185 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4186 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4187 * private use must be set using malloc'd pointers.
4191 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4200 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4201 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4203 if (!PL_rsfp_filters)
4204 PL_rsfp_filters = newAV();
4207 SvUPGRADE(datasv, SVt_PVIO);
4208 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4209 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4210 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4211 FPTR2DPTR(void *, IoANY(datasv)),
4212 SvPV_nolen(datasv)));
4213 av_unshift(PL_rsfp_filters, 1);
4214 av_store(PL_rsfp_filters, 0, datasv) ;
4216 !PL_parser->filtered
4217 && PL_parser->lex_flags & LEX_EVALBYTES
4218 && PL_bufptr < PL_bufend
4220 const char *s = PL_bufptr;
4221 while (s < PL_bufend) {
4223 SV *linestr = PL_parser->linestr;
4224 char *buf = SvPVX(linestr);
4225 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4226 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4227 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4228 STRLEN const linestart_pos = PL_parser->linestart - buf;
4229 STRLEN const last_uni_pos =
4230 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4231 STRLEN const last_lop_pos =
4232 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4233 av_push(PL_rsfp_filters, linestr);
4234 PL_parser->linestr =
4235 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4236 buf = SvPVX(PL_parser->linestr);
4237 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4238 PL_parser->bufptr = buf + bufptr_pos;
4239 PL_parser->oldbufptr = buf + oldbufptr_pos;
4240 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4241 PL_parser->linestart = buf + linestart_pos;
4242 if (PL_parser->last_uni)
4243 PL_parser->last_uni = buf + last_uni_pos;
4244 if (PL_parser->last_lop)
4245 PL_parser->last_lop = buf + last_lop_pos;
4246 SvLEN(linestr) = SvCUR(linestr);
4247 SvCUR(linestr) = s-SvPVX(linestr);
4248 PL_parser->filtered = 1;
4258 /* Delete most recently added instance of this filter function. */
4260 Perl_filter_del(pTHX_ filter_t funcp)
4265 PERL_ARGS_ASSERT_FILTER_DEL;
4268 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4269 FPTR2DPTR(void*, funcp)));
4271 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4273 /* if filter is on top of stack (usual case) just pop it off */
4274 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4275 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4276 sv_free(av_pop(PL_rsfp_filters));
4280 /* we need to search for the correct entry and clear it */
4281 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4285 /* Invoke the idxth filter function for the current rsfp. */
4286 /* maxlen 0 = read one text line */
4288 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4293 /* This API is bad. It should have been using unsigned int for maxlen.
4294 Not sure if we want to change the API, but if not we should sanity
4295 check the value here. */
4296 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4298 PERL_ARGS_ASSERT_FILTER_READ;
4300 if (!PL_parser || !PL_rsfp_filters)
4302 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4303 /* Provide a default input filter to make life easy. */
4304 /* Note that we append to the line. This is handy. */
4305 DEBUG_P(PerlIO_printf(Perl_debug_log,
4306 "filter_read %d: from rsfp\n", idx));
4307 if (correct_length) {
4310 const int old_len = SvCUR(buf_sv);
4312 /* ensure buf_sv is large enough */
4313 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4314 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4315 correct_length)) <= 0) {
4316 if (PerlIO_error(PL_rsfp))
4317 return -1; /* error */
4319 return 0 ; /* end of file */
4321 SvCUR_set(buf_sv, old_len + len) ;
4322 SvPVX(buf_sv)[old_len + len] = '\0';
4325 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4326 if (PerlIO_error(PL_rsfp))
4327 return -1; /* error */
4329 return 0 ; /* end of file */
4332 return SvCUR(buf_sv);
4334 /* Skip this filter slot if filter has been deleted */
4335 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4336 DEBUG_P(PerlIO_printf(Perl_debug_log,
4337 "filter_read %d: skipped (filter deleted)\n",
4339 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4341 if (SvTYPE(datasv) != SVt_PVIO) {
4342 if (correct_length) {
4344 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4345 if (!remainder) return 0; /* eof */
4346 if (correct_length > remainder) correct_length = remainder;
4347 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4348 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4351 const char *s = SvEND(datasv);
4352 const char *send = SvPVX(datasv) + SvLEN(datasv);
4360 if (s == send) return 0; /* eof */
4361 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4362 SvCUR_set(datasv, s-SvPVX(datasv));
4364 return SvCUR(buf_sv);
4366 /* Get function pointer hidden within datasv */
4367 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4368 DEBUG_P(PerlIO_printf(Perl_debug_log,
4369 "filter_read %d: via function %p (%s)\n",
4370 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4371 /* Call function. The function is expected to */
4372 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4373 /* Return: <0:error, =0:eof, >0:not eof */
4374 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4378 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4382 PERL_ARGS_ASSERT_FILTER_GETS;
4384 #ifdef PERL_CR_FILTER
4385 if (!PL_rsfp_filters) {
4386 filter_add(S_cr_textfilter,NULL);
4389 if (PL_rsfp_filters) {
4391 SvCUR_set(sv, 0); /* start with empty line */
4392 if (FILTER_READ(0, sv, 0) > 0)
4393 return ( SvPVX(sv) ) ;
4398 return (sv_gets(sv, PL_rsfp, append));
4402 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4407 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4409 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4413 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4414 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4416 return GvHV(gv); /* Foo:: */
4419 /* use constant CLASS => 'MyClass' */
4420 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4421 if (gv && GvCV(gv)) {
4422 SV * const sv = cv_const_sv(GvCV(gv));
4424 pkgname = SvPV_const(sv, len);
4427 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4431 * S_readpipe_override
4432 * Check whether readpipe() is overridden, and generates the appropriate
4433 * optree, provided sublex_start() is called afterwards.
4436 S_readpipe_override(pTHX)
4439 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4440 pl_yylval.ival = OP_BACKTICK;
4442 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4444 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4445 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4446 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4448 COPLINE_SET_FROM_MULTI_END;
4449 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4450 op_append_elem(OP_LIST,
4451 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4452 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4459 * The intent of this yylex wrapper is to minimize the changes to the
4460 * tokener when we aren't interested in collecting madprops. It remains
4461 * to be seen how successful this strategy will be...
4468 char *s = PL_bufptr;
4470 /* make sure PL_thiswhite is initialized */
4474 /* previous token ate up our whitespace? */
4475 if (!PL_lasttoke && PL_nextwhite) {
4476 PL_thiswhite = PL_nextwhite;
4480 /* isolate the token, and figure out where it is without whitespace */
4481 PL_realtokenstart = -1;
4485 assert(PL_curforce < 0);
4487 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4488 if (!PL_thistoken) {
4489 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4490 PL_thistoken = newSVpvs("");
4492 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4493 PL_thistoken = newSVpvn(tstart, s - tstart);
4496 if (PL_thismad) /* install head */
4497 CURMAD('X', PL_thistoken);
4500 /* last whitespace of a sublex? */
4501 if (optype == ')' && PL_endwhite) {
4502 CURMAD('X', PL_endwhite);
4507 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4508 if (!PL_thiswhite && !PL_endwhite && !optype) {
4509 sv_free(PL_thistoken);
4514 /* put off final whitespace till peg */
4515 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4516 PL_nextwhite = PL_thiswhite;
4519 else if (PL_thisopen) {
4520 CURMAD('q', PL_thisopen);
4522 sv_free(PL_thistoken);
4526 /* Store actual token text as madprop X */
4527 CURMAD('X', PL_thistoken);
4531 /* add preceding whitespace as madprop _ */
4532 CURMAD('_', PL_thiswhite);
4536 /* add quoted material as madprop = */
4537 CURMAD('=', PL_thisstuff);
4541 /* add terminating quote as madprop Q */
4542 CURMAD('Q', PL_thisclose);
4546 /* special processing based on optype */
4550 /* opval doesn't need a TOKEN since it can already store mp */
4560 if (pl_yylval.opval)
4561 append_madprops(PL_thismad, pl_yylval.opval, 0);
4569 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4582 /* remember any fake bracket that lexer is about to discard */
4583 if (PL_lex_brackets == 1 &&
4584 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4587 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4590 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4591 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4594 break; /* don't bother looking for trailing comment */
4603 /* attach a trailing comment to its statement instead of next token */
4607 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4609 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4611 if (*s == '\n' || *s == '#') {
4612 while (s < PL_bufend && *s != '\n')
4616 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4617 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4630 /* Create new token struct. Note: opvals return early above. */
4631 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4638 S_tokenize_use(pTHX_ int is_use, char *s) {
4641 PERL_ARGS_ASSERT_TOKENIZE_USE;
4643 if (PL_expect != XSTATE)
4644 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4645 is_use ? "use" : "no"));
4648 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4649 s = force_version(s, TRUE);
4650 if (*s == ';' || *s == '}'
4651 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4652 start_force(PL_curforce);
4653 NEXTVAL_NEXTTOKE.opval = NULL;
4656 else if (*s == 'v') {
4657 s = force_word(s,WORD,FALSE,TRUE);
4658 s = force_version(s, FALSE);
4662 s = force_word(s,WORD,FALSE,TRUE);
4663 s = force_version(s, FALSE);
4665 pl_yylval.ival = is_use;
4669 static const char* const exp_name[] =
4670 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4671 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4675 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4677 S_word_takes_any_delimeter(char *p, STRLEN len)
4679 return (len == 1 && strchr("msyq", p[0])) ||
4681 (p[0] == 't' && p[1] == 'r') ||
4682 (p[0] == 'q' && strchr("qwxr", p[1]))));
4686 S_check_scalar_slice(pTHX_ char *s)
4689 while (*s == ' ' || *s == '\t') s++;
4690 if (*s == 'q' && s[1] == 'w'
4691 && !isWORDCHAR_lazy_if(s+2,UTF))
4693 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4694 s += UTF ? UTF8SKIP(s) : 1;
4695 if (*s == '}' || *s == ']')
4696 pl_yylval.ival = OPpSLICEWARNING;
4702 Works out what to call the token just pulled out of the input
4703 stream. The yacc parser takes care of taking the ops we return and
4704 stitching them into a tree.
4707 The type of the next token
4710 Switch based on the current state:
4711 - if we already built the token before, use it
4712 - if we have a case modifier in a string, deal with that
4713 - handle other cases of interpolation inside a string
4714 - scan the next line if we are inside a format
4715 In the normal state switch on the next character:
4717 if alphabetic, go to key lookup
4718 unrecoginized character - croak
4719 - 0/4/26: handle end-of-line or EOF
4720 - cases for whitespace
4721 - \n and #: handle comments and line numbers
4722 - various operators, brackets and sigils
4725 - 'v': vstrings (or go to key lookup)
4726 - 'x' repetition operator (or go to key lookup)
4727 - other ASCII alphanumerics (key lookup begins here):
4730 scan built-in keyword (but do nothing with it yet)
4731 check for statement label
4732 check for lexical subs
4733 goto just_a_word if there is one
4734 see whether built-in keyword is overridden
4735 switch on keyword number:
4736 - default: just_a_word:
4737 not a built-in keyword; handle bareword lookup
4738 disambiguate between method and sub call
4739 fall back to bareword
4740 - cases for built-in keywords
4745 #pragma segment Perl_yylex
4751 char *s = PL_bufptr;
4755 const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
4759 /* orig_keyword, gvp, and gv are initialized here because
4760 * jump to the label just_a_word_zero can bypass their
4761 * initialization later. */
4762 I32 orig_keyword = 0;
4767 SV* tmp = newSVpvs("");
4768 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4769 (IV)CopLINE(PL_curcop),
4770 lex_state_names[PL_lex_state],
4771 exp_name[PL_expect],
4772 pv_display(tmp, s, strlen(s), 0, 60));
4776 switch (PL_lex_state) {
4778 case LEX_NORMAL: /* Some compilers will produce faster */
4779 case LEX_INTERPNORMAL: /* code if we comment these out. */
4783 /* when we've already built the next token, just pull it out of the queue */
4787 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4789 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4790 PL_nexttoke[PL_lasttoke].next_mad = 0;
4791 if (PL_thismad && PL_thismad->mad_key == '_') {
4792 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4793 PL_thismad->mad_val = 0;
4794 mad_free(PL_thismad);
4799 PL_lex_state = PL_lex_defer;
4800 PL_expect = PL_lex_expect;
4801 PL_lex_defer = LEX_NORMAL;
4802 if (!PL_nexttoke[PL_lasttoke].next_type)
4807 pl_yylval = PL_nextval[PL_nexttoke];
4809 PL_lex_state = PL_lex_defer;
4810 PL_expect = PL_lex_expect;
4811 PL_lex_defer = LEX_NORMAL;
4817 next_type = PL_nexttoke[PL_lasttoke].next_type;
4819 next_type = PL_nexttype[PL_nexttoke];
4821 if (next_type & (7<<24)) {
4822 if (next_type & (1<<24)) {
4823 if (PL_lex_brackets > 100)
4824 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4825 PL_lex_brackstack[PL_lex_brackets++] =
4826 (char) ((next_type >> 16) & 0xff);
4828 if (next_type & (2<<24))
4829 PL_lex_allbrackets++;
4830 if (next_type & (4<<24))
4831 PL_lex_allbrackets--;
4832 next_type &= 0xffff;
4834 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4837 /* interpolated case modifiers like \L \U, including \Q and \E.
4838 when we get here, PL_bufptr is at the \
4840 case LEX_INTERPCASEMOD:
4842 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4844 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4845 PL_bufptr, PL_bufend, *PL_bufptr);
4847 /* handle \E or end of string */
4848 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4850 if (PL_lex_casemods) {
4851 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4852 PL_lex_casestack[PL_lex_casemods] = '\0';
4854 if (PL_bufptr != PL_bufend
4855 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4856 || oldmod == 'F')) {
4858 PL_lex_state = LEX_INTERPCONCAT;
4861 PL_thistoken = newSVpvs("\\E");
4864 PL_lex_allbrackets--;
4867 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4868 /* Got an unpaired \E */
4869 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4870 "Useless use of \\E");
4873 while (PL_bufptr != PL_bufend &&
4874 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4877 PL_thiswhite = newSVpvs("");
4878 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4883 if (PL_bufptr != PL_bufend)
4886 PL_lex_state = LEX_INTERPCONCAT;
4890 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4891 "### Saw case modifier\n"); });
4893 if (s[1] == '\\' && s[2] == 'E') {
4897 PL_thiswhite = newSVpvs("");
4898 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4902 PL_lex_state = LEX_INTERPCONCAT;
4907 if (!PL_madskills) /* when just compiling don't need correct */
4908 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4909 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4910 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4911 (strchr(PL_lex_casestack, 'L')
4912 || strchr(PL_lex_casestack, 'U')
4913 || strchr(PL_lex_casestack, 'F'))) {
4914 PL_lex_casestack[--PL_lex_casemods] = '\0';
4915 PL_lex_allbrackets--;
4918 if (PL_lex_casemods > 10)
4919 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4920 PL_lex_casestack[PL_lex_casemods++] = *s;
4921 PL_lex_casestack[PL_lex_casemods] = '\0';
4922 PL_lex_state = LEX_INTERPCONCAT;
4923 start_force(PL_curforce);
4924 NEXTVAL_NEXTTOKE.ival = 0;
4925 force_next((2<<24)|'(');
4926 start_force(PL_curforce);
4928 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4930 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4932 NEXTVAL_NEXTTOKE.ival = OP_LC;
4934 NEXTVAL_NEXTTOKE.ival = OP_UC;
4936 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4938 NEXTVAL_NEXTTOKE.ival = OP_FC;
4940 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4942 SV* const tmpsv = newSVpvs("\\ ");
4943 /* replace the space with the character we want to escape
4945 SvPVX(tmpsv)[1] = *s;
4951 if (PL_lex_starts) {
4957 sv_free(PL_thistoken);
4958 PL_thistoken = newSVpvs("");
4961 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4962 if (PL_lex_casemods == 1 && PL_lex_inpat)
4971 case LEX_INTERPPUSH:
4972 return REPORT(sublex_push());
4974 case LEX_INTERPSTART:
4975 if (PL_bufptr == PL_bufend)
4976 return REPORT(sublex_done());
4977 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4978 "### Interpolated variable\n"); });
4980 /* for /@a/, we leave the joining for the regex engine to do
4981 * (unless we're within \Q etc) */
4982 PL_lex_dojoin = (*PL_bufptr == '@'
4983 && (!PL_lex_inpat || PL_lex_casemods));
4984 PL_lex_state = LEX_INTERPNORMAL;
4985 if (PL_lex_dojoin) {
4986 start_force(PL_curforce);
4987 NEXTVAL_NEXTTOKE.ival = 0;
4989 start_force(PL_curforce);
4990 force_ident("\"", '$');
4991 start_force(PL_curforce);
4992 NEXTVAL_NEXTTOKE.ival = 0;
4994 start_force(PL_curforce);
4995 NEXTVAL_NEXTTOKE.ival = 0;
4996 force_next((2<<24)|'(');
4997 start_force(PL_curforce);
4998 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5001 /* Convert (?{...}) and friends to 'do {...}' */
5002 if (PL_lex_inpat && *PL_bufptr == '(') {
5003 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5005 if (*PL_bufptr != '{')
5007 start_force(PL_curforce);
5008 /* XXX probably need a CURMAD(something) here */
5009 PL_expect = XTERMBLOCK;
5013 if (PL_lex_starts++) {
5018 sv_free(PL_thistoken);
5019 PL_thistoken = newSVpvs("");
5022 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5023 if (!PL_lex_casemods && PL_lex_inpat)
5030 case LEX_INTERPENDMAYBE:
5031 if (intuit_more(PL_bufptr)) {
5032 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5038 if (PL_lex_dojoin) {
5039 PL_lex_dojoin = FALSE;
5040 PL_lex_state = LEX_INTERPCONCAT;
5044 sv_free(PL_thistoken);
5045 PL_thistoken = newSVpvs("");
5048 PL_lex_allbrackets--;
5051 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5052 && SvEVALED(PL_lex_repl))
5054 if (PL_bufptr != PL_bufend)
5055 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5058 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5059 re_eval_str. If the here-doc body’s length equals the previous
5060 value of re_eval_start, re_eval_start will now be null. So
5061 check re_eval_str as well. */
5062 if (PL_parser->lex_shared->re_eval_start
5063 || PL_parser->lex_shared->re_eval_str) {
5065 if (*PL_bufptr != ')')
5066 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5068 /* having compiled a (?{..}) expression, return the original
5069 * text too, as a const */
5070 if (PL_parser->lex_shared->re_eval_str) {
5071 sv = PL_parser->lex_shared->re_eval_str;
5072 PL_parser->lex_shared->re_eval_str = NULL;
5074 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5075 SvPV_shrink_to_cur(sv);
5077 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5078 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5079 start_force(PL_curforce);
5080 /* XXX probably need a CURMAD(something) here */
5081 NEXTVAL_NEXTTOKE.opval =
5082 (OP*)newSVOP(OP_CONST, 0,
5085 PL_parser->lex_shared->re_eval_start = NULL;
5091 case LEX_INTERPCONCAT:
5093 if (PL_lex_brackets)
5094 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5095 (long) PL_lex_brackets);
5097 if (PL_bufptr == PL_bufend)
5098 return REPORT(sublex_done());
5100 /* m'foo' still needs to be parsed for possible (?{...}) */
5101 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5102 SV *sv = newSVsv(PL_linestr);
5104 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5108 s = scan_const(PL_bufptr);
5110 PL_lex_state = LEX_INTERPCASEMOD;
5112 PL_lex_state = LEX_INTERPSTART;
5115 if (s != PL_bufptr) {
5116 start_force(PL_curforce);
5118 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5120 NEXTVAL_NEXTTOKE = pl_yylval;
5123 if (PL_lex_starts++) {
5127 sv_free(PL_thistoken);
5128 PL_thistoken = newSVpvs("");
5131 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5132 if (!PL_lex_casemods && PL_lex_inpat)
5145 s = scan_formline(PL_bufptr);
5146 if (!PL_lex_formbrack)
5155 /* We really do *not* want PL_linestr ever becoming a COW. */
5156 assert (!SvIsCOW(PL_linestr));
5158 PL_oldoldbufptr = PL_oldbufptr;
5160 PL_parser->saw_infix_sigil = 0;
5165 sv_free(PL_thistoken);
5168 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5172 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5175 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5176 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5178 SVs_TEMP | SVf_UTF8),
5179 10, UNI_DISPLAY_ISPRINT)
5180 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5181 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5182 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5183 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5187 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5188 UTF8fARG(UTF, (s - d), d),
5193 goto fake_eof; /* emulate EOF on ^D or ^Z */
5199 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5202 if (PL_lex_brackets &&
5203 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5204 yyerror((const char *)
5206 ? "Format not terminated"
5207 : "Missing right curly or square bracket"));
5209 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5210 "### Tokener got EOF\n");
5214 if (s++ < PL_bufend)
5215 goto retry; /* ignore stray nulls */
5218 if (!PL_in_eval && !PL_preambled) {
5219 PL_preambled = TRUE;
5225 /* Generate a string of Perl code to load the debugger.
5226 * If PERL5DB is set, it will return the contents of that,
5227 * otherwise a compile-time require of perl5db.pl. */
5229 const char * const pdb = PerlEnv_getenv("PERL5DB");
5232 sv_setpv(PL_linestr, pdb);
5233 sv_catpvs(PL_linestr,";");
5235 SETERRNO(0,SS_NORMAL);
5236 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5238 PL_parser->preambling = CopLINE(PL_curcop);
5240 sv_setpvs(PL_linestr,"");
5241 if (PL_preambleav) {
5242 SV **svp = AvARRAY(PL_preambleav);
5243 SV **const end = svp + AvFILLp(PL_preambleav);
5245 sv_catsv(PL_linestr, *svp);
5247 sv_catpvs(PL_linestr, ";");
5249 sv_free(MUTABLE_SV(PL_preambleav));
5250 PL_preambleav = NULL;
5253 sv_catpvs(PL_linestr,
5254 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5255 if (PL_minus_n || PL_minus_p) {
5256 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5258 sv_catpvs(PL_linestr,"chomp;");
5261 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5262 || *PL_splitstr == '"')
5263 && strchr(PL_splitstr + 1, *PL_splitstr))
5264 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5266 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5267 bytes can be used as quoting characters. :-) */
5268 const char *splits = PL_splitstr;
5269 sv_catpvs(PL_linestr, "our @F=split(q\0");
5272 if (*splits == '\\')
5273 sv_catpvn(PL_linestr, splits, 1);
5274 sv_catpvn(PL_linestr, splits, 1);
5275 } while (*splits++);
5276 /* This loop will embed the trailing NUL of
5277 PL_linestr as the last thing it does before
5279 sv_catpvs(PL_linestr, ");");
5283 sv_catpvs(PL_linestr,"our @F=split(' ');");
5286 sv_catpvs(PL_linestr, "\n");
5287 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5288 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5289 PL_last_lop = PL_last_uni = NULL;
5290 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5291 update_debugger_info(PL_linestr, NULL, 0);
5296 bof = PL_rsfp ? TRUE : FALSE;
5299 fake_eof = LEX_FAKE_EOF;
5301 PL_bufptr = PL_bufend;
5302 COPLINE_INC_WITH_HERELINES;
5303 if (!lex_next_chunk(fake_eof)) {
5304 CopLINE_dec(PL_curcop);
5306 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5308 CopLINE_dec(PL_curcop);
5311 PL_realtokenstart = -1;
5314 /* If it looks like the start of a BOM or raw UTF-16,
5315 * check if it in fact is. */
5316 if (bof && PL_rsfp &&
5318 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5321 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5322 bof = (offset == (Off_t)SvCUR(PL_linestr));
5323 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5324 /* offset may include swallowed CR */
5326 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5329 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5330 s = swallow_bom((U8*)s);
5333 if (PL_parser->in_pod) {
5334 /* Incest with pod. */
5337 sv_catsv(PL_thiswhite, PL_linestr);
5339 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5340 sv_setpvs(PL_linestr, "");
5341 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5342 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5343 PL_last_lop = PL_last_uni = NULL;
5344 PL_parser->in_pod = 0;
5347 if (PL_rsfp || PL_parser->filtered)
5349 } while (PL_parser->in_pod);
5350 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5351 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5352 PL_last_lop = PL_last_uni = NULL;
5353 if (CopLINE(PL_curcop) == 1) {
5354 while (s < PL_bufend && isSPACE(*s))
5356 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5360 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5364 if (*s == '#' && *(s+1) == '!')
5366 #ifdef ALTERNATE_SHEBANG
5368 static char const as[] = ALTERNATE_SHEBANG;
5369 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5370 d = s + (sizeof(as) - 1);
5372 #endif /* ALTERNATE_SHEBANG */
5381 while (*d && !isSPACE(*d))
5385 #ifdef ARG_ZERO_IS_SCRIPT
5386 if (ipathend > ipath) {
5388 * HP-UX (at least) sets argv[0] to the script name,
5389 * which makes $^X incorrect. And Digital UNIX and Linux,
5390 * at least, set argv[0] to the basename of the Perl
5391 * interpreter. So, having found "#!", we'll set it right.
5393 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5395 assert(SvPOK(x) || SvGMAGICAL(x));
5396 if (sv_eq(x, CopFILESV(PL_curcop))) {
5397 sv_setpvn(x, ipath, ipathend - ipath);
5403 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5404 const char * const lstart = SvPV_const(x,llen);
5406 bstart += blen - llen;
5407 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5408 sv_setpvn(x, ipath, ipathend - ipath);
5413 TAINT_NOT; /* $^X is always tainted, but that's OK */
5415 #endif /* ARG_ZERO_IS_SCRIPT */
5420 d = instr(s,"perl -");
5422 d = instr(s,"perl");
5424 /* avoid getting into infinite loops when shebang
5425 * line contains "Perl" rather than "perl" */
5427 for (d = ipathend-4; d >= ipath; --d) {
5428 if ((*d == 'p' || *d == 'P')
5429 && !ibcmp(d, "perl", 4))
5439 #ifdef ALTERNATE_SHEBANG
5441 * If the ALTERNATE_SHEBANG on this system starts with a
5442 * character that can be part of a Perl expression, then if
5443 * we see it but not "perl", we're probably looking at the
5444 * start of Perl code, not a request to hand off to some
5445 * other interpreter. Similarly, if "perl" is there, but
5446 * not in the first 'word' of the line, we assume the line
5447 * contains the start of the Perl program.
5449 if (d && *s != '#') {
5450 const char *c = ipath;
5451 while (*c && !strchr("; \t\r\n\f\v#", *c))
5454 d = NULL; /* "perl" not in first word; ignore */
5456 *s = '#'; /* Don't try to parse shebang line */
5458 #endif /* ALTERNATE_SHEBANG */
5463 !instr(s,"indir") &&
5464 instr(PL_origargv[0],"perl"))
5471 while (s < PL_bufend && isSPACE(*s))
5473 if (s < PL_bufend) {
5474 Newx(newargv,PL_origargc+3,char*);
5476 while (s < PL_bufend && !isSPACE(*s))
5479 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5482 newargv = PL_origargv;
5485 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5487 Perl_croak(aTHX_ "Can't exec %s", ipath);
5490 while (*d && !isSPACE(*d))
5492 while (SPACE_OR_TAB(*d))
5496 const bool switches_done = PL_doswitches;
5497 const U32 oldpdb = PL_perldb;
5498 const bool oldn = PL_minus_n;
5499 const bool oldp = PL_minus_p;
5503 bool baduni = FALSE;
5505 const char *d2 = d1 + 1;
5506 if (parse_unicode_opts((const char **)&d2)
5510 if (baduni || *d1 == 'M' || *d1 == 'm') {
5511 const char * const m = d1;
5512 while (*d1 && !isSPACE(*d1))
5514 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5517 d1 = moreswitches(d1);
5519 if (PL_doswitches && !switches_done) {
5520 int argc = PL_origargc;
5521 char **argv = PL_origargv;
5524 } while (argc && argv[0][0] == '-' && argv[0][1]);
5525 init_argv_symbols(argc,argv);
5527 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5528 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5529 /* if we have already added "LINE: while (<>) {",
5530 we must not do it again */
5532 sv_setpvs(PL_linestr, "");
5533 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5534 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5535 PL_last_lop = PL_last_uni = NULL;
5536 PL_preambled = FALSE;
5537 if (PERLDB_LINE || PERLDB_SAVESRC)
5538 (void)gv_fetchfile(PL_origfilename);
5545 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5546 PL_lex_state = LEX_FORMLINE;
5547 start_force(PL_curforce);
5548 NEXTVAL_NEXTTOKE.ival = 0;
5549 force_next(FORMRBRACK);
5554 #ifdef PERL_STRICT_CR
5555 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5557 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5559 case ' ': case '\t': case '\f': case 013:
5561 PL_realtokenstart = -1;
5564 PL_thiswhite = newSVpvs("");
5565 sv_catpvn(PL_thiswhite, s, 1);
5573 PL_realtokenstart = -1;
5577 if (PL_lex_state != LEX_NORMAL ||
5578 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5579 if (*s == '#' && s == PL_linestart && PL_in_eval
5580 && !PL_rsfp && !PL_parser->filtered) {
5581 /* handle eval qq[#line 1 "foo"\n ...] */
5582 CopLINE_dec(PL_curcop);
5585 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5587 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5591 const bool in_comment = *s == '#';
5593 while (d < PL_bufend && *d != '\n')
5597 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5598 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5602 PL_thiswhite = newSVpvn(s, d - s);
5605 if (in_comment && d == PL_bufend
5606 && PL_lex_state == LEX_INTERPNORMAL
5607 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5608 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5611 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5612 PL_lex_state = LEX_FORMLINE;
5613 start_force(PL_curforce);
5614 NEXTVAL_NEXTTOKE.ival = 0;
5615 force_next(FORMRBRACK);
5621 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5622 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5625 TOKEN(PEG); /* make sure any #! line is accessible */
5631 if (PL_madskills) d = s;
5632 while (s < PL_bufend && *s != '\n')
5640 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5641 Perl_croak(aTHX_ "panic: input overflow");
5643 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5645 PL_thiswhite = newSVpvs("");
5646 if (CopLINE(PL_curcop) == 1) {
5647 sv_setpvs(PL_thiswhite, "");
5650 sv_catpvn(PL_thiswhite, d, s - d);
5657 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5665 while (s < PL_bufend && SPACE_OR_TAB(*s))
5668 if (strnEQ(s,"=>",2)) {
5669 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5670 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5671 OPERATOR('-'); /* unary minus */
5673 PL_last_uni = PL_oldbufptr;
5675 case 'r': ftst = OP_FTEREAD; break;
5676 case 'w': ftst = OP_FTEWRITE; break;
5677 case 'x': ftst = OP_FTEEXEC; break;
5678 case 'o': ftst = OP_FTEOWNED; break;
5679 case 'R': ftst = OP_FTRREAD; break;
5680 case 'W': ftst = OP_FTRWRITE; break;
5681 case 'X': ftst = OP_FTREXEC; break;
5682 case 'O': ftst = OP_FTROWNED; break;
5683 case 'e': ftst = OP_FTIS; break;
5684 case 'z': ftst = OP_FTZERO; break;
5685 case 's': ftst = OP_FTSIZE; break;
5686 case 'f': ftst = OP_FTFILE; break;
5687 case 'd': ftst = OP_FTDIR; break;
5688 case 'l': ftst = OP_FTLINK; break;
5689 case 'p': ftst = OP_FTPIPE; break;
5690 case 'S': ftst = OP_FTSOCK; break;
5691 case 'u': ftst = OP_FTSUID; break;
5692 case 'g': ftst = OP_FTSGID; break;
5693 case 'k': ftst = OP_FTSVTX; break;
5694 case 'b': ftst = OP_FTBLK; break;
5695 case 'c': ftst = OP_FTCHR; break;
5696 case 't': ftst = OP_FTTTY; break;
5697 case 'T': ftst = OP_FTTEXT; break;
5698 case 'B': ftst = OP_FTBINARY; break;
5699 case 'M': case 'A': case 'C':
5700 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5702 case 'M': ftst = OP_FTMTIME; break;
5703 case 'A': ftst = OP_FTATIME; break;
5704 case 'C': ftst = OP_FTCTIME; break;
5712 PL_last_lop_op = (OPCODE)ftst;
5713 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5714 "### Saw file test %c\n", (int)tmp);
5719 /* Assume it was a minus followed by a one-letter named
5720 * subroutine call (or a -bareword), then. */
5721 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5722 "### '-%c' looked like a file test but was not\n",
5729 const char tmp = *s++;
5732 if (PL_expect == XOPERATOR)
5737 else if (*s == '>') {
5740 if (isIDFIRST_lazy_if(s,UTF)) {
5741 s = force_word(s,METHOD,FALSE,TRUE);
5749 if (PL_expect == XOPERATOR) {
5750 if (*s == '=' && !PL_lex_allbrackets &&
5751 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5758 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5760 OPERATOR('-'); /* unary minus */
5766 const char tmp = *s++;
5769 if (PL_expect == XOPERATOR)
5774 if (PL_expect == XOPERATOR) {
5775 if (*s == '=' && !PL_lex_allbrackets &&
5776 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5783 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5790 if (PL_expect != XOPERATOR) {
5791 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5792 PL_expect = XOPERATOR;
5793 force_ident(PL_tokenbuf, '*');
5801 if (*s == '=' && !PL_lex_allbrackets &&
5802 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5808 if (*s == '=' && !PL_lex_allbrackets &&
5809 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5813 PL_parser->saw_infix_sigil = 1;
5818 if (PL_expect == XOPERATOR) {
5819 if (s[1] == '=' && !PL_lex_allbrackets &&
5820 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5823 PL_parser->saw_infix_sigil = 1;
5826 PL_tokenbuf[0] = '%';
5827 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5828 sizeof PL_tokenbuf - 1, FALSE);
5830 if (!PL_tokenbuf[1]) {
5833 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5835 PL_tokenbuf[0] = '@';
5837 /* Warn about % where they meant $. */
5838 if (*s == '[' || *s == '{') {
5839 if (ckWARN(WARN_SYNTAX)) {
5840 S_check_scalar_slice(aTHX_ s);
5844 PL_expect = XOPERATOR;
5845 force_ident_maybe_lex('%');
5849 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5850 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5855 if (PL_lex_brackets > 100)
5856 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5857 PL_lex_brackstack[PL_lex_brackets++] = 0;
5858 PL_lex_allbrackets++;
5860 const char tmp = *s++;
5865 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5867 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5870 Perl_ck_warner_d(aTHX_
5871 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5872 "Smartmatch is experimental");
5878 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5885 goto just_a_word_zero_gv;
5888 switch (PL_expect) {
5894 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5896 PL_bufptr = s; /* update in case we back off */
5899 "Use of := for an empty attribute list is not allowed");
5906 PL_expect = XTERMBLOCK;
5909 stuffstart = s - SvPVX(PL_linestr) - 1;
5913 while (isIDFIRST_lazy_if(s,UTF)) {
5916 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5917 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5918 if (tmp < 0) tmp = -tmp;
5933 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5935 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
5936 COPLINE_SET_FROM_MULTI_END;
5938 /* MUST advance bufptr here to avoid bogus
5939 "at end of line" context messages from yyerror().
5941 PL_bufptr = s + len;
5942 yyerror("Unterminated attribute parameter in attribute list");
5946 return REPORT(0); /* EOF indicator */
5950 sv_catsv(sv, PL_lex_stuff);
5951 attrs = op_append_elem(OP_LIST, attrs,
5952 newSVOP(OP_CONST, 0, sv));
5953 SvREFCNT_dec(PL_lex_stuff);
5954 PL_lex_stuff = NULL;
5957 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5959 if (PL_in_my == KEY_our) {
5960 deprecate(":unique");
5963 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5966 /* NOTE: any CV attrs applied here need to be part of
5967 the CVf_BUILTIN_ATTRS define in cv.h! */
5968 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5970 CvLVALUE_on(PL_compcv);
5972 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5974 deprecate(":locked");
5976 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5978 CvMETHOD_on(PL_compcv);
5980 /* After we've set the flags, it could be argued that
5981 we don't need to do the attributes.pm-based setting
5982 process, and shouldn't bother appending recognized
5983 flags. To experiment with that, uncomment the
5984 following "else". (Note that's already been
5985 uncommented. That keeps the above-applied built-in
5986 attributes from being intercepted (and possibly
5987 rejected) by a package's attribute routines, but is
5988 justified by the performance win for the common case
5989 of applying only built-in attributes.) */
5991 attrs = op_append_elem(OP_LIST, attrs,
5992 newSVOP(OP_CONST, 0,
5996 if (*s == ':' && s[1] != ':')
5999 break; /* require real whitespace or :'s */
6000 /* XXX losing whitespace on sequential attributes here */
6004 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
6005 if (*s != ';' && *s != '}' && *s != tmp
6006 && (tmp != '=' || *s != ')')) {
6007 const char q = ((*s == '\'') ? '"' : '\'');
6008 /* If here for an expression, and parsed no attrs, back
6010 if (tmp == '=' && !attrs) {
6014 /* MUST advance bufptr here to avoid bogus "at end of line"
6015 context messages from yyerror().
6018 yyerror( (const char *)
6020 ? Perl_form(aTHX_ "Invalid separator character "
6021 "%c%c%c in attribute list", q, *s, q)
6022 : "Unterminated attribute list" ) );
6030 start_force(PL_curforce);
6031 NEXTVAL_NEXTTOKE.opval = attrs;
6032 CURMAD('_', PL_nextwhite);
6037 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6038 (s - SvPVX(PL_linestr)) - stuffstart);
6043 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6047 PL_lex_allbrackets--;
6051 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6052 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6056 PL_lex_allbrackets++;
6059 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6065 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6068 PL_lex_allbrackets--;
6074 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6077 if (PL_lex_brackets <= 0)
6078 yyerror("Unmatched right square bracket");
6081 PL_lex_allbrackets--;
6082 if (PL_lex_state == LEX_INTERPNORMAL) {
6083 if (PL_lex_brackets == 0) {
6084 if (*s == '-' && s[1] == '>')
6085 PL_lex_state = LEX_INTERPENDMAYBE;
6086 else if (*s != '[' && *s != '{')
6087 PL_lex_state = LEX_INTERPEND;
6094 if (PL_lex_brackets > 100) {
6095 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6097 switch (PL_expect) {
6099 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6100 PL_lex_allbrackets++;
6101 OPERATOR(HASHBRACK);
6103 while (s < PL_bufend && SPACE_OR_TAB(*s))
6106 PL_tokenbuf[0] = '\0';
6107 if (d < PL_bufend && *d == '-') {
6108 PL_tokenbuf[0] = '-';
6110 while (d < PL_bufend && SPACE_OR_TAB(*d))
6113 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6114 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6116 while (d < PL_bufend && SPACE_OR_TAB(*d))
6119 const char minus = (PL_tokenbuf[0] == '-');
6120 s = force_word(s + minus, WORD, FALSE, TRUE);
6128 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6129 PL_lex_allbrackets++;
6134 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6135 PL_lex_allbrackets++;
6140 if (PL_oldoldbufptr == PL_last_lop)
6141 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6143 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6144 PL_lex_allbrackets++;
6147 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6149 /* This hack is to get the ${} in the message. */
6151 yyerror("syntax error");
6154 OPERATOR(HASHBRACK);
6156 /* This hack serves to disambiguate a pair of curlies
6157 * as being a block or an anon hash. Normally, expectation
6158 * determines that, but in cases where we're not in a
6159 * position to expect anything in particular (like inside
6160 * eval"") we have to resolve the ambiguity. This code
6161 * covers the case where the first term in the curlies is a
6162 * quoted string. Most other cases need to be explicitly
6163 * disambiguated by prepending a "+" before the opening
6164 * curly in order to force resolution as an anon hash.
6166 * XXX should probably propagate the outer expectation
6167 * into eval"" to rely less on this hack, but that could
6168 * potentially break current behavior of eval"".
6172 if (*s == '\'' || *s == '"' || *s == '`') {
6173 /* common case: get past first string, handling escapes */
6174 for (t++; t < PL_bufend && *t != *s;)
6175 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6179 else if (*s == 'q') {
6182 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6183 && !isWORDCHAR(*t))))
6185 /* skip q//-like construct */
6187 char open, close, term;
6190 while (t < PL_bufend && isSPACE(*t))
6192 /* check for q => */
6193 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6194 OPERATOR(HASHBRACK);
6198 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6202 for (t++; t < PL_bufend; t++) {
6203 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6205 else if (*t == open)
6209 for (t++; t < PL_bufend; t++) {
6210 if (*t == '\\' && t+1 < PL_bufend)
6212 else if (*t == close && --brackets <= 0)
6214 else if (*t == open)
6221 /* skip plain q word */
6222 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6225 else if (isWORDCHAR_lazy_if(t,UTF)) {
6227 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6230 while (t < PL_bufend && isSPACE(*t))
6232 /* if comma follows first term, call it an anon hash */
6233 /* XXX it could be a comma expression with loop modifiers */
6234 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6235 || (*t == '=' && t[1] == '>')))
6236 OPERATOR(HASHBRACK);
6237 if (PL_expect == XREF)
6240 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6246 pl_yylval.ival = CopLINE(PL_curcop);
6247 if (isSPACE(*s) || *s == '#')
6248 PL_copline = NOLINE; /* invalidate current command line number */
6249 TOKEN(formbrack ? '=' : '{');
6251 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6255 if (PL_lex_brackets <= 0)
6256 yyerror("Unmatched right curly bracket");
6258 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6259 PL_lex_allbrackets--;
6260 if (PL_lex_state == LEX_INTERPNORMAL) {
6261 if (PL_lex_brackets == 0) {
6262 if (PL_expect & XFAKEBRACK) {
6263 PL_expect &= XENUMMASK;
6264 PL_lex_state = LEX_INTERPEND;
6269 PL_thiswhite = newSVpvs("");
6270 sv_catpvs(PL_thiswhite,"}");
6273 return yylex(); /* ignore fake brackets */
6275 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6276 && SvEVALED(PL_lex_repl))
6277 PL_lex_state = LEX_INTERPEND;
6278 else if (*s == '-' && s[1] == '>')
6279 PL_lex_state = LEX_INTERPENDMAYBE;
6280 else if (*s != '[' && *s != '{')
6281 PL_lex_state = LEX_INTERPEND;
6284 if (PL_expect & XFAKEBRACK) {
6285 PL_expect &= XENUMMASK;
6287 return yylex(); /* ignore fake brackets */
6289 start_force(PL_curforce);
6291 curmad('X', newSVpvn(s-1,1));
6292 CURMAD('_', PL_thiswhite);
6294 force_next(formbrack ? '.' : '}');
6295 if (formbrack) LEAVE;
6297 if (PL_madskills && !PL_thistoken)
6298 PL_thistoken = newSVpvs("");
6300 if (formbrack == 2) { /* means . where arguments were expected */
6301 start_force(PL_curforce);
6309 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6310 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6317 if (PL_expect == XOPERATOR) {
6318 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6319 && isIDFIRST_lazy_if(s,UTF))
6321 CopLINE_dec(PL_curcop);
6322 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6323 CopLINE_inc(PL_curcop);
6325 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6326 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6330 PL_parser->saw_infix_sigil = 1;
6334 PL_tokenbuf[0] = '&';
6335 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6336 sizeof PL_tokenbuf - 1, TRUE);
6337 if (PL_tokenbuf[1]) {
6338 PL_expect = XOPERATOR;
6339 force_ident_maybe_lex('&');
6343 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6349 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6350 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6357 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6358 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6366 const char tmp = *s++;
6368 if (!PL_lex_allbrackets &&
6369 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6376 if (!PL_lex_allbrackets &&
6377 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6385 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6386 && strchr("+-*/%.^&|<",tmp))
6387 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6388 "Reversed %c= operator",(int)tmp);
6390 if (PL_expect == XSTATE && isALPHA(tmp) &&
6391 (s == PL_linestart+1 || s[-2] == '\n') )
6393 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6394 || PL_lex_state != LEX_NORMAL) {
6399 if (strnEQ(s,"=cut",4)) {
6415 PL_thiswhite = newSVpvs("");
6416 sv_catpvn(PL_thiswhite, PL_linestart,
6417 PL_bufend - PL_linestart);
6421 PL_parser->in_pod = 1;
6425 if (PL_expect == XBLOCK) {
6427 #ifdef PERL_STRICT_CR
6428 while (SPACE_OR_TAB(*t))
6430 while (SPACE_OR_TAB(*t) || *t == '\r')
6433 if (*t == '\n' || *t == '#') {
6436 SAVEI8(PL_parser->form_lex_state);
6437 SAVEI32(PL_lex_formbrack);
6438 PL_parser->form_lex_state = PL_lex_state;
6439 PL_lex_formbrack = PL_lex_brackets + 1;
6443 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6452 const char tmp = *s++;
6454 /* was this !=~ where !~ was meant?
6455 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6457 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6458 const char *t = s+1;
6460 while (t < PL_bufend && isSPACE(*t))
6463 if (*t == '/' || *t == '?' ||
6464 ((*t == 'm' || *t == 's' || *t == 'y')
6465 && !isWORDCHAR(t[1])) ||
6466 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6467 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6468 "!=~ should be !~");
6470 if (!PL_lex_allbrackets &&
6471 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6483 if (PL_expect != XOPERATOR) {
6484 if (s[1] != '<' && !strchr(s,'>'))
6487 s = scan_heredoc(s);
6489 s = scan_inputsymbol(s);
6490 PL_expect = XOPERATOR;
6491 TOKEN(sublex_start());
6497 if (*s == '=' && !PL_lex_allbrackets &&
6498 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6502 SHop(OP_LEFT_SHIFT);
6507 if (!PL_lex_allbrackets &&
6508 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6515 if (!PL_lex_allbrackets &&
6516 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6524 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6532 const char tmp = *s++;
6534 if (*s == '=' && !PL_lex_allbrackets &&
6535 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6539 SHop(OP_RIGHT_SHIFT);
6541 else if (tmp == '=') {
6542 if (!PL_lex_allbrackets &&
6543 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6551 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6560 if (PL_expect == XOPERATOR) {
6561 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6562 return deprecate_commaless_var_list();
6566 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6567 PL_tokenbuf[0] = '@';
6568 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6569 sizeof PL_tokenbuf - 1, FALSE);
6570 if (PL_expect == XOPERATOR)
6571 no_op("Array length", s);
6572 if (!PL_tokenbuf[1])
6574 PL_expect = XOPERATOR;
6575 force_ident_maybe_lex('#');
6579 PL_tokenbuf[0] = '$';
6580 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6581 sizeof PL_tokenbuf - 1, FALSE);
6582 if (PL_expect == XOPERATOR)
6584 if (!PL_tokenbuf[1]) {
6586 yyerror("Final $ should be \\$ or $name");
6592 const char tmp = *s;
6593 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6596 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6597 && intuit_more(s)) {
6599 PL_tokenbuf[0] = '@';
6600 if (ckWARN(WARN_SYNTAX)) {
6603 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6606 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6607 while (t < PL_bufend && *t != ']')
6609 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6610 "Multidimensional syntax %.*s not supported",
6611 (int)((t - PL_bufptr) + 1), PL_bufptr);
6615 else if (*s == '{') {
6617 PL_tokenbuf[0] = '%';
6618 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6619 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6621 char tmpbuf[sizeof PL_tokenbuf];
6624 } while (isSPACE(*t));
6625 if (isIDFIRST_lazy_if(t,UTF)) {
6627 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6632 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6633 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6634 "You need to quote \"%"UTF8f"\"",
6635 UTF8fARG(UTF, len, tmpbuf));
6641 PL_expect = XOPERATOR;
6642 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6643 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6644 if (!islop || PL_last_lop_op == OP_GREPSTART)
6645 PL_expect = XOPERATOR;
6646 else if (strchr("$@\"'`q", *s))
6647 PL_expect = XTERM; /* e.g. print $fh "foo" */
6648 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6649 PL_expect = XTERM; /* e.g. print $fh &sub */
6650 else if (isIDFIRST_lazy_if(s,UTF)) {
6651 char tmpbuf[sizeof PL_tokenbuf];
6653 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6654 if ((t2 = keyword(tmpbuf, len, 0))) {
6655 /* binary operators exclude handle interpretations */
6667 PL_expect = XTERM; /* e.g. print $fh length() */
6672 PL_expect = XTERM; /* e.g. print $fh subr() */
6675 else if (isDIGIT(*s))
6676 PL_expect = XTERM; /* e.g. print $fh 3 */
6677 else if (*s == '.' && isDIGIT(s[1]))
6678 PL_expect = XTERM; /* e.g. print $fh .3 */
6679 else if ((*s == '?' || *s == '-' || *s == '+')
6680 && !isSPACE(s[1]) && s[1] != '=')
6681 PL_expect = XTERM; /* e.g. print $fh -1 */
6682 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6684 PL_expect = XTERM; /* e.g. print $fh /.../
6685 XXX except DORDOR operator
6687 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6689 PL_expect = XTERM; /* print $fh <<"EOF" */
6692 force_ident_maybe_lex('$');
6696 if (PL_expect == XOPERATOR)
6698 PL_tokenbuf[0] = '@';
6699 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6701 if (!PL_tokenbuf[1]) {
6704 if (PL_lex_state == LEX_NORMAL)
6706 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6708 PL_tokenbuf[0] = '%';
6710 /* Warn about @ where they meant $. */
6711 if (*s == '[' || *s == '{') {
6712 if (ckWARN(WARN_SYNTAX)) {
6713 S_check_scalar_slice(aTHX_ s);
6717 PL_expect = XOPERATOR;
6718 force_ident_maybe_lex('@');
6721 case '/': /* may be division, defined-or, or pattern */
6722 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6723 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6724 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6729 case '?': /* may either be conditional or pattern */
6730 if (PL_expect == XOPERATOR) {
6733 if (!PL_lex_allbrackets &&
6734 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6738 PL_lex_allbrackets++;
6744 /* A // operator. */
6745 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6746 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6747 LEX_FAKEEOF_LOGIC)) {
6755 if (*s == '=' && !PL_lex_allbrackets &&
6756 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6765 /* Disable warning on "study /blah/" */
6766 if (PL_oldoldbufptr == PL_last_uni
6767 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6768 || memNE(PL_last_uni, "study", 5)
6769 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6773 deprecate("?PATTERN? without explicit operator");
6774 s = scan_pat(s,OP_MATCH);
6775 TERM(sublex_start());
6779 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6780 #ifdef PERL_STRICT_CR
6783 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6785 && (s == PL_linestart || s[-1] == '\n') )
6788 formbrack = 2; /* dot seen where arguments expected */
6791 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6795 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6798 if (!PL_lex_allbrackets &&
6799 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6806 pl_yylval.ival = OPf_SPECIAL;
6812 if (*s == '=' && !PL_lex_allbrackets &&
6813 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6820 case '0': case '1': case '2': case '3': case '4':
6821 case '5': case '6': case '7': case '8': case '9':
6822 s = scan_num(s, &pl_yylval);
6823 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6824 if (PL_expect == XOPERATOR)
6829 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6830 COPLINE_SET_FROM_MULTI_END;
6831 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6832 if (PL_expect == XOPERATOR) {
6833 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6834 return deprecate_commaless_var_list();
6841 pl_yylval.ival = OP_CONST;
6842 TERM(sublex_start());
6845 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6848 printbuf("### Saw string before %s\n", s);
6850 PerlIO_printf(Perl_debug_log,
6851 "### Saw unterminated string\n");
6853 if (PL_expect == XOPERATOR) {
6854 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6855 return deprecate_commaless_var_list();
6862 pl_yylval.ival = OP_CONST;
6863 /* FIXME. I think that this can be const if char *d is replaced by
6864 more localised variables. */
6865 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6866 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6867 pl_yylval.ival = OP_STRINGIFY;
6871 if (pl_yylval.ival == OP_CONST)
6872 COPLINE_SET_FROM_MULTI_END;
6873 TERM(sublex_start());
6876 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
6877 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6878 if (PL_expect == XOPERATOR)
6879 no_op("Backticks",s);
6882 readpipe_override();
6883 TERM(sublex_start());
6887 if (PL_lex_inwhat && isDIGIT(*s))
6888 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6890 if (PL_expect == XOPERATOR)
6891 no_op("Backslash",s);
6895 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6896 char *start = s + 2;
6897 while (isDIGIT(*start) || *start == '_')
6899 if (*start == '.' && isDIGIT(start[1])) {
6900 s = scan_num(s, &pl_yylval);
6903 else if ((*start == ':' && start[1] == ':')
6904 || (PL_expect == XSTATE && *start == ':'))
6906 else if (PL_expect == XSTATE) {
6908 while (d < PL_bufend && isSPACE(*d)) d++;
6909 if (*d == ':') goto keylookup;
6911 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6912 if (!isALPHA(*start) && (PL_expect == XTERM
6913 || PL_expect == XREF || PL_expect == XSTATE
6914 || PL_expect == XTERMORDORDOR)) {
6915 GV *const gv = gv_fetchpvn_flags(s, start - s,
6916 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6918 s = scan_num(s, &pl_yylval);
6925 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6978 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6980 /* Some keywords can be followed by any delimiter, including ':' */
6981 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6983 /* x::* is just a word, unless x is "CORE" */
6984 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6988 while (d < PL_bufend && isSPACE(*d))
6989 d++; /* no comments skipped here, or s### is misparsed */
6991 /* Is this a word before a => operator? */
6992 if (*d == '=' && d[1] == '>') {
6996 = (OP*)newSVOP(OP_CONST, 0,
6997 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6998 pl_yylval.opval->op_private = OPpCONST_BARE;
7002 /* Check for plugged-in keyword */
7006 char *saved_bufptr = PL_bufptr;
7008 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7010 if (result == KEYWORD_PLUGIN_DECLINE) {
7011 /* not a plugged-in keyword */
7012 PL_bufptr = saved_bufptr;
7013 } else if (result == KEYWORD_PLUGIN_STMT) {
7014 pl_yylval.opval = o;
7017 return REPORT(PLUGSTMT);
7018 } else if (result == KEYWORD_PLUGIN_EXPR) {
7019 pl_yylval.opval = o;
7021 PL_expect = XOPERATOR;
7022 return REPORT(PLUGEXPR);
7024 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7029 /* Check for built-in keyword */
7030 tmp = keyword(PL_tokenbuf, len, 0);
7032 /* Is this a label? */
7033 if (!anydelim && PL_expect == XSTATE
7034 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7036 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7037 pl_yylval.pval[len] = '\0';
7038 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7043 /* Check for lexical sub */
7044 if (PL_expect != XOPERATOR) {
7045 char tmpbuf[sizeof PL_tokenbuf + 1];
7047 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7048 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7049 if (off != NOT_IN_PAD) {
7050 assert(off); /* we assume this is boolean-true below */
7051 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7052 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7053 HEK * const stashname = HvNAME_HEK(stash);
7054 sv = newSVhek(stashname);
7055 sv_catpvs(sv, "::");
7056 sv_catpvn_flags(sv, PL_tokenbuf, len,
7057 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7058 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7068 rv2cv_op = newOP(OP_PADANY, 0);
7069 rv2cv_op->op_targ = off;
7070 cv = find_lexical_cv(off);
7078 if (tmp < 0) { /* second-class keyword? */
7079 GV *ogv = NULL; /* override (winner) */
7080 GV *hgv = NULL; /* hidden (loser) */
7081 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7083 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7084 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
7087 if (GvIMPORTED_CV(gv))
7089 else if (! CvMETHOD(cv))
7093 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7094 UTF ? -(I32)len : (I32)len, FALSE)) &&
7095 (gv = *gvp) && isGV_with_GP(gv) &&
7096 GvCVu(gv) && GvIMPORTED_CV(gv))
7103 tmp = 0; /* overridden by import or by GLOBAL */
7106 && -tmp==KEY_lock /* XXX generalizable kludge */
7109 tmp = 0; /* any sub overrides "weak" keyword */
7111 else { /* no override */
7113 if (tmp == KEY_dump) {
7114 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7115 "dump() better written as CORE::dump()");
7119 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
7120 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7121 "Ambiguous call resolved as CORE::%s(), "
7122 "qualify as such or use &",
7127 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7128 && (!anydelim || *s != '#')) {
7129 /* no override, and not s### either; skipspace is safe here
7130 * check for => on following line */
7132 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7133 STRLEN soff = s - SvPVX(PL_linestr);
7134 s = skipspace_flags(s, LEX_NO_INCLINE);
7135 arrow = *s == '=' && s[1] == '>';
7136 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7137 s = SvPVX(PL_linestr) + soff;
7145 default: /* not a keyword */
7146 /* Trade off - by using this evil construction we can pull the
7147 variable gv into the block labelled keylookup. If not, then
7148 we have to give it function scope so that the goto from the
7149 earlier ':' case doesn't bypass the initialisation. */
7151 just_a_word_zero_gv:
7163 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7164 const char penultchar =
7165 lastchar && PL_bufptr - 2 >= PL_linestart
7169 SV *nextPL_nextwhite = 0;
7173 /* Get the rest if it looks like a package qualifier */
7175 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7177 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7180 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7181 UTF8fARG(UTF, len, PL_tokenbuf),
7182 *s == '\'' ? "'" : "::");
7187 if (PL_expect == XOPERATOR) {
7188 if (PL_bufptr == PL_linestart) {
7189 CopLINE_dec(PL_curcop);
7190 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7191 CopLINE_inc(PL_curcop);
7194 no_op("Bareword",s);
7197 /* Look for a subroutine with this name in current package,
7198 unless this is a lexical sub, or name is "Foo::",
7199 in which case Foo is a bareword
7200 (and a package name). */
7202 if (len > 2 && !PL_madskills &&
7203 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7205 if (ckWARN(WARN_BAREWORD)
7206 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7207 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7208 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7209 UTF8fARG(UTF, len, PL_tokenbuf));
7211 PL_tokenbuf[len] = '\0';
7217 /* Mustn't actually add anything to a symbol table.
7218 But also don't want to "initialise" any placeholder
7219 constants that might already be there into full
7220 blown PVGVs with attached PVCV. */
7221 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7222 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7228 /* if we saw a global override before, get the right name */
7231 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7232 len ? len : strlen(PL_tokenbuf));
7234 SV * const tmp_sv = sv;
7235 sv = newSVpvs("CORE::GLOBAL::");
7236 sv_catsv(sv, tmp_sv);
7237 SvREFCNT_dec(tmp_sv);
7241 if (PL_madskills && !PL_thistoken) {
7242 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7243 PL_thistoken = newSVpvn(start,s - start);
7244 PL_realtokenstart = s - SvPVX(PL_linestr);
7248 /* Presume this is going to be a bareword of some sort. */
7250 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7251 pl_yylval.opval->op_private = OPpCONST_BARE;
7253 /* And if "Foo::", then that's what it certainly is. */
7259 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7260 const_op->op_private = OPpCONST_BARE;
7261 rv2cv_op = newCVREF(0, const_op);
7262 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7265 /* See if it's the indirect object for a list operator. */
7267 if (PL_oldoldbufptr &&
7268 PL_oldoldbufptr < PL_bufptr &&
7269 (PL_oldoldbufptr == PL_last_lop
7270 || PL_oldoldbufptr == PL_last_uni) &&
7271 /* NO SKIPSPACE BEFORE HERE! */
7272 (PL_expect == XREF ||
7273 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7275 bool immediate_paren = *s == '(';
7277 /* (Now we can afford to cross potential line boundary.) */
7278 s = SKIPSPACE2(s,nextPL_nextwhite);
7280 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7283 /* Two barewords in a row may indicate method call. */
7285 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7286 (tmp = intuit_method(s, gv, cv))) {
7288 if (tmp == METHOD && !PL_lex_allbrackets &&
7289 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7290 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7294 /* If not a declared subroutine, it's an indirect object. */
7295 /* (But it's an indir obj regardless for sort.) */
7296 /* Also, if "_" follows a filetest operator, it's a bareword */
7299 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7301 (PL_last_lop_op != OP_MAPSTART &&
7302 PL_last_lop_op != OP_GREPSTART))))
7303 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7304 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7307 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7312 PL_expect = XOPERATOR;
7315 s = SKIPSPACE2(s,nextPL_nextwhite);
7316 PL_nextwhite = nextPL_nextwhite;
7321 /* Is this a word before a => operator? */
7322 if (*s == '=' && s[1] == '>' && !pkgname) {
7325 /* This is our own scalar, created a few lines above,
7327 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7328 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7329 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7330 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7331 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7335 /* If followed by a paren, it's certainly a subroutine. */
7340 while (SPACE_OR_TAB(*d))
7342 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7349 PL_nextwhite = PL_thiswhite;
7352 start_force(PL_curforce);
7354 NEXTVAL_NEXTTOKE.opval =
7355 off ? rv2cv_op : pl_yylval.opval;
7356 PL_expect = XOPERATOR;
7359 PL_nextwhite = nextPL_nextwhite;
7360 curmad('X', PL_thistoken);
7361 PL_thistoken = newSVpvs("");
7365 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7366 else op_free(rv2cv_op), force_next(WORD);
7371 /* If followed by var or block, call it a method (unless sub) */
7373 if ((*s == '$' || *s == '{') && !cv) {
7375 PL_last_lop = PL_oldbufptr;
7376 PL_last_lop_op = OP_METHOD;
7377 if (!PL_lex_allbrackets &&
7378 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7379 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7383 /* If followed by a bareword, see if it looks like indir obj. */
7386 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7387 && (tmp = intuit_method(s, gv, cv))) {
7389 if (tmp == METHOD && !PL_lex_allbrackets &&
7390 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7391 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7395 /* Not a method, so call it a subroutine (if defined) */
7398 if (lastchar == '-' && penultchar != '-') {
7399 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7400 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7401 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7402 UTF8fARG(UTF, l, PL_tokenbuf),
7403 UTF8fARG(UTF, l, PL_tokenbuf));
7405 /* Check for a constant sub */
7406 if ((sv = cv_const_sv_or_av(cv))) {
7409 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7410 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7411 if (SvTYPE(sv) == SVt_PVAV)
7412 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7415 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7416 pl_yylval.opval->op_folded = 1;
7417 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7422 op_free(pl_yylval.opval);
7424 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7425 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7426 PL_last_lop = PL_oldbufptr;
7427 PL_last_lop_op = OP_ENTERSUB;
7428 /* Is there a prototype? */
7435 STRLEN protolen = CvPROTOLEN(cv);
7436 const char *proto = CvPROTO(cv);
7438 proto = S_strip_spaces(aTHX_ proto, &protolen);
7441 if ((optional = *proto == ';'))
7444 while (*proto == ';');
7448 *proto == '$' || *proto == '_'
7449 || *proto == '*' || *proto == '+'
7454 *proto == '\\' && proto[1] && proto[2] == '\0'
7457 UNIPROTO(UNIOPSUB,optional);
7458 if (*proto == '\\' && proto[1] == '[') {
7459 const char *p = proto + 2;
7460 while(*p && *p != ']')
7462 if(*p == ']' && !p[1])
7463 UNIPROTO(UNIOPSUB,optional);
7465 if (*proto == '&' && *s == '{') {
7467 sv_setpvs(PL_subname, "__ANON__");
7469 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7470 if (!PL_lex_allbrackets &&
7471 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7472 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7479 PL_nextwhite = PL_thiswhite;
7482 start_force(PL_curforce);
7483 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7486 PL_nextwhite = nextPL_nextwhite;
7487 curmad('X', PL_thistoken);
7488 PL_thistoken = newSVpvs("");
7490 force_next(off ? PRIVATEREF : WORD);
7491 if (!PL_lex_allbrackets &&
7492 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7493 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7498 /* Guess harder when madskills require "best effort". */
7499 if (PL_madskills && (!gv || !GvCVu(gv))) {
7500 int probable_sub = 0;
7501 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7503 else if (isALPHA(*s)) {
7507 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7508 if (!keyword(tmpbuf, tmplen, 0))
7511 while (d < PL_bufend && isSPACE(*d))
7513 if (*d == '=' && d[1] == '>')
7518 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7520 op_free(pl_yylval.opval);
7522 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7523 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7524 PL_last_lop = PL_oldbufptr;
7525 PL_last_lop_op = OP_ENTERSUB;
7526 PL_nextwhite = PL_thiswhite;
7528 start_force(PL_curforce);
7529 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7531 PL_nextwhite = nextPL_nextwhite;
7532 curmad('X', PL_thistoken);
7533 PL_thistoken = newSVpvs("");
7534 force_next(off ? PRIVATEREF : WORD);
7535 if (!PL_lex_allbrackets &&
7536 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7537 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7541 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7543 force_next(off ? PRIVATEREF : WORD);
7544 if (!PL_lex_allbrackets &&
7545 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7546 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7551 /* Call it a bare word */
7553 if (PL_hints & HINT_STRICT_SUBS)
7554 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7557 /* after "print" and similar functions (corresponding to
7558 * "F? L" in opcode.pl), whatever wasn't already parsed as
7559 * a filehandle should be subject to "strict subs".
7560 * Likewise for the optional indirect-object argument to system
7561 * or exec, which can't be a bareword */
7562 if ((PL_last_lop_op == OP_PRINT
7563 || PL_last_lop_op == OP_PRTF
7564 || PL_last_lop_op == OP_SAY
7565 || PL_last_lop_op == OP_SYSTEM
7566 || PL_last_lop_op == OP_EXEC)
7567 && (PL_hints & HINT_STRICT_SUBS))
7568 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7569 if (lastchar != '-') {
7570 if (ckWARN(WARN_RESERVED)) {
7574 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7575 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7583 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7584 && saw_infix_sigil) {
7585 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7586 "Operator or semicolon missing before %c%"UTF8f,
7588 UTF8fARG(UTF, strlen(PL_tokenbuf),
7590 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7591 "Ambiguous use of %c resolved as operator %c",
7592 lastchar, lastchar);
7599 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7604 (OP*)newSVOP(OP_CONST, 0,
7605 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7608 case KEY___PACKAGE__:
7610 (OP*)newSVOP(OP_CONST, 0,
7612 ? newSVhek(HvNAME_HEK(PL_curstash))
7619 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7620 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7623 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7625 gv_init(gv,stash,"DATA",4,0);
7628 GvIOp(gv) = newIO();
7629 IoIFP(GvIOp(gv)) = PL_rsfp;
7630 #if defined(HAS_FCNTL) && defined(F_SETFD)
7632 const int fd = PerlIO_fileno(PL_rsfp);
7633 fcntl(fd,F_SETFD,fd >= 3);
7636 /* Mark this internal pseudo-handle as clean */
7637 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7638 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7639 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7641 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7642 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7643 /* if the script was opened in binmode, we need to revert
7644 * it to text mode for compatibility; but only iff it has CRs
7645 * XXX this is a questionable hack at best. */
7646 if (PL_bufend-PL_bufptr > 2
7647 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7650 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7651 loc = PerlIO_tell(PL_rsfp);
7652 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7655 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7657 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7658 #endif /* NETWARE */
7660 PerlIO_seek(PL_rsfp, loc, 0);
7664 #ifdef PERLIO_LAYERS
7667 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7668 else if (PL_encoding) {
7675 XPUSHs(PL_encoding);
7677 call_method("name", G_SCALAR);
7681 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7682 Perl_form(aTHX_ ":encoding(%"SVf")",
7691 if (PL_realtokenstart >= 0) {
7692 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7694 PL_endwhite = newSVpvs("");
7695 sv_catsv(PL_endwhite, PL_thiswhite);
7697 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7698 PL_realtokenstart = -1;
7700 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7710 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7719 if (PL_expect == XSTATE) {
7726 if (*s == ':' && s[1] == ':') {
7730 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7731 if ((*s == ':' && s[1] == ':')
7732 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7736 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7740 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7741 UTF8fARG(UTF, len, PL_tokenbuf));
7744 else if (tmp == KEY_require || tmp == KEY_do
7746 /* that's a way to remember we saw "CORE::" */
7759 LOP(OP_ACCEPT,XTERM);
7762 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7767 LOP(OP_ATAN2,XTERM);
7773 LOP(OP_BINMODE,XTERM);
7776 LOP(OP_BLESS,XTERM);
7785 /* We have to disambiguate the two senses of
7786 "continue". If the next token is a '{' then
7787 treat it as the start of a continue block;
7788 otherwise treat it as a control operator.
7798 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7808 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7817 if (!PL_cryptseen) {
7818 PL_cryptseen = TRUE;
7822 LOP(OP_CRYPT,XTERM);
7825 LOP(OP_CHMOD,XTERM);
7828 LOP(OP_CHOWN,XTERM);
7831 LOP(OP_CONNECT,XTERM);
7851 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7853 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
7856 force_ident_maybe_lex('&');
7861 if (orig_keyword == KEY_do) {
7870 PL_hints |= HINT_BLOCK_SCOPE;
7880 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7881 STR_WITH_LEN("NDBM_File::"),
7882 STR_WITH_LEN("DB_File::"),
7883 STR_WITH_LEN("GDBM_File::"),
7884 STR_WITH_LEN("SDBM_File::"),
7885 STR_WITH_LEN("ODBM_File::"),
7887 LOP(OP_DBMOPEN,XTERM);
7893 PL_expect = XOPERATOR;
7894 s = force_word(s,WORD,TRUE,FALSE);
7901 pl_yylval.ival = CopLINE(PL_curcop);
7905 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7919 if (*s == '{') { /* block eval */
7920 PL_expect = XTERMBLOCK;
7921 UNIBRACK(OP_ENTERTRY);
7923 else { /* string eval */
7925 UNIBRACK(OP_ENTEREVAL);
7930 UNIBRACK(-OP_ENTEREVAL);
7944 case KEY_endhostent:
7950 case KEY_endservent:
7953 case KEY_endprotoent:
7964 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7966 pl_yylval.ival = CopLINE(PL_curcop);
7968 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7971 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7974 if ((PL_bufend - p) >= 3 &&
7975 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7977 else if ((PL_bufend - p) >= 4 &&
7978 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7981 if (isIDFIRST_lazy_if(p,UTF)) {
7982 p = scan_ident(p, PL_bufend,
7983 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7987 Perl_croak(aTHX_ "Missing $ on loop variable");
7989 s = SvPVX(PL_linestr) + soff;
7995 LOP(OP_FORMLINE,XTERM);
8004 LOP(OP_FCNTL,XTERM);
8010 LOP(OP_FLOCK,XTERM);
8013 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8018 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8023 LOP(OP_GREPSTART, XREF);
8026 PL_expect = XOPERATOR;
8027 s = force_word(s,WORD,TRUE,FALSE);
8042 case KEY_getpriority:
8043 LOP(OP_GETPRIORITY,XTERM);
8045 case KEY_getprotobyname:
8048 case KEY_getprotobynumber:
8049 LOP(OP_GPBYNUMBER,XTERM);
8051 case KEY_getprotoent:
8063 case KEY_getpeername:
8064 UNI(OP_GETPEERNAME);
8066 case KEY_gethostbyname:
8069 case KEY_gethostbyaddr:
8070 LOP(OP_GHBYADDR,XTERM);
8072 case KEY_gethostent:
8075 case KEY_getnetbyname:
8078 case KEY_getnetbyaddr:
8079 LOP(OP_GNBYADDR,XTERM);
8084 case KEY_getservbyname:
8085 LOP(OP_GSBYNAME,XTERM);
8087 case KEY_getservbyport:
8088 LOP(OP_GSBYPORT,XTERM);
8090 case KEY_getservent:
8093 case KEY_getsockname:
8094 UNI(OP_GETSOCKNAME);
8096 case KEY_getsockopt:
8097 LOP(OP_GSOCKOPT,XTERM);
8112 pl_yylval.ival = CopLINE(PL_curcop);
8113 Perl_ck_warner_d(aTHX_
8114 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8115 "given is experimental");
8120 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8128 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8130 pl_yylval.ival = CopLINE(PL_curcop);
8134 LOP(OP_INDEX,XTERM);
8140 LOP(OP_IOCTL,XTERM);
8152 PL_expect = XOPERATOR;
8153 s = force_word(s,WORD,TRUE,FALSE);
8170 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8175 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8189 LOP(OP_LISTEN,XTERM);
8198 s = scan_pat(s,OP_MATCH);
8199 TERM(sublex_start());
8202 LOP(OP_MAPSTART, XREF);
8205 LOP(OP_MKDIR,XTERM);
8208 LOP(OP_MSGCTL,XTERM);
8211 LOP(OP_MSGGET,XTERM);
8214 LOP(OP_MSGRCV,XTERM);
8217 LOP(OP_MSGSND,XTERM);
8222 PL_in_my = (U16)tmp;
8224 if (isIDFIRST_lazy_if(s,UTF)) {
8228 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8229 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8231 if (!FEATURE_LEXSUBS_IS_ENABLED)
8233 "Experimental \"%s\" subs not enabled",
8234 tmp == KEY_my ? "my" :
8235 tmp == KEY_state ? "state" : "our");
8236 Perl_ck_warner_d(aTHX_
8237 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8238 "The lexical_subs feature is experimental");
8241 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8242 if (!PL_in_my_stash) {
8245 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8246 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8249 if (PL_madskills) { /* just add type to declarator token */
8250 sv_catsv(PL_thistoken, PL_nextwhite);
8252 sv_catpvn(PL_thistoken, start, s - start);
8260 PL_expect = XOPERATOR;
8261 s = force_word(s,WORD,TRUE,FALSE);
8265 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8270 s = tokenize_use(0, s);
8274 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8277 if (!PL_lex_allbrackets &&
8278 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8279 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8285 if (isIDFIRST_lazy_if(s,UTF)) {
8287 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8289 for (t=d; isSPACE(*t);)
8291 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8293 && !(t[0] == '=' && t[1] == '>')
8294 && !(t[0] == ':' && t[1] == ':')
8295 && !keyword(s, d-s, 0)
8297 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8298 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8299 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8305 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8307 pl_yylval.ival = OP_OR;
8317 LOP(OP_OPEN_DIR,XTERM);
8320 checkcomma(s,PL_tokenbuf,"filehandle");
8324 checkcomma(s,PL_tokenbuf,"filehandle");
8343 s = force_word(s,WORD,FALSE,TRUE);
8345 s = force_strict_version(s);
8346 PL_lex_expect = XBLOCK;
8350 LOP(OP_PIPE_OP,XTERM);
8353 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8354 COPLINE_SET_FROM_MULTI_END;
8357 pl_yylval.ival = OP_CONST;
8358 TERM(sublex_start());
8365 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8366 COPLINE_SET_FROM_MULTI_END;
8369 PL_expect = XOPERATOR;
8370 if (SvCUR(PL_lex_stuff)) {
8371 int warned_comma = !ckWARN(WARN_QW);
8372 int warned_comment = warned_comma;
8373 d = SvPV_force(PL_lex_stuff, len);
8375 for (; isSPACE(*d) && len; --len, ++d)
8380 if (!warned_comma || !warned_comment) {
8381 for (; !isSPACE(*d) && len; --len, ++d) {
8382 if (!warned_comma && *d == ',') {
8383 Perl_warner(aTHX_ packWARN(WARN_QW),
8384 "Possible attempt to separate words with commas");
8387 else if (!warned_comment && *d == '#') {
8388 Perl_warner(aTHX_ packWARN(WARN_QW),
8389 "Possible attempt to put comments in qw() list");
8395 for (; !isSPACE(*d) && len; --len, ++d)
8398 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8399 words = op_append_elem(OP_LIST, words,
8400 newSVOP(OP_CONST, 0, tokeq(sv)));
8405 words = newNULLLIST();
8407 SvREFCNT_dec(PL_lex_stuff);
8408 PL_lex_stuff = NULL;
8410 PL_expect = XOPERATOR;
8411 pl_yylval.opval = sawparens(words);
8416 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8419 pl_yylval.ival = OP_STRINGIFY;
8420 if (SvIVX(PL_lex_stuff) == '\'')
8421 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8422 TERM(sublex_start());
8425 s = scan_pat(s,OP_QR);
8426 TERM(sublex_start());
8429 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8432 readpipe_override();
8433 TERM(sublex_start());
8440 PL_expect = XOPERATOR;
8442 s = force_version(s, FALSE);
8444 else if (*s != 'v' || !isDIGIT(s[1])
8445 || (s = force_version(s, TRUE), *s == 'v'))
8447 *PL_tokenbuf = '\0';
8448 s = force_word(s,WORD,TRUE,TRUE);
8449 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8450 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8451 GV_ADD | (UTF ? SVf_UTF8 : 0));
8453 yyerror("<> should be quotes");
8455 if (orig_keyword == KEY_require) {
8463 PL_last_uni = PL_oldbufptr;
8464 PL_last_lop_op = OP_REQUIRE;
8466 return REPORT( (int)REQUIRE );
8472 PL_expect = XOPERATOR;
8473 s = force_word(s,WORD,TRUE,FALSE);
8477 LOP(OP_RENAME,XTERM);
8486 LOP(OP_RINDEX,XTERM);
8495 UNIDOR(OP_READLINE);
8498 UNIDOR(OP_BACKTICK);
8507 LOP(OP_REVERSE,XTERM);
8510 UNIDOR(OP_READLINK);
8517 if (pl_yylval.opval)
8518 TERM(sublex_start());
8520 TOKEN(1); /* force error */
8523 checkcomma(s,PL_tokenbuf,"filehandle");
8533 LOP(OP_SELECT,XTERM);
8539 LOP(OP_SEMCTL,XTERM);
8542 LOP(OP_SEMGET,XTERM);
8545 LOP(OP_SEMOP,XTERM);
8551 LOP(OP_SETPGRP,XTERM);
8553 case KEY_setpriority:
8554 LOP(OP_SETPRIORITY,XTERM);
8556 case KEY_sethostent:
8562 case KEY_setservent:
8565 case KEY_setprotoent:
8575 LOP(OP_SEEKDIR,XTERM);
8577 case KEY_setsockopt:
8578 LOP(OP_SSOCKOPT,XTERM);
8584 LOP(OP_SHMCTL,XTERM);
8587 LOP(OP_SHMGET,XTERM);
8590 LOP(OP_SHMREAD,XTERM);
8593 LOP(OP_SHMWRITE,XTERM);
8596 LOP(OP_SHUTDOWN,XTERM);
8605 LOP(OP_SOCKET,XTERM);
8607 case KEY_socketpair:
8608 LOP(OP_SOCKPAIR,XTERM);
8611 checkcomma(s,PL_tokenbuf,"subroutine name");
8614 s = force_word(s,WORD,TRUE,TRUE);
8618 LOP(OP_SPLIT,XTERM);
8621 LOP(OP_SPRINTF,XTERM);
8624 LOP(OP_SPLICE,XTERM);
8639 LOP(OP_SUBSTR,XTERM);
8645 char * const tmpbuf = PL_tokenbuf + 1;
8646 expectation attrful;
8647 bool have_name, have_proto;
8648 const int key = tmp;
8650 SV *format_name = NULL;
8656 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8657 SV *subtoken = PL_madskills
8658 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8663 s = SKIPSPACE2(s,tmpwhite);
8669 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8670 (*s == ':' && s[1] == ':'))
8673 SV *nametoke = NULL;
8677 attrful = XATTRBLOCK;
8678 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8682 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8684 if (key == KEY_format)
8685 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8688 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8690 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8692 sv_setpvn(PL_subname, tmpbuf, len);
8694 sv_setsv(PL_subname,PL_curstname);
8695 sv_catpvs(PL_subname,"::");
8696 sv_catpvn(PL_subname,tmpbuf,len);
8698 if (SvUTF8(PL_linestr))
8699 SvUTF8_on(PL_subname);
8705 CURMAD('X', nametoke);
8706 CURMAD('_', tmpwhite);
8707 force_ident_maybe_lex('&');
8709 s = SKIPSPACE2(d,tmpwhite);
8715 if (key == KEY_my || key == KEY_our || key==KEY_state)
8718 /* diag_listed_as: Missing name in "%s sub" */
8720 "Missing name in \"%s\"", PL_bufptr);
8722 PL_expect = XTERMBLOCK;
8723 attrful = XATTRTERM;
8724 sv_setpvs(PL_subname,"?");
8728 if (key == KEY_format) {
8730 PL_thistoken = subtoken;
8734 start_force(PL_curforce);
8735 NEXTVAL_NEXTTOKE.opval
8736 = (OP*)newSVOP(OP_CONST,0, format_name);
8737 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8744 /* Look for a prototype */
8746 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8747 COPLINE_SET_FROM_MULTI_END;
8749 Perl_croak(aTHX_ "Prototype not terminated");
8750 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8755 CURMAD('q', PL_thisopen);
8756 CURMAD('_', tmpwhite);
8757 CURMAD('=', PL_thisstuff);
8758 CURMAD('Q', PL_thisclose);
8759 NEXTVAL_NEXTTOKE.opval =
8760 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8761 PL_lex_stuff = NULL;
8764 s = SKIPSPACE2(s,tmpwhite);
8772 if (*s == ':' && s[1] != ':')
8773 PL_expect = attrful;
8774 else if (*s != '{' && key == KEY_sub) {
8776 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8777 else if (*s != ';' && *s != '}')
8778 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8785 curmad('^', newSVpvs(""));
8786 CURMAD('_', tmpwhite);
8790 PL_thistoken = subtoken;
8791 PERL_UNUSED_VAR(have_proto);
8794 NEXTVAL_NEXTTOKE.opval =
8795 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8796 PL_lex_stuff = NULL;
8802 sv_setpvs(PL_subname, "__ANON__");
8804 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8808 force_ident_maybe_lex('&');
8814 LOP(OP_SYSTEM,XREF);
8817 LOP(OP_SYMLINK,XTERM);
8820 LOP(OP_SYSCALL,XTERM);
8823 LOP(OP_SYSOPEN,XTERM);
8826 LOP(OP_SYSSEEK,XTERM);
8829 LOP(OP_SYSREAD,XTERM);
8832 LOP(OP_SYSWRITE,XTERM);
8837 TERM(sublex_start());
8858 LOP(OP_TRUNCATE,XTERM);
8870 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8872 pl_yylval.ival = CopLINE(PL_curcop);
8876 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8878 pl_yylval.ival = CopLINE(PL_curcop);
8882 LOP(OP_UNLINK,XTERM);
8888 LOP(OP_UNPACK,XTERM);
8891 LOP(OP_UTIME,XTERM);
8897 LOP(OP_UNSHIFT,XTERM);
8900 s = tokenize_use(1, s);
8910 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8912 pl_yylval.ival = CopLINE(PL_curcop);
8913 Perl_ck_warner_d(aTHX_
8914 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8915 "when is experimental");
8919 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8921 pl_yylval.ival = CopLINE(PL_curcop);
8925 PL_hints |= HINT_BLOCK_SCOPE;
8932 LOP(OP_WAITPID,XTERM);
8938 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8939 * we use the same number on EBCDIC */
8940 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8944 if (PL_expect == XOPERATOR) {
8945 if (*s == '=' && !PL_lex_allbrackets &&
8946 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8954 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8956 pl_yylval.ival = OP_XOR;
8962 #pragma segment Main
8968 Looks up an identifier in the pad or in a package
8971 PRIVATEREF if this is a lexical name.
8972 WORD if this belongs to a package.
8975 if we're in a my declaration
8976 croak if they tried to say my($foo::bar)
8977 build the ops for a my() declaration
8978 if it's an access to a my() variable
8979 build ops for access to a my() variable
8980 if in a dq string, and they've said @foo and we can't find @foo
8982 build ops for a bareword
8986 S_pending_ident(pTHX)
8990 const char pit = (char)pl_yylval.ival;
8991 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8992 /* All routes through this function want to know if there is a colon. */
8993 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8995 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8996 "### Pending identifier '%s'\n", PL_tokenbuf); });
8998 /* if we're in a my(), we can't allow dynamics here.
8999 $foo'bar has already been turned into $foo::bar, so
9000 just check for colons.
9002 if it's a legal name, the OP is a PADANY.
9005 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9007 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9008 "variable %s in \"our\"",
9009 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9010 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9014 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9015 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9016 UTF ? SVf_UTF8 : 0);
9018 pl_yylval.opval = newOP(OP_PADANY, 0);
9019 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9020 UTF ? SVf_UTF8 : 0);
9026 build the ops for accesses to a my() variable.
9031 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9032 UTF ? SVf_UTF8 : 0);
9033 if (tmp != NOT_IN_PAD) {
9034 /* might be an "our" variable" */
9035 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9036 /* build ops for a bareword */
9037 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9038 HEK * const stashname = HvNAME_HEK(stash);
9039 SV * const sym = newSVhek(stashname);
9040 sv_catpvs(sym, "::");
9041 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9042 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9043 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9047 ? (GV_ADDMULTI | GV_ADDINEVAL)
9050 ((PL_tokenbuf[0] == '$') ? SVt_PV
9051 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9056 pl_yylval.opval = newOP(OP_PADANY, 0);
9057 pl_yylval.opval->op_targ = tmp;
9063 Whine if they've said @foo in a doublequoted string,
9064 and @foo isn't a variable we can find in the symbol
9067 if (ckWARN(WARN_AMBIGUOUS) &&
9068 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9069 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9070 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9071 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9072 /* DO NOT warn for @- and @+ */
9073 && !( PL_tokenbuf[2] == '\0' &&
9074 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9077 /* Downgraded from fatal to warning 20000522 mjd */
9078 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9079 "Possible unintended interpolation of %"UTF8f
9081 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9085 /* build ops for a bareword */
9086 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9087 newSVpvn_flags(PL_tokenbuf + 1,
9089 UTF ? SVf_UTF8 : 0 ));
9090 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9092 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9093 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9094 | ( UTF ? SVf_UTF8 : 0 ),
9095 ((PL_tokenbuf[0] == '$') ? SVt_PV
9096 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9102 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9106 PERL_ARGS_ASSERT_CHECKCOMMA;
9108 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9109 if (ckWARN(WARN_SYNTAX)) {
9112 for (w = s+2; *w && level; w++) {
9120 /* the list of chars below is for end of statements or
9121 * block / parens, boolean operators (&&, ||, //) and branch
9122 * constructs (or, and, if, until, unless, while, err, for).
9123 * Not a very solid hack... */
9124 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9125 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9126 "%s (...) interpreted as function",name);
9129 while (s < PL_bufend && isSPACE(*s))
9133 while (s < PL_bufend && isSPACE(*s))
9135 if (isIDFIRST_lazy_if(s,UTF)) {
9136 const char * const w = s;
9137 s += UTF ? UTF8SKIP(s) : 1;
9138 while (isWORDCHAR_lazy_if(s,UTF))
9139 s += UTF ? UTF8SKIP(s) : 1;
9140 while (s < PL_bufend && isSPACE(*s))
9144 if (keyword(w, s - w, 0))
9147 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9148 if (gv && GvCVu(gv))
9150 Perl_croak(aTHX_ "No comma allowed after %s", what);
9155 /* S_new_constant(): do any overload::constant lookup.
9157 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9158 Best used as sv=new_constant(..., sv, ...).
9159 If s, pv are NULL, calls subroutine with one argument,
9160 and <type> is used with error messages only.
9161 <type> is assumed to be well formed UTF-8 */
9164 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9165 SV *sv, SV *pv, const char *type, STRLEN typelen)
9168 HV * table = GvHV(PL_hintgv); /* ^H */
9173 const char *why1 = "", *why2 = "", *why3 = "";
9175 PERL_ARGS_ASSERT_NEW_CONSTANT;
9176 /* We assume that this is true: */
9177 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9180 /* charnames doesn't work well if there have been errors found */
9181 if (PL_error_count > 0 && *key == 'c')
9183 SvREFCNT_dec_NN(sv);
9184 return &PL_sv_undef;
9187 sv_2mortal(sv); /* Parent created it permanently */
9189 || ! (PL_hints & HINT_LOCALIZE_HH)
9190 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9195 /* Here haven't found what we're looking for. If it is charnames,
9196 * perhaps it needs to be loaded. Try doing that before giving up */
9198 Perl_load_module(aTHX_
9200 newSVpvs("_charnames"),
9201 /* version parameter; no need to specify it, as if
9202 * we get too early a version, will fail anyway,
9203 * not being able to find '_charnames' */
9209 table = GvHV(PL_hintgv);
9211 && (PL_hints & HINT_LOCALIZE_HH)
9212 && (cvp = hv_fetch(table, key, keylen, FALSE))
9218 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9219 msg = Perl_form(aTHX_
9220 "Constant(%.*s) unknown",
9221 (int)(type ? typelen : len),
9227 why3 = "} is not defined";
9230 msg = Perl_form(aTHX_
9231 /* The +3 is for '\N{'; -4 for that, plus '}' */
9232 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9236 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9237 (int)(type ? typelen : len),
9238 (type ? type: s), why1, why2, why3);
9241 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9242 return SvREFCNT_inc_simple_NN(sv);
9247 pv = newSVpvn_flags(s, len, SVs_TEMP);
9249 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9251 typesv = &PL_sv_undef;
9253 PUSHSTACKi(PERLSI_OVERLOAD);
9265 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9269 /* Check the eval first */
9270 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9272 const char * errstr;
9273 sv_catpvs(errsv, "Propagated");
9274 errstr = SvPV_const(errsv, errlen);
9275 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9277 res = SvREFCNT_inc_simple_NN(sv);
9281 SvREFCNT_inc_simple_void_NN(res);
9290 why1 = "Call to &{$^H{";
9292 why3 = "}} did not return a defined value";
9294 (void)sv_2mortal(sv);
9301 PERL_STATIC_INLINE void
9302 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9304 PERL_ARGS_ASSERT_PARSE_IDENT;
9308 Perl_croak(aTHX_ "%s", ident_too_long);
9309 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9310 /* The UTF-8 case must come first, otherwise things
9311 * like c\N{COMBINING TILDE} would start failing, as the
9312 * isWORDCHAR_A case below would gobble the 'c' up.
9315 char *t = *s + UTF8SKIP(*s);
9316 while (isIDCONT_utf8((U8*)t))
9318 if (*d + (t - *s) > e)
9319 Perl_croak(aTHX_ "%s", ident_too_long);
9320 Copy(*s, *d, t - *s, char);
9324 else if ( isWORDCHAR_A(**s) ) {
9327 } while (isWORDCHAR_A(**s) && *d < e);
9329 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9334 else if (allow_package && **s == ':' && (*s)[1] == ':'
9335 /* Disallow things like Foo::$bar. For the curious, this is
9336 * the code path that triggers the "Bad name after" warning
9337 * when looking for barewords.
9339 && (*s)[2] != '$') {
9349 /* Returns a NUL terminated string, with the length of the string written to
9353 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9357 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9358 bool is_utf8 = cBOOL(UTF);
9360 PERL_ARGS_ASSERT_SCAN_WORD;
9362 parse_ident(&s, &d, e, allow_package, is_utf8);
9369 S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
9372 char *bracket = NULL;
9375 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9376 bool is_utf8 = cBOOL(UTF);
9378 PERL_ARGS_ASSERT_SCAN_IDENT;
9383 while (isDIGIT(*s)) {
9385 Perl_croak(aTHX_ "%s", ident_too_long);
9390 parse_ident(&s, &d, e, 1, is_utf8);
9395 /* Either a digit variable, or parse_ident() found an identifier
9396 (anything valid as a bareword), so job done and return. */
9397 if (PL_lex_state != LEX_NORMAL)
9398 PL_lex_state = LEX_INTERPENDMAYBE;
9401 if (*s == '$' && s[1] &&
9402 (isIDFIRST_lazy_if(s+1,is_utf8)
9403 || isDIGIT_A((U8)s[1])
9406 || strnEQ(s+1,"::",2)) )
9408 /* Dereferencing a value in a scalar variable.
9409 The alternatives are different syntaxes for a scalar variable.
9410 Using ' as a leading package separator isn't allowed. :: is. */
9413 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9417 while (s < send && SPACE_OR_TAB(*s))
9421 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9422 * iff Unicode semantics are to be used. The legal ones are any of:
9424 * b) ASCII punctuation
9425 * c) When not under Unicode rules, any upper Latin1 character
9426 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9427 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
9429 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9430 || isDIGIT_A((U8)(d)) \
9431 || (!(u) && !isASCII((U8)(d))) \
9432 || ((((U8)(d)) < 32) \
9433 && (((((U8)(d)) >= 14) \
9434 || (((U8)(d)) <= 8 && (d) != 0) \
9435 || (((U8)(d)) == 13)))) \
9436 || (((U8)(d)) == toCTRL('?')))
9438 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9441 const STRLEN skip = UTF8SKIP(s);
9444 for ( i = 0; i < skip; i++ )
9452 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9453 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9457 /* Warn about ambiguous code after unary operators if {...} notation isn't
9458 used. There's no difference in ambiguity; it's merely a heuristic
9459 about when not to warn. */
9460 else if (ck_uni && !bracket)
9463 /* If we were processing {...} notation then... */
9464 if (isIDFIRST_lazy_if(d,is_utf8)) {
9465 /* if it starts as a valid identifier, assume that it is one.
9466 (the later check for } being at the expected point will trap
9467 cases where this doesn't pan out.) */
9468 d += is_utf8 ? UTF8SKIP(d) : 1;
9469 parse_ident(&s, &d, e, 1, is_utf8);
9471 while (s < send && SPACE_OR_TAB(*s))
9473 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9474 /* ${foo[0]} and ${foo{bar}} notation. */
9475 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9476 const char * const brack =
9478 ((*s == '[') ? "[...]" : "{...}");
9479 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9480 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9481 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9482 funny, dest, brack, funny, dest, brack);
9485 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9486 PL_lex_allbrackets++;
9490 /* Handle extended ${^Foo} variables
9491 * 1999-02-27 mjd-perl-patch@plover.com */
9492 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9496 while (isWORDCHAR(*s) && d < e) {
9500 Perl_croak(aTHX_ "%s", ident_too_long);
9504 while (s < send && SPACE_OR_TAB(*s))
9507 /* Expect to find a closing } after consuming any trailing whitespace.
9511 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9512 PL_lex_state = LEX_INTERPEND;
9515 if (PL_lex_state == LEX_NORMAL) {
9516 if (ckWARN(WARN_AMBIGUOUS) &&
9517 (keyword(dest, d - dest, 0)
9518 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9520 SV *tmp = newSVpvn_flags( dest, d - dest,
9521 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9524 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9525 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9526 funny, tmp, funny, tmp);
9531 /* Didn't find the closing } at the point we expected, so restore
9532 state such that the next thing to process is the opening { and */
9533 s = bracket; /* let the parser handle it */
9537 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9538 PL_lex_state = LEX_INTERPEND;
9543 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9545 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9546 * the parse starting at 's', based on the subset that are valid in this
9547 * context input to this routine in 'valid_flags'. Advances s. Returns
9548 * TRUE if the input should be treated as a valid flag, so the next char
9549 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9550 * first call on the current regex. This routine will set it to any
9551 * charset modifier found. The caller shouldn't change it. This way,
9552 * another charset modifier encountered in the parse can be detected as an
9553 * error, as we have decided to allow only one */
9556 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9558 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9559 if (isWORDCHAR_lazy_if(*s, UTF)) {
9560 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9561 UTF ? SVf_UTF8 : 0);
9563 /* Pretend that it worked, so will continue processing before
9572 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9573 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9574 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9575 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9576 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9577 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9578 case LOCALE_PAT_MOD:
9580 goto multiple_charsets;
9582 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9585 case UNICODE_PAT_MOD:
9587 goto multiple_charsets;
9589 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9592 case ASCII_RESTRICT_PAT_MOD:
9594 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9598 /* Error if previous modifier wasn't an 'a', but if it was, see
9599 * if, and accept, a second occurrence (only) */
9601 || get_regex_charset(*pmfl)
9602 != REGEX_ASCII_RESTRICTED_CHARSET)
9604 goto multiple_charsets;
9606 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9610 case DEPENDS_PAT_MOD:
9612 goto multiple_charsets;
9614 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9623 if (*charset != c) {
9624 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9626 else if (c == 'a') {
9627 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9630 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9633 /* Pretend that it worked, so will continue processing before dieing */
9639 S_scan_pat(pTHX_ char *start, I32 type)
9644 const char * const valid_flags =
9645 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9646 char charset = '\0'; /* character set modifier */
9651 PERL_ARGS_ASSERT_SCAN_PAT;
9653 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9654 TRUE /* look for escaped bracketed metas */ );
9657 const char * const delimiter = skipspace(start);
9661 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9662 : "Search pattern not terminated" ));
9665 pm = (PMOP*)newPMOP(type, 0);
9666 if (PL_multi_open == '?') {
9667 /* This is the only point in the code that sets PMf_ONCE: */
9668 pm->op_pmflags |= PMf_ONCE;
9670 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9671 allows us to restrict the list needed by reset to just the ??
9673 assert(type != OP_TRANS);
9675 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9678 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9681 elements = mg->mg_len / sizeof(PMOP**);
9682 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9683 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9684 mg->mg_len = elements * sizeof(PMOP**);
9685 PmopSTASH_set(pm,PL_curstash);
9692 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9693 * anon CV. False positives like qr/[(?{]/ are harmless */
9695 if (type == OP_QR) {
9697 char *e, *p = SvPV(PL_lex_stuff, len);
9699 for (; p < e; p++) {
9700 if (p[0] == '(' && p[1] == '?'
9701 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9703 pm->op_pmflags |= PMf_HAS_CV;
9707 pm->op_pmflags |= PMf_IS_QR;
9710 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9712 if (PL_madskills && modstart != s) {
9713 SV* tmptoken = newSVpvn(modstart, s - modstart);
9714 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9717 /* issue a warning if /c is specified,but /g is not */
9718 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9720 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9721 "Use of /c modifier is meaningless without /g" );
9724 PL_lex_op = (OP*)pm;
9725 pl_yylval.ival = OP_MATCH;
9730 S_scan_subst(pTHX_ char *start)
9738 char charset = '\0'; /* character set modifier */
9743 PERL_ARGS_ASSERT_SCAN_SUBST;
9745 pl_yylval.ival = OP_NULL;
9747 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9748 TRUE /* look for escaped bracketed metas */ );
9751 Perl_croak(aTHX_ "Substitution pattern not terminated");
9753 if (s[-1] == PL_multi_open)
9757 CURMAD('q', PL_thisopen);
9758 CURMAD('_', PL_thiswhite);
9759 CURMAD('E', PL_thisstuff);
9760 CURMAD('Q', PL_thisclose);
9761 PL_realtokenstart = s - SvPVX(PL_linestr);
9765 first_start = PL_multi_start;
9766 first_line = CopLINE(PL_curcop);
9767 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9770 SvREFCNT_dec(PL_lex_stuff);
9771 PL_lex_stuff = NULL;
9773 Perl_croak(aTHX_ "Substitution replacement not terminated");
9775 PL_multi_start = first_start; /* so whole substitution is taken together */
9777 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9781 CURMAD('z', PL_thisopen);
9782 CURMAD('R', PL_thisstuff);
9783 CURMAD('Z', PL_thisclose);
9789 if (*s == EXEC_PAT_MOD) {
9793 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9802 curmad('m', newSVpvn(modstart, s - modstart));
9803 append_madprops(PL_thismad, (OP*)pm, 0);
9807 if ((pm->op_pmflags & PMf_CONTINUE)) {
9808 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9812 SV * const repl = newSVpvs("");
9815 pm->op_pmflags |= PMf_EVAL;
9818 sv_catpvs(repl, "eval ");
9820 sv_catpvs(repl, "do ");
9822 sv_catpvs(repl, "{");
9823 sv_catsv(repl, PL_sublex_info.repl);
9824 sv_catpvs(repl, "}");
9826 SvREFCNT_dec(PL_sublex_info.repl);
9827 PL_sublex_info.repl = repl;
9829 if (CopLINE(PL_curcop) != first_line) {
9830 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9831 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9832 CopLINE(PL_curcop) - first_line;
9833 CopLINE_set(PL_curcop, first_line);
9836 PL_lex_op = (OP*)pm;
9837 pl_yylval.ival = OP_SUBST;
9842 S_scan_trans(pTHX_ char *start)
9850 bool nondestruct = 0;
9855 PERL_ARGS_ASSERT_SCAN_TRANS;
9857 pl_yylval.ival = OP_NULL;
9859 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
9861 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9863 if (s[-1] == PL_multi_open)
9867 CURMAD('q', PL_thisopen);
9868 CURMAD('_', PL_thiswhite);
9869 CURMAD('E', PL_thisstuff);
9870 CURMAD('Q', PL_thisclose);
9871 PL_realtokenstart = s - SvPVX(PL_linestr);
9875 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
9878 SvREFCNT_dec(PL_lex_stuff);
9879 PL_lex_stuff = NULL;
9881 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9884 CURMAD('z', PL_thisopen);
9885 CURMAD('R', PL_thisstuff);
9886 CURMAD('Z', PL_thisclose);
9889 complement = del = squash = 0;
9896 complement = OPpTRANS_COMPLEMENT;
9899 del = OPpTRANS_DELETE;
9902 squash = OPpTRANS_SQUASH;
9914 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9915 o->op_private &= ~OPpTRANS_ALL;
9916 o->op_private |= del|squash|complement|
9917 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9918 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9921 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9926 curmad('m', newSVpvn(modstart, s - modstart));
9927 append_madprops(PL_thismad, o, 0);
9936 Takes a pointer to the first < in <<FOO.
9937 Returns a pointer to the byte following <<FOO.
9939 This function scans a heredoc, which involves different methods
9940 depending on whether we are in a string eval, quoted construct, etc.
9941 This is because PL_linestr could containing a single line of input, or
9942 a whole string being evalled, or the contents of the current quote-
9945 The two basic methods are:
9946 - Steal lines from the input stream
9947 - Scan the heredoc in PL_linestr and remove it therefrom
9949 In a file scope or filtered eval, the first method is used; in a
9950 string eval, the second.
9952 In a quote-like operator, we have to choose between the two,
9953 depending on where we can find a newline. We peek into outer lex-
9954 ing scopes until we find one with a newline in it. If we reach the
9955 outermost lexing scope and it is a file, we use the stream method.
9956 Otherwise it is treated as an eval.
9960 S_scan_heredoc(pTHX_ char *s)
9963 I32 op_type = OP_SCALAR;
9970 const bool infile = PL_rsfp || PL_parser->filtered;
9971 const line_t origline = CopLINE(PL_curcop);
9972 LEXSHARED *shared = PL_parser->lex_shared;
9974 I32 stuffstart = s - SvPVX(PL_linestr);
9977 PL_realtokenstart = -1;
9980 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9983 d = PL_tokenbuf + 1;
9984 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9985 *PL_tokenbuf = '\n';
9987 while (SPACE_OR_TAB(*peek))
9989 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9992 s = delimcpy(d, e, s, PL_bufend, term, &len);
9994 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10000 /* <<\FOO is equivalent to <<'FOO' */
10004 if (!isWORDCHAR_lazy_if(s,UTF))
10005 deprecate("bare << to mean <<\"\"");
10006 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10011 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10012 Perl_croak(aTHX_ "Delimiter for here document is too long");
10015 len = d - PL_tokenbuf;
10018 if (PL_madskills) {
10019 tstart = PL_tokenbuf + 1;
10020 PL_thisclose = newSVpvn(tstart, len - 1);
10021 tstart = SvPVX(PL_linestr) + stuffstart;
10022 PL_thisopen = newSVpvn(tstart, s - tstart);
10023 stuffstart = s - SvPVX(PL_linestr);
10026 #ifndef PERL_STRICT_CR
10027 d = strchr(s, '\r');
10029 char * const olds = s;
10031 while (s < PL_bufend) {
10037 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10046 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10051 if (PL_madskills) {
10052 tstart = SvPVX(PL_linestr) + stuffstart;
10054 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10056 PL_thisstuff = newSVpvn(tstart, s - tstart);
10059 stuffstart = s - SvPVX(PL_linestr);
10062 tmpstr = newSV_type(SVt_PVIV);
10063 SvGROW(tmpstr, 80);
10064 if (term == '\'') {
10065 op_type = OP_CONST;
10066 SvIV_set(tmpstr, -1);
10068 else if (term == '`') {
10069 op_type = OP_BACKTICK;
10070 SvIV_set(tmpstr, '\\');
10073 PL_multi_start = origline + 1 + PL_parser->herelines;
10074 PL_multi_open = PL_multi_close = '<';
10075 /* inside a string eval or quote-like operator */
10076 if (!infile || PL_lex_inwhat) {
10079 char * const olds = s;
10080 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10081 /* These two fields are not set until an inner lexing scope is
10082 entered. But we need them set here. */
10083 shared->ls_bufptr = s;
10084 shared->ls_linestr = PL_linestr;
10086 /* Look for a newline. If the current buffer does not have one,
10087 peek into the line buffer of the parent lexing scope, going
10088 up as many levels as necessary to find one with a newline
10091 while (!(s = (char *)memchr(
10092 (void *)shared->ls_bufptr, '\n',
10093 SvEND(shared->ls_linestr)-shared->ls_bufptr
10095 shared = shared->ls_prev;
10096 /* shared is only null if we have gone beyond the outermost
10097 lexing scope. In a file, we will have broken out of the
10098 loop in the previous iteration. In an eval, the string buf-
10099 fer ends with "\n;", so the while condition above will have
10100 evaluated to false. So shared can never be null. */
10102 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10103 most lexing scope. In a file, shared->ls_linestr at that
10104 level is just one line, so there is no body to steal. */
10105 if (infile && !shared->ls_prev) {
10111 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10114 linestr = shared->ls_linestr;
10115 bufend = SvEND(linestr);
10117 while (s < bufend - len + 1 &&
10118 memNE(s,PL_tokenbuf,len) ) {
10120 ++PL_parser->herelines;
10122 if (s >= bufend - len + 1) {
10125 sv_setpvn(tmpstr,d+1,s-d);
10127 if (PL_madskills) {
10129 sv_catpvn(PL_thisstuff, d + 1, s - d);
10131 PL_thisstuff = newSVpvn(d + 1, s - d);
10132 stuffstart = s - SvPVX(PL_linestr);
10136 /* the preceding stmt passes a newline */
10137 PL_parser->herelines++;
10139 /* s now points to the newline after the heredoc terminator.
10140 d points to the newline before the body of the heredoc.
10143 /* We are going to modify linestr in place here, so set
10144 aside copies of the string if necessary for re-evals or
10146 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10147 check shared->re_eval_str. */
10148 if (shared->re_eval_start || shared->re_eval_str) {
10149 /* Set aside the rest of the regexp */
10150 if (!shared->re_eval_str)
10151 shared->re_eval_str =
10152 newSVpvn(shared->re_eval_start,
10153 bufend - shared->re_eval_start);
10154 shared->re_eval_start -= s-d;
10156 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10157 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10158 cx->blk_eval.cur_text == linestr)
10160 cx->blk_eval.cur_text = newSVsv(linestr);
10161 SvSCREAM_on(cx->blk_eval.cur_text);
10163 /* Copy everything from s onwards back to d. */
10164 Move(s,d,bufend-s + 1,char);
10165 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10166 /* Setting PL_bufend only applies when we have not dug deeper
10167 into other scopes, because sublex_done sets PL_bufend to
10168 SvEND(PL_linestr). */
10169 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10176 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10177 term = PL_tokenbuf[1];
10179 linestr_save = PL_linestr; /* must restore this afterwards */
10180 d = s; /* and this */
10181 PL_linestr = newSVpvs("");
10182 PL_bufend = SvPVX(PL_linestr);
10185 if (PL_madskills) {
10186 tstart = SvPVX(PL_linestr) + stuffstart;
10188 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10190 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10193 PL_bufptr = PL_bufend;
10194 CopLINE_set(PL_curcop,
10195 origline + 1 + PL_parser->herelines);
10196 if (!lex_next_chunk(LEX_NO_TERM)
10197 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10198 SvREFCNT_dec(linestr_save);
10201 CopLINE_set(PL_curcop, origline);
10202 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10203 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10204 /* ^That should be enough to avoid this needing to grow: */
10205 sv_catpvs(PL_linestr, "\n\0");
10206 assert(s == SvPVX(PL_linestr));
10207 PL_bufend = SvEND(PL_linestr);
10211 stuffstart = s - SvPVX(PL_linestr);
10213 PL_parser->herelines++;
10214 PL_last_lop = PL_last_uni = NULL;
10215 #ifndef PERL_STRICT_CR
10216 if (PL_bufend - PL_linestart >= 2) {
10217 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10218 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10220 PL_bufend[-2] = '\n';
10222 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10224 else if (PL_bufend[-1] == '\r')
10225 PL_bufend[-1] = '\n';
10227 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10228 PL_bufend[-1] = '\n';
10230 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10231 SvREFCNT_dec(PL_linestr);
10232 PL_linestr = linestr_save;
10233 PL_linestart = SvPVX(linestr_save);
10234 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10239 sv_catsv(tmpstr,PL_linestr);
10243 PL_multi_end = origline + PL_parser->herelines;
10244 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10245 SvPV_shrink_to_cur(tmpstr);
10248 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10250 else if (PL_encoding)
10251 sv_recode_to_utf8(tmpstr, PL_encoding);
10253 PL_lex_stuff = tmpstr;
10254 pl_yylval.ival = op_type;
10258 SvREFCNT_dec(tmpstr);
10259 CopLINE_set(PL_curcop, origline);
10260 missingterm(PL_tokenbuf + 1);
10263 /* scan_inputsymbol
10264 takes: current position in input buffer
10265 returns: new position in input buffer
10266 side-effects: pl_yylval and lex_op are set.
10271 <FH> read from filehandle
10272 <pkg::FH> read from package qualified filehandle
10273 <pkg'FH> read from package qualified filehandle
10274 <$fh> read from filehandle in $fh
10275 <*.h> filename glob
10280 S_scan_inputsymbol(pTHX_ char *start)
10283 char *s = start; /* current position in buffer */
10286 char *d = PL_tokenbuf; /* start of temp holding space */
10287 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10289 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10291 end = strchr(s, '\n');
10294 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10296 /* die if we didn't have space for the contents of the <>,
10297 or if it didn't end, or if we see a newline
10300 if (len >= (I32)sizeof PL_tokenbuf)
10301 Perl_croak(aTHX_ "Excessively long <> operator");
10303 Perl_croak(aTHX_ "Unterminated <> operator");
10308 Remember, only scalar variables are interpreted as filehandles by
10309 this code. Anything more complex (e.g., <$fh{$num}>) will be
10310 treated as a glob() call.
10311 This code makes use of the fact that except for the $ at the front,
10312 a scalar variable and a filehandle look the same.
10314 if (*d == '$' && d[1]) d++;
10316 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10317 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10318 d += UTF ? UTF8SKIP(d) : 1;
10320 /* If we've tried to read what we allow filehandles to look like, and
10321 there's still text left, then it must be a glob() and not a getline.
10322 Use scan_str to pull out the stuff between the <> and treat it
10323 as nothing more than a string.
10326 if (d - PL_tokenbuf != len) {
10327 pl_yylval.ival = OP_GLOB;
10328 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
10330 Perl_croak(aTHX_ "Glob not terminated");
10334 bool readline_overriden = FALSE;
10337 /* we're in a filehandle read situation */
10340 /* turn <> into <ARGV> */
10342 Copy("ARGV",d,5,char);
10344 /* Check whether readline() is overriden */
10345 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10347 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
10349 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
10350 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
10351 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
10352 readline_overriden = TRUE;
10354 /* if <$fh>, create the ops to turn the variable into a
10358 /* try to find it in the pad for this block, otherwise find
10359 add symbol table ops
10361 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10362 if (tmp != NOT_IN_PAD) {
10363 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10364 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10365 HEK * const stashname = HvNAME_HEK(stash);
10366 SV * const sym = sv_2mortal(newSVhek(stashname));
10367 sv_catpvs(sym, "::");
10368 sv_catpv(sym, d+1);
10373 OP * const o = newOP(OP_PADSV, 0);
10375 PL_lex_op = readline_overriden
10376 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10377 op_append_elem(OP_LIST, o,
10378 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10379 : (OP*)newUNOP(OP_READLINE, 0, o);
10388 ? (GV_ADDMULTI | GV_ADDINEVAL)
10389 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10391 PL_lex_op = readline_overriden
10392 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10393 op_append_elem(OP_LIST,
10394 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10395 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10396 : (OP*)newUNOP(OP_READLINE, 0,
10397 newUNOP(OP_RV2SV, 0,
10398 newGVOP(OP_GV, 0, gv)));
10400 if (!readline_overriden)
10401 PL_lex_op->op_flags |= OPf_SPECIAL;
10402 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10403 pl_yylval.ival = OP_NULL;
10406 /* If it's none of the above, it must be a literal filehandle
10407 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10409 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10410 PL_lex_op = readline_overriden
10411 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10412 op_append_elem(OP_LIST,
10413 newGVOP(OP_GV, 0, gv),
10414 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10415 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10416 pl_yylval.ival = OP_NULL;
10426 start position in buffer
10427 keep_quoted preserve \ on the embedded delimiter(s)
10428 keep_delims preserve the delimiters around the string
10429 re_reparse compiling a run-time /(?{})/:
10430 collapse // to /, and skip encoding src
10431 deprecate_escaped_meta issue a deprecation warning for cer-
10432 tain paired metacharacters that appear
10434 returns: position to continue reading from buffer
10435 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10436 updates the read buffer.
10438 This subroutine pulls a string out of the input. It is called for:
10439 q single quotes q(literal text)
10440 ' single quotes 'literal text'
10441 qq double quotes qq(interpolate $here please)
10442 " double quotes "interpolate $here please"
10443 qx backticks qx(/bin/ls -l)
10444 ` backticks `/bin/ls -l`
10445 qw quote words @EXPORT_OK = qw( func() $spam )
10446 m// regexp match m/this/
10447 s/// regexp substitute s/this/that/
10448 tr/// string transliterate tr/this/that/
10449 y/// string transliterate y/this/that/
10450 ($*@) sub prototypes sub foo ($)
10451 (stuff) sub attr parameters sub foo : attr(stuff)
10452 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10454 In most of these cases (all but <>, patterns and transliterate)
10455 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10456 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10457 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10460 It skips whitespace before the string starts, and treats the first
10461 character as the delimiter. If the delimiter is one of ([{< then
10462 the corresponding "close" character )]}> is used as the closing
10463 delimiter. It allows quoting of delimiters, and if the string has
10464 balanced delimiters ([{<>}]) it allows nesting.
10466 On success, the SV with the resulting string is put into lex_stuff or,
10467 if that is already non-NULL, into lex_repl. The second case occurs only
10468 when parsing the RHS of the special constructs s/// and tr/// (y///).
10469 For convenience, the terminating delimiter character is stuffed into
10474 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10475 bool deprecate_escaped_meta
10479 SV *sv; /* scalar value: string */
10480 const char *tmps; /* temp string, used for delimiter matching */
10481 char *s = start; /* current position in the buffer */
10482 char term; /* terminating character */
10483 char *to; /* current position in the sv's data */
10484 I32 brackets = 1; /* bracket nesting level */
10485 bool has_utf8 = FALSE; /* is there any utf8 content? */
10486 I32 termcode; /* terminating char. code */
10487 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10488 STRLEN termlen; /* length of terminating string */
10489 int last_off = 0; /* last position for nesting bracket */
10490 char *escaped_open = NULL;
10497 PERL_ARGS_ASSERT_SCAN_STR;
10499 /* skip space before the delimiter */
10505 if (PL_realtokenstart >= 0) {
10506 stuffstart = PL_realtokenstart;
10507 PL_realtokenstart = -1;
10510 stuffstart = start - SvPVX(PL_linestr);
10512 /* mark where we are, in case we need to report errors */
10515 /* after skipping whitespace, the next character is the terminator */
10518 termcode = termstr[0] = term;
10522 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10523 Copy(s, termstr, termlen, U8);
10524 if (!UTF8_IS_INVARIANT(term))
10528 /* mark where we are */
10529 PL_multi_start = CopLINE(PL_curcop);
10530 PL_multi_open = term;
10531 herelines = PL_parser->herelines;
10533 /* find corresponding closing delimiter */
10534 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10535 termcode = termstr[0] = term = tmps[5];
10537 PL_multi_close = term;
10539 /* A warning is raised if the input parameter requires it for escaped (by a
10540 * backslash) paired metacharacters {} [] and () when the delimiters are
10541 * those same characters, and the backslash is ineffective. This doesn't
10542 * happen for <>, as they aren't metas. */
10543 if (deprecate_escaped_meta
10544 && (PL_multi_open == PL_multi_close
10545 || PL_multi_open == '<'
10546 || ! ckWARN_d(WARN_DEPRECATED)))
10548 deprecate_escaped_meta = FALSE;
10551 /* create a new SV to hold the contents. 79 is the SV's initial length.
10552 What a random number. */
10553 sv = newSV_type(SVt_PVIV);
10555 SvIV_set(sv, termcode);
10556 (void)SvPOK_only(sv); /* validate pointer */
10558 /* move past delimiter and try to read a complete string */
10560 sv_catpvn(sv, s, termlen);
10563 tstart = SvPVX(PL_linestr) + stuffstart;
10564 if (PL_madskills && !PL_thisopen && !keep_delims) {
10565 PL_thisopen = newSVpvn(tstart, s - tstart);
10566 stuffstart = s - SvPVX(PL_linestr);
10570 if (PL_encoding && !UTF && !re_reparse) {
10574 int offset = s - SvPVX_const(PL_linestr);
10575 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10576 &offset, (char*)termstr, termlen);
10580 if (SvIsCOW(PL_linestr)) {
10581 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10582 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10583 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10584 char *buf = SvPVX(PL_linestr);
10585 bufend_pos = PL_parser->bufend - buf;
10586 bufptr_pos = PL_parser->bufptr - buf;
10587 oldbufptr_pos = PL_parser->oldbufptr - buf;
10588 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10589 linestart_pos = PL_parser->linestart - buf;
10590 last_uni_pos = PL_parser->last_uni
10591 ? PL_parser->last_uni - buf
10593 last_lop_pos = PL_parser->last_lop
10594 ? PL_parser->last_lop - buf
10596 re_eval_start_pos =
10597 PL_parser->lex_shared->re_eval_start ?
10598 PL_parser->lex_shared->re_eval_start - buf : 0;
10601 sv_force_normal(PL_linestr);
10603 buf = SvPVX(PL_linestr);
10604 PL_parser->bufend = buf + bufend_pos;
10605 PL_parser->bufptr = buf + bufptr_pos;
10606 PL_parser->oldbufptr = buf + oldbufptr_pos;
10607 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10608 PL_parser->linestart = buf + linestart_pos;
10609 if (PL_parser->last_uni)
10610 PL_parser->last_uni = buf + last_uni_pos;
10611 if (PL_parser->last_lop)
10612 PL_parser->last_lop = buf + last_lop_pos;
10613 if (PL_parser->lex_shared->re_eval_start)
10614 PL_parser->lex_shared->re_eval_start =
10615 buf + re_eval_start_pos;
10618 ns = SvPVX_const(PL_linestr) + offset;
10619 svlast = SvEND(sv) - 1;
10621 for (; s < ns; s++) {
10622 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10623 COPLINE_INC_WITH_HERELINES;
10626 goto read_more_line;
10628 /* handle quoted delimiters */
10629 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10631 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10633 if ((svlast-1 - t) % 2) {
10634 if (!keep_quoted) {
10635 *(svlast-1) = term;
10637 SvCUR_set(sv, SvCUR(sv) - 1);
10642 if (PL_multi_open == PL_multi_close) {
10648 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10649 /* At here, all closes are "was quoted" one,
10650 so we don't check PL_multi_close. */
10652 if (!keep_quoted && *(t+1) == PL_multi_open)
10657 else if (*t == PL_multi_open)
10665 SvCUR_set(sv, w - SvPVX_const(sv));
10667 last_off = w - SvPVX(sv);
10668 if (--brackets <= 0)
10673 if (!keep_delims) {
10674 SvCUR_set(sv, SvCUR(sv) - 1);
10680 /* extend sv if need be */
10681 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10682 /* set 'to' to the next character in the sv's string */
10683 to = SvPVX(sv)+SvCUR(sv);
10685 /* if open delimiter is the close delimiter read unbridle */
10686 if (PL_multi_open == PL_multi_close) {
10687 for (; s < PL_bufend; s++,to++) {
10688 /* embedded newlines increment the current line number */
10689 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10690 COPLINE_INC_WITH_HERELINES;
10691 /* handle quoted delimiters */
10692 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10695 || (re_reparse && s[1] == '\\'))
10698 /* any other quotes are simply copied straight through */
10702 /* terminate when run out of buffer (the for() condition), or
10703 have found the terminator */
10704 else if (*s == term) {
10707 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10710 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10716 /* if the terminator isn't the same as the start character (e.g.,
10717 matched brackets), we have to allow more in the quoting, and
10718 be prepared for nested brackets.
10721 /* read until we run out of string, or we find the terminator */
10722 for (; s < PL_bufend; s++,to++) {
10723 /* embedded newlines increment the line count */
10724 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10725 COPLINE_INC_WITH_HERELINES;
10726 /* backslashes can escape the open or closing characters */
10727 if (*s == '\\' && s+1 < PL_bufend) {
10728 if (!keep_quoted &&
10729 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10733 /* Here, 'deprecate_escaped_meta' is true iff the
10734 * delimiters are paired metacharacters, and 's' points
10735 * to an occurrence of one of them within the string,
10736 * which was preceded by a backslash. If this is a
10737 * context where the delimiter is also a metacharacter,
10738 * the backslash is useless, and deprecated. () and []
10739 * are meta in any context. {} are meta only when
10740 * appearing in a quantifier or in things like '\p{'
10741 * (but '\\p{' isn't meta). They also aren't meta
10742 * unless there is a matching closed, escaped char
10743 * later on within the string. If 's' points to an
10744 * open, set a flag; if to a close, test that flag, and
10745 * raise a warning if it was set */
10747 if (deprecate_escaped_meta) {
10748 if (*s == PL_multi_open) {
10752 /* Look for a closing '\}' */
10753 else if (regcurly(s, TRUE)) {
10756 /* Look for e.g. '\x{' */
10757 else if (s - start > 2
10758 && _generic_isCC(*(s-2),
10759 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10760 { /* Exclude '\\x', '\\\\x', etc. */
10761 char *lookbehind = s - 4;
10762 bool is_meta = TRUE;
10763 while (lookbehind >= start
10764 && *lookbehind == '\\')
10766 is_meta = ! is_meta;
10774 else if (escaped_open) {
10775 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10776 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10777 escaped_open = NULL;
10784 /* allow nested opens and closes */
10785 else if (*s == PL_multi_close && --brackets <= 0)
10787 else if (*s == PL_multi_open)
10789 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10794 /* terminate the copied string and update the sv's end-of-string */
10796 SvCUR_set(sv, to - SvPVX_const(sv));
10799 * this next chunk reads more into the buffer if we're not done yet
10803 break; /* handle case where we are done yet :-) */
10805 #ifndef PERL_STRICT_CR
10806 if (to - SvPVX_const(sv) >= 2) {
10807 if ((to[-2] == '\r' && to[-1] == '\n') ||
10808 (to[-2] == '\n' && to[-1] == '\r'))
10812 SvCUR_set(sv, to - SvPVX_const(sv));
10814 else if (to[-1] == '\r')
10817 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10822 /* if we're out of file, or a read fails, bail and reset the current
10823 line marker so we can report where the unterminated string began
10826 if (PL_madskills) {
10827 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10829 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10831 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10834 COPLINE_INC_WITH_HERELINES;
10835 PL_bufptr = PL_bufend;
10836 if (!lex_next_chunk(0)) {
10838 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10847 /* at this point, we have successfully read the delimited string */
10849 if (!PL_encoding || UTF || re_reparse) {
10851 if (PL_madskills) {
10852 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10853 const int len = s - tstart;
10855 sv_catpvn(PL_thisstuff, tstart, len);
10857 PL_thisstuff = newSVpvn(tstart, len);
10858 if (!PL_thisclose && !keep_delims)
10859 PL_thisclose = newSVpvn(s,termlen);
10864 sv_catpvn(sv, s, termlen);
10869 if (PL_madskills) {
10870 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10871 const int len = s - tstart - termlen;
10873 sv_catpvn(PL_thisstuff, tstart, len);
10875 PL_thisstuff = newSVpvn(tstart, len);
10876 if (!PL_thisclose && !keep_delims)
10877 PL_thisclose = newSVpvn(s - termlen,termlen);
10881 if (has_utf8 || (PL_encoding && !re_reparse))
10884 PL_multi_end = CopLINE(PL_curcop);
10885 CopLINE_set(PL_curcop, PL_multi_start);
10886 PL_parser->herelines = herelines;
10888 /* if we allocated too much space, give some back */
10889 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10890 SvLEN_set(sv, SvCUR(sv) + 1);
10891 SvPV_renew(sv, SvLEN(sv));
10894 /* decide whether this is the first or second quoted string we've read
10899 PL_sublex_info.repl = sv;
10907 takes: pointer to position in buffer
10908 returns: pointer to new position in buffer
10909 side-effects: builds ops for the constant in pl_yylval.op
10911 Read a number in any of the formats that Perl accepts:
10913 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10914 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10917 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10919 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10922 If it reads a number without a decimal point or an exponent, it will
10923 try converting the number to an integer and see if it can do so
10924 without loss of precision.
10928 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10931 const char *s = start; /* current position in buffer */
10932 char *d; /* destination in temp buffer */
10933 char *e; /* end of temp buffer */
10934 NV nv; /* number read, as a double */
10935 SV *sv = NULL; /* place to put the converted number */
10936 bool floatit; /* boolean: int or float? */
10937 const char *lastub = NULL; /* position of last underbar */
10938 static const char* const number_too_long = "Number too long";
10940 PERL_ARGS_ASSERT_SCAN_NUM;
10942 /* We use the first character to decide what type of number this is */
10946 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10948 /* if it starts with a 0, it could be an octal number, a decimal in
10949 0.13 disguise, or a hexadecimal number, or a binary number. */
10953 u holds the "number so far"
10954 shift the power of 2 of the base
10955 (hex == 4, octal == 3, binary == 1)
10956 overflowed was the number more than we can hold?
10958 Shift is used when we add a digit. It also serves as an "are
10959 we in octal/hex/binary?" indicator to disallow hex characters
10960 when in octal mode.
10965 bool overflowed = FALSE;
10966 bool just_zero = TRUE; /* just plain 0 or binary number? */
10967 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10968 static const char* const bases[5] =
10969 { "", "binary", "", "octal", "hexadecimal" };
10970 static const char* const Bases[5] =
10971 { "", "Binary", "", "Octal", "Hexadecimal" };
10972 static const char* const maxima[5] =
10974 "0b11111111111111111111111111111111",
10978 const char *base, *Base, *max;
10980 /* check for hex */
10981 if (s[1] == 'x' || s[1] == 'X') {
10985 } else if (s[1] == 'b' || s[1] == 'B') {
10990 /* check for a decimal in disguise */
10991 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10993 /* so it must be octal */
11000 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11001 "Misplaced _ in number");
11005 base = bases[shift];
11006 Base = Bases[shift];
11007 max = maxima[shift];
11009 /* read the rest of the number */
11011 /* x is used in the overflow test,
11012 b is the digit we're adding on. */
11017 /* if we don't mention it, we're done */
11021 /* _ are ignored -- but warned about if consecutive */
11023 if (lastub && s == lastub + 1)
11024 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11025 "Misplaced _ in number");
11029 /* 8 and 9 are not octal */
11030 case '8': case '9':
11032 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11036 case '2': case '3': case '4':
11037 case '5': case '6': case '7':
11039 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11042 case '0': case '1':
11043 b = *s++ & 15; /* ASCII digit -> value of digit */
11047 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11048 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11049 /* make sure they said 0x */
11052 b = (*s++ & 7) + 9;
11054 /* Prepare to put the digit we have onto the end
11055 of the number so far. We check for overflows.
11061 x = u << shift; /* make room for the digit */
11063 if ((x >> shift) != u
11064 && !(PL_hints & HINT_NEW_BINARY)) {
11067 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11068 "Integer overflow in %s number",
11071 u = x | b; /* add the digit to the end */
11074 n *= nvshift[shift];
11075 /* If an NV has not enough bits in its
11076 * mantissa to represent an UV this summing of
11077 * small low-order numbers is a waste of time
11078 * (because the NV cannot preserve the
11079 * low-order bits anyway): we could just
11080 * remember when did we overflow and in the
11081 * end just multiply n by the right
11089 /* if we get here, we had success: make a scalar value from
11094 /* final misplaced underbar check */
11095 if (s[-1] == '_') {
11096 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11100 if (n > 4294967295.0)
11101 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11102 "%s number > %s non-portable",
11108 if (u > 0xffffffff)
11109 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11110 "%s number > %s non-portable",
11115 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11116 sv = new_constant(start, s - start, "integer",
11117 sv, NULL, NULL, 0);
11118 else if (PL_hints & HINT_NEW_BINARY)
11119 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11124 handle decimal numbers.
11125 we're also sent here when we read a 0 as the first digit
11127 case '1': case '2': case '3': case '4': case '5':
11128 case '6': case '7': case '8': case '9': case '.':
11131 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11134 /* read next group of digits and _ and copy into d */
11135 while (isDIGIT(*s) || *s == '_') {
11136 /* skip underscores, checking for misplaced ones
11140 if (lastub && s == lastub + 1)
11141 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11142 "Misplaced _ in number");
11146 /* check for end of fixed-length buffer */
11148 Perl_croak(aTHX_ "%s", number_too_long);
11149 /* if we're ok, copy the character */
11154 /* final misplaced underbar check */
11155 if (lastub && s == lastub + 1) {
11156 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11159 /* read a decimal portion if there is one. avoid
11160 3..5 being interpreted as the number 3. followed
11163 if (*s == '.' && s[1] != '.') {
11168 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11169 "Misplaced _ in number");
11173 /* copy, ignoring underbars, until we run out of digits.
11175 for (; isDIGIT(*s) || *s == '_'; s++) {
11176 /* fixed length buffer check */
11178 Perl_croak(aTHX_ "%s", number_too_long);
11180 if (lastub && s == lastub + 1)
11181 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11182 "Misplaced _ in number");
11188 /* fractional part ending in underbar? */
11189 if (s[-1] == '_') {
11190 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11191 "Misplaced _ in number");
11193 if (*s == '.' && isDIGIT(s[1])) {
11194 /* oops, it's really a v-string, but without the "v" */
11200 /* read exponent part, if present */
11201 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11205 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11206 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11208 /* stray preinitial _ */
11210 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11211 "Misplaced _ in number");
11215 /* allow positive or negative exponent */
11216 if (*s == '+' || *s == '-')
11219 /* stray initial _ */
11221 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11222 "Misplaced _ in number");
11226 /* read digits of exponent */
11227 while (isDIGIT(*s) || *s == '_') {
11230 Perl_croak(aTHX_ "%s", number_too_long);
11234 if (((lastub && s == lastub + 1) ||
11235 (!isDIGIT(s[1]) && s[1] != '_')))
11236 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11237 "Misplaced _ in number");
11245 We try to do an integer conversion first if no characters
11246 indicating "float" have been found.
11251 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11253 if (flags == IS_NUMBER_IN_UV) {
11255 sv = newSViv(uv); /* Prefer IVs over UVs. */
11258 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11259 if (uv <= (UV) IV_MIN)
11260 sv = newSViv(-(IV)uv);
11267 /* terminate the string */
11269 nv = Atof(PL_tokenbuf);
11274 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11275 const char *const key = floatit ? "float" : "integer";
11276 const STRLEN keylen = floatit ? 5 : 7;
11277 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11278 key, keylen, sv, NULL, NULL, 0);
11282 /* if it starts with a v, it could be a v-string */
11285 sv = newSV(5); /* preallocate storage space */
11286 ENTER_with_name("scan_vstring");
11288 s = scan_vstring(s, PL_bufend, sv);
11289 SvREFCNT_inc_simple_void_NN(sv);
11290 LEAVE_with_name("scan_vstring");
11294 /* make the op for the constant and return */
11297 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11299 lvalp->opval = NULL;
11305 S_scan_formline(pTHX_ char *s)
11310 SV * const stuff = newSVpvs("");
11311 bool needargs = FALSE;
11312 bool eofmt = FALSE;
11314 char *tokenstart = s;
11315 SV* savewhite = NULL;
11317 if (PL_madskills) {
11318 savewhite = PL_thiswhite;
11323 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11325 while (!needargs) {
11328 #ifdef PERL_STRICT_CR
11329 while (SPACE_OR_TAB(*t))
11332 while (SPACE_OR_TAB(*t) || *t == '\r')
11335 if (*t == '\n' || t == PL_bufend) {
11340 eol = (char *) memchr(s,'\n',PL_bufend-s);
11344 for (t = s; t < eol; t++) {
11345 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11347 goto enough; /* ~~ must be first line in formline */
11349 if (*t == '@' || *t == '^')
11353 sv_catpvn(stuff, s, eol-s);
11354 #ifndef PERL_STRICT_CR
11355 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11356 char *end = SvPVX(stuff) + SvCUR(stuff);
11359 SvCUR_set(stuff, SvCUR(stuff) - 1);
11367 if ((PL_rsfp || PL_parser->filtered)
11368 && PL_parser->form_lex_state == LEX_NORMAL) {
11371 if (PL_madskills) {
11373 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11375 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11378 PL_bufptr = PL_bufend;
11379 COPLINE_INC_WITH_HERELINES;
11380 got_some = lex_next_chunk(0);
11381 CopLINE_dec(PL_curcop);
11384 tokenstart = PL_bufptr;
11392 if (!SvCUR(stuff) || needargs)
11393 PL_lex_state = PL_parser->form_lex_state;
11394 if (SvCUR(stuff)) {
11395 PL_expect = XSTATE;
11397 start_force(PL_curforce);
11398 NEXTVAL_NEXTTOKE.ival = 0;
11399 force_next(FORMLBRACK);
11402 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11404 else if (PL_encoding)
11405 sv_recode_to_utf8(stuff, PL_encoding);
11407 start_force(PL_curforce);
11408 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11412 SvREFCNT_dec(stuff);
11414 PL_lex_formbrack = 0;
11417 if (PL_madskills) {
11419 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11421 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11422 PL_thiswhite = savewhite;
11429 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11432 const I32 oldsavestack_ix = PL_savestack_ix;
11433 CV* const outsidecv = PL_compcv;
11435 SAVEI32(PL_subline);
11436 save_item(PL_subname);
11437 SAVESPTR(PL_compcv);
11439 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11440 CvFLAGS(PL_compcv) |= flags;
11442 PL_subline = CopLINE(PL_curcop);
11443 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11444 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11445 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11446 if (outsidecv && CvPADLIST(outsidecv))
11447 CvPADLIST(PL_compcv)->xpadl_outid =
11448 PadlistNAMES(CvPADLIST(outsidecv));
11450 return oldsavestack_ix;
11454 #pragma segment Perl_yylex
11457 S_yywarn(pTHX_ const char *const s, U32 flags)
11461 PERL_ARGS_ASSERT_YYWARN;
11463 PL_in_eval |= EVAL_WARNONLY;
11464 yyerror_pv(s, flags);
11465 PL_in_eval &= ~EVAL_WARNONLY;
11470 Perl_yyerror(pTHX_ const char *const s)
11472 PERL_ARGS_ASSERT_YYERROR;
11473 return yyerror_pvn(s, strlen(s), 0);
11477 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11479 PERL_ARGS_ASSERT_YYERROR_PV;
11480 return yyerror_pvn(s, strlen(s), flags);
11484 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11487 const char *context = NULL;
11490 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11491 int yychar = PL_parser->yychar;
11493 PERL_ARGS_ASSERT_YYERROR_PVN;
11495 if (!yychar || (yychar == ';' && !PL_rsfp))
11496 sv_catpvs(where_sv, "at EOF");
11497 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11498 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11499 PL_oldbufptr != PL_bufptr) {
11502 The code below is removed for NetWare because it abends/crashes on NetWare
11503 when the script has error such as not having the closing quotes like:
11504 if ($var eq "value)
11505 Checking of white spaces is anyway done in NetWare code.
11508 while (isSPACE(*PL_oldoldbufptr))
11511 context = PL_oldoldbufptr;
11512 contlen = PL_bufptr - PL_oldoldbufptr;
11514 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11515 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11518 The code below is removed for NetWare because it abends/crashes on NetWare
11519 when the script has error such as not having the closing quotes like:
11520 if ($var eq "value)
11521 Checking of white spaces is anyway done in NetWare code.
11524 while (isSPACE(*PL_oldbufptr))
11527 context = PL_oldbufptr;
11528 contlen = PL_bufptr - PL_oldbufptr;
11530 else if (yychar > 255)
11531 sv_catpvs(where_sv, "next token ???");
11532 else if (yychar == -2) { /* YYEMPTY */
11533 if (PL_lex_state == LEX_NORMAL ||
11534 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11535 sv_catpvs(where_sv, "at end of line");
11536 else if (PL_lex_inpat)
11537 sv_catpvs(where_sv, "within pattern");
11539 sv_catpvs(where_sv, "within string");
11542 sv_catpvs(where_sv, "next char ");
11544 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11545 else if (isPRINT_LC(yychar)) {
11546 const char string = yychar;
11547 sv_catpvn(where_sv, &string, 1);
11550 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11552 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11553 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11554 OutCopFILE(PL_curcop),
11555 (IV)(PL_parser->preambling == NOLINE
11556 ? CopLINE(PL_curcop)
11557 : PL_parser->preambling));
11559 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11560 UTF8fARG(UTF, contlen, context));
11562 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11563 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11564 Perl_sv_catpvf(aTHX_ msg,
11565 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11566 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11569 if (PL_in_eval & EVAL_WARNONLY) {
11570 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11574 if (PL_error_count >= 10) {
11576 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11577 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11578 SVfARG(errsv), OutCopFILE(PL_curcop));
11580 Perl_croak(aTHX_ "%s has too many errors.\n",
11581 OutCopFILE(PL_curcop));
11584 PL_in_my_stash = NULL;
11588 #pragma segment Main
11592 S_swallow_bom(pTHX_ U8 *s)
11595 const STRLEN slen = SvCUR(PL_linestr);
11597 PERL_ARGS_ASSERT_SWALLOW_BOM;
11601 if (s[1] == 0xFE) {
11602 /* UTF-16 little-endian? (or UTF-32LE?) */
11603 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11604 /* diag_listed_as: Unsupported script encoding %s */
11605 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11606 #ifndef PERL_NO_UTF16_FILTER
11607 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11609 if (PL_bufend > (char*)s) {
11610 s = add_utf16_textfilter(s, TRUE);
11613 /* diag_listed_as: Unsupported script encoding %s */
11614 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11619 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11620 #ifndef PERL_NO_UTF16_FILTER
11621 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11623 if (PL_bufend > (char *)s) {
11624 s = add_utf16_textfilter(s, FALSE);
11627 /* diag_listed_as: Unsupported script encoding %s */
11628 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11632 case BOM_UTF8_FIRST_BYTE: {
11633 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11634 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11635 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11636 s += len + 1; /* UTF-8 */
11643 if (s[2] == 0xFE && s[3] == 0xFF) {
11644 /* UTF-32 big-endian */
11645 /* diag_listed_as: Unsupported script encoding %s */
11646 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11649 else if (s[2] == 0 && s[3] != 0) {
11652 * are a good indicator of UTF-16BE. */
11653 #ifndef PERL_NO_UTF16_FILTER
11654 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11655 s = add_utf16_textfilter(s, FALSE);
11657 /* diag_listed_as: Unsupported script encoding %s */
11658 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11664 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11667 * are a good indicator of UTF-16LE. */
11668 #ifndef PERL_NO_UTF16_FILTER
11669 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11670 s = add_utf16_textfilter(s, TRUE);
11672 /* diag_listed_as: Unsupported script encoding %s */
11673 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11681 #ifndef PERL_NO_UTF16_FILTER
11683 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11686 SV *const filter = FILTER_DATA(idx);
11687 /* We re-use this each time round, throwing the contents away before we
11689 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11690 SV *const utf8_buffer = filter;
11691 IV status = IoPAGE(filter);
11692 const bool reverse = cBOOL(IoLINES(filter));
11695 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11697 /* As we're automatically added, at the lowest level, and hence only called
11698 from this file, we can be sure that we're not called in block mode. Hence
11699 don't bother writing code to deal with block mode. */
11701 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11704 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11706 DEBUG_P(PerlIO_printf(Perl_debug_log,
11707 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11708 FPTR2DPTR(void *, S_utf16_textfilter),
11709 reverse ? 'l' : 'b', idx, maxlen, status,
11710 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11717 /* First, look in our buffer of existing UTF-8 data: */
11718 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11722 } else if (status == 0) {
11724 IoPAGE(filter) = 0;
11725 nl = SvEND(utf8_buffer);
11728 STRLEN got = nl - SvPVX(utf8_buffer);
11729 /* Did we have anything to append? */
11731 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11732 /* Everything else in this code works just fine if SVp_POK isn't
11733 set. This, however, needs it, and we need it to work, else
11734 we loop infinitely because the buffer is never consumed. */
11735 sv_chop(utf8_buffer, nl);
11739 /* OK, not a complete line there, so need to read some more UTF-16.
11740 Read an extra octect if the buffer currently has an odd number. */
11744 if (SvCUR(utf16_buffer) >= 2) {
11745 /* Location of the high octet of the last complete code point.
11746 Gosh, UTF-16 is a pain. All the benefits of variable length,
11747 *coupled* with all the benefits of partial reads and
11749 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11750 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11752 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11756 /* We have the first half of a surrogate. Read more. */
11757 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11760 status = FILTER_READ(idx + 1, utf16_buffer,
11761 160 + (SvCUR(utf16_buffer) & 1));
11762 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11763 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11766 IoPAGE(filter) = status;
11771 chars = SvCUR(utf16_buffer) >> 1;
11772 have = SvCUR(utf8_buffer);
11773 SvGROW(utf8_buffer, have + chars * 3 + 1);
11776 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11777 (U8*)SvPVX_const(utf8_buffer) + have,
11778 chars * 2, &newlen);
11780 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11781 (U8*)SvPVX_const(utf8_buffer) + have,
11782 chars * 2, &newlen);
11784 SvCUR_set(utf8_buffer, have + newlen);
11787 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11788 it's private to us, and utf16_to_utf8{,reversed} take a
11789 (pointer,length) pair, rather than a NUL-terminated string. */
11790 if(SvCUR(utf16_buffer) & 1) {
11791 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11792 SvCUR_set(utf16_buffer, 1);
11794 SvCUR_set(utf16_buffer, 0);
11797 DEBUG_P(PerlIO_printf(Perl_debug_log,
11798 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11800 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11801 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11806 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11808 SV *filter = filter_add(S_utf16_textfilter, NULL);
11810 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11812 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11813 sv_setpvs(filter, "");
11814 IoLINES(filter) = reversed;
11815 IoPAGE(filter) = 1; /* Not EOF */
11817 /* Sadly, we have to return a valid pointer, come what may, so we have to
11818 ignore any error return from this. */
11819 SvCUR_set(PL_linestr, 0);
11820 if (FILTER_READ(0, PL_linestr, 0)) {
11821 SvUTF8_on(PL_linestr);
11823 SvUTF8_on(PL_linestr);
11825 PL_bufend = SvEND(PL_linestr);
11826 return (U8*)SvPVX(PL_linestr);
11831 Returns a pointer to the next character after the parsed
11832 vstring, as well as updating the passed in sv.
11834 Function must be called like
11836 sv = sv_2mortal(newSV(5));
11837 s = scan_vstring(s,e,sv);
11839 where s and e are the start and end of the string.
11840 The sv should already be large enough to store the vstring
11841 passed in, for performance reasons.
11843 This function may croak if fatal warnings are enabled in the
11844 calling scope, hence the sv_2mortal in the example (to prevent
11845 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11851 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11854 const char *pos = s;
11855 const char *start = s;
11857 PERL_ARGS_ASSERT_SCAN_VSTRING;
11859 if (*pos == 'v') pos++; /* get past 'v' */
11860 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11862 if ( *pos != '.') {
11863 /* this may not be a v-string if followed by => */
11864 const char *next = pos;
11865 while (next < e && isSPACE(*next))
11867 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11868 /* return string not v-string */
11869 sv_setpvn(sv,(char *)s,pos-s);
11870 return (char *)pos;
11874 if (!isALPHA(*pos)) {
11875 U8 tmpbuf[UTF8_MAXBYTES+1];
11878 s++; /* get past 'v' */
11883 /* this is atoi() that tolerates underscores */
11886 const char *end = pos;
11888 while (--end >= s) {
11890 const UV orev = rev;
11891 rev += (*end - '0') * mult;
11894 /* diag_listed_as: Integer overflow in %s number */
11895 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11896 "Integer overflow in decimal number");
11900 if (rev > 0x7FFFFFFF)
11901 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11903 /* Append native character for the rev point */
11904 tmpend = uvchr_to_utf8(tmpbuf, rev);
11905 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11906 if (!UVCHR_IS_INVARIANT(rev))
11908 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11914 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11918 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11925 Perl_keyword_plugin_standard(pTHX_
11926 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11928 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11929 PERL_UNUSED_CONTEXT;
11930 PERL_UNUSED_ARG(keyword_ptr);
11931 PERL_UNUSED_ARG(keyword_len);
11932 PERL_UNUSED_ARG(op_ptr);
11933 return KEYWORD_PLUGIN_DECLINE;
11936 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11938 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11940 SAVEI32(PL_lex_brackets);
11941 if (PL_lex_brackets > 100)
11942 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11943 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11944 SAVEI32(PL_lex_allbrackets);
11945 PL_lex_allbrackets = 0;
11946 SAVEI8(PL_lex_fakeeof);
11947 PL_lex_fakeeof = (U8)fakeeof;
11948 if(yyparse(gramtype) && !PL_parser->error_count)
11949 qerror(Perl_mess(aTHX_ "Parse error"));
11952 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11954 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11958 SAVEVPTR(PL_eval_root);
11959 PL_eval_root = NULL;
11960 parse_recdescent(gramtype, fakeeof);
11966 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11968 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11971 if (flags & ~PARSE_OPTIONAL)
11972 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11973 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11974 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11975 if (!PL_parser->error_count)
11976 qerror(Perl_mess(aTHX_ "Parse error"));
11977 exprop = newOP(OP_NULL, 0);
11983 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11985 Parse a Perl arithmetic expression. This may contain operators of precedence
11986 down to the bit shift operators. The expression must be followed (and thus
11987 terminated) either by a comparison or lower-precedence operator or by
11988 something that would normally terminate an expression such as semicolon.
11989 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11990 otherwise it is mandatory. It is up to the caller to ensure that the
11991 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11992 the source of the code to be parsed and the lexical context for the
11995 The op tree representing the expression is returned. If an optional
11996 expression is absent, a null pointer is returned, otherwise the pointer
11999 If an error occurs in parsing or compilation, in most cases a valid op
12000 tree is returned anyway. The error is reflected in the parser state,
12001 normally resulting in a single exception at the top level of parsing
12002 which covers all the compilation errors that occurred. Some compilation
12003 errors, however, will throw an exception immediately.
12009 Perl_parse_arithexpr(pTHX_ U32 flags)
12011 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12015 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12017 Parse a Perl term expression. This may contain operators of precedence
12018 down to the assignment operators. The expression must be followed (and thus
12019 terminated) either by a comma or lower-precedence operator or by
12020 something that would normally terminate an expression such as semicolon.
12021 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12022 otherwise it is mandatory. It is up to the caller to ensure that the
12023 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12024 the source of the code to be parsed and the lexical context for the
12027 The op tree representing the expression is returned. If an optional
12028 expression is absent, a null pointer is returned, otherwise the pointer
12031 If an error occurs in parsing or compilation, in most cases a valid op
12032 tree is returned anyway. The error is reflected in the parser state,
12033 normally resulting in a single exception at the top level of parsing
12034 which covers all the compilation errors that occurred. Some compilation
12035 errors, however, will throw an exception immediately.
12041 Perl_parse_termexpr(pTHX_ U32 flags)
12043 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12047 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12049 Parse a Perl list expression. This may contain operators of precedence
12050 down to the comma operator. The expression must be followed (and thus
12051 terminated) either by a low-precedence logic operator such as C<or> or by
12052 something that would normally terminate an expression such as semicolon.
12053 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12054 otherwise it is mandatory. It is up to the caller to ensure that the
12055 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12056 the source of the code to be parsed and the lexical context for the
12059 The op tree representing the expression is returned. If an optional
12060 expression is absent, a null pointer is returned, otherwise the pointer
12063 If an error occurs in parsing or compilation, in most cases a valid op
12064 tree is returned anyway. The error is reflected in the parser state,
12065 normally resulting in a single exception at the top level of parsing
12066 which covers all the compilation errors that occurred. Some compilation
12067 errors, however, will throw an exception immediately.
12073 Perl_parse_listexpr(pTHX_ U32 flags)
12075 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12079 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12081 Parse a single complete Perl expression. This allows the full
12082 expression grammar, including the lowest-precedence operators such
12083 as C<or>. The expression must be followed (and thus terminated) by a
12084 token that an expression would normally be terminated by: end-of-file,
12085 closing bracketing punctuation, semicolon, or one of the keywords that
12086 signals a postfix expression-statement modifier. If I<flags> includes
12087 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12088 mandatory. It is up to the caller to ensure that the dynamic parser
12089 state (L</PL_parser> et al) is correctly set to reflect the source of
12090 the code to be parsed and the lexical context for the expression.
12092 The op tree representing the expression is returned. If an optional
12093 expression is absent, a null pointer is returned, otherwise the pointer
12096 If an error occurs in parsing or compilation, in most cases a valid op
12097 tree is returned anyway. The error is reflected in the parser state,
12098 normally resulting in a single exception at the top level of parsing
12099 which covers all the compilation errors that occurred. Some compilation
12100 errors, however, will throw an exception immediately.
12106 Perl_parse_fullexpr(pTHX_ U32 flags)
12108 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12112 =for apidoc Amx|OP *|parse_block|U32 flags
12114 Parse a single complete Perl code block. This consists of an opening
12115 brace, a sequence of statements, and a closing brace. The block
12116 constitutes a lexical scope, so C<my> variables and various compile-time
12117 effects can be contained within it. It is up to the caller to ensure
12118 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12119 reflect the source of the code to be parsed and the lexical context for
12122 The op tree representing the code block is returned. This is always a
12123 real op, never a null pointer. It will normally be a C<lineseq> list,
12124 including C<nextstate> or equivalent ops. No ops to construct any kind
12125 of runtime scope are included by virtue of it being a block.
12127 If an error occurs in parsing or compilation, in most cases a valid op
12128 tree (most likely null) is returned anyway. The error is reflected in
12129 the parser state, normally resulting in a single exception at the top
12130 level of parsing which covers all the compilation errors that occurred.
12131 Some compilation errors, however, will throw an exception immediately.
12133 The I<flags> parameter is reserved for future use, and must always
12140 Perl_parse_block(pTHX_ U32 flags)
12143 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12144 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12148 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12150 Parse a single unadorned Perl statement. This may be a normal imperative
12151 statement or a declaration that has compile-time effect. It does not
12152 include any label or other affixture. It is up to the caller to ensure
12153 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12154 reflect the source of the code to be parsed and the lexical context for
12157 The op tree representing the statement is returned. This may be a
12158 null pointer if the statement is null, for example if it was actually
12159 a subroutine definition (which has compile-time side effects). If not
12160 null, it will be ops directly implementing the statement, suitable to
12161 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12162 equivalent op (except for those embedded in a scope contained entirely
12163 within the statement).
12165 If an error occurs in parsing or compilation, in most cases a valid op
12166 tree (most likely null) is returned anyway. The error is reflected in
12167 the parser state, normally resulting in a single exception at the top
12168 level of parsing which covers all the compilation errors that occurred.
12169 Some compilation errors, however, will throw an exception immediately.
12171 The I<flags> parameter is reserved for future use, and must always
12178 Perl_parse_barestmt(pTHX_ U32 flags)
12181 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12182 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12186 =for apidoc Amx|SV *|parse_label|U32 flags
12188 Parse a single label, possibly optional, of the type that may prefix a
12189 Perl statement. It is up to the caller to ensure that the dynamic parser
12190 state (L</PL_parser> et al) is correctly set to reflect the source of
12191 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12192 label is optional, otherwise it is mandatory.
12194 The name of the label is returned in the form of a fresh scalar. If an
12195 optional label is absent, a null pointer is returned.
12197 If an error occurs in parsing, which can only occur if the label is
12198 mandatory, a valid label is returned anyway. The error is reflected in
12199 the parser state, normally resulting in a single exception at the top
12200 level of parsing which covers all the compilation errors that occurred.
12206 Perl_parse_label(pTHX_ U32 flags)
12208 if (flags & ~PARSE_OPTIONAL)
12209 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12210 if (PL_lex_state == LEX_KNOWNEXT) {
12211 PL_parser->yychar = yylex();
12212 if (PL_parser->yychar == LABEL) {
12213 char * const lpv = pl_yylval.pval;
12214 STRLEN llen = strlen(lpv);
12215 PL_parser->yychar = YYEMPTY;
12216 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12223 STRLEN wlen, bufptr_pos;
12226 if (!isIDFIRST_lazy_if(s, UTF))
12228 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12229 if (word_takes_any_delimeter(s, wlen))
12231 bufptr_pos = s - SvPVX(PL_linestr);
12233 lex_read_space(LEX_KEEP_PREVIOUS);
12235 s = SvPVX(PL_linestr) + bufptr_pos;
12236 if (t[0] == ':' && t[1] != ':') {
12237 PL_oldoldbufptr = PL_oldbufptr;
12240 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12244 if (flags & PARSE_OPTIONAL) {
12247 qerror(Perl_mess(aTHX_ "Parse error"));
12248 return newSVpvs("x");
12255 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12257 Parse a single complete Perl statement. This may be a normal imperative
12258 statement or a declaration that has compile-time effect, and may include
12259 optional labels. It is up to the caller to ensure that the dynamic
12260 parser state (L</PL_parser> et al) is correctly set to reflect the source
12261 of the code to be parsed and the lexical context for the statement.
12263 The op tree representing the statement is returned. This may be a
12264 null pointer if the statement is null, for example if it was actually
12265 a subroutine definition (which has compile-time side effects). If not
12266 null, it will be the result of a L</newSTATEOP> call, normally including
12267 a C<nextstate> or equivalent op.
12269 If an error occurs in parsing or compilation, in most cases a valid op
12270 tree (most likely null) is returned anyway. The error is reflected in
12271 the parser state, normally resulting in a single exception at the top
12272 level of parsing which covers all the compilation errors that occurred.
12273 Some compilation errors, however, will throw an exception immediately.
12275 The I<flags> parameter is reserved for future use, and must always
12282 Perl_parse_fullstmt(pTHX_ U32 flags)
12285 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12286 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12290 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12292 Parse a sequence of zero or more Perl statements. These may be normal
12293 imperative statements, including optional labels, or declarations
12294 that have compile-time effect, or any mixture thereof. The statement
12295 sequence ends when a closing brace or end-of-file is encountered in a
12296 place where a new statement could have validly started. It is up to
12297 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12298 is correctly set to reflect the source of the code to be parsed and the
12299 lexical context for the statements.
12301 The op tree representing the statement sequence is returned. This may
12302 be a null pointer if the statements were all null, for example if there
12303 were no statements or if there were only subroutine definitions (which
12304 have compile-time side effects). If not null, it will be a C<lineseq>
12305 list, normally including C<nextstate> or equivalent ops.
12307 If an error occurs in parsing or compilation, in most cases a valid op
12308 tree is returned anyway. The error is reflected in the parser state,
12309 normally resulting in a single exception at the top level of parsing
12310 which covers all the compilation errors that occurred. Some compilation
12311 errors, however, will throw an exception immediately.
12313 The I<flags> parameter is reserved for future use, and must always
12320 Perl_parse_stmtseq(pTHX_ U32 flags)
12325 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12326 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12327 c = lex_peek_unichar(0);
12328 if (c != -1 && c != /*{*/'}')
12329 qerror(Perl_mess(aTHX_ "Parse error"));
12335 * c-indentation-style: bsd
12336 * c-basic-offset: 4
12337 * indent-tabs-mode: nil
12340 * ex: set ts=8 sts=4 sw=4 et: