3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_pending_ident (PL_parser->pending_ident)
70 #define PL_preambled (PL_parser->preambled)
71 #define PL_sublex_info (PL_parser->sublex_info)
72 #define PL_linestr (PL_parser->linestr)
73 #define PL_expect (PL_parser->expect)
74 #define PL_copline (PL_parser->copline)
75 #define PL_bufptr (PL_parser->bufptr)
76 #define PL_oldbufptr (PL_parser->oldbufptr)
77 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78 #define PL_linestart (PL_parser->linestart)
79 #define PL_bufend (PL_parser->bufend)
80 #define PL_last_uni (PL_parser->last_uni)
81 #define PL_last_lop (PL_parser->last_lop)
82 #define PL_last_lop_op (PL_parser->last_lop_op)
83 #define PL_lex_state (PL_parser->lex_state)
84 #define PL_rsfp (PL_parser->rsfp)
85 #define PL_rsfp_filters (PL_parser->rsfp_filters)
86 #define PL_in_my (PL_parser->in_my)
87 #define PL_in_my_stash (PL_parser->in_my_stash)
88 #define PL_tokenbuf (PL_parser->tokenbuf)
89 #define PL_multi_end (PL_parser->multi_end)
90 #define PL_error_count (PL_parser->error_count)
93 # define PL_endwhite (PL_parser->endwhite)
94 # define PL_faketokens (PL_parser->faketokens)
95 # define PL_lasttoke (PL_parser->lasttoke)
96 # define PL_nextwhite (PL_parser->nextwhite)
97 # define PL_realtokenstart (PL_parser->realtokenstart)
98 # define PL_skipwhite (PL_parser->skipwhite)
99 # define PL_thisclose (PL_parser->thisclose)
100 # define PL_thismad (PL_parser->thismad)
101 # define PL_thisopen (PL_parser->thisopen)
102 # define PL_thisstuff (PL_parser->thisstuff)
103 # define PL_thistoken (PL_parser->thistoken)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_thiswhite (PL_parser->thiswhite)
106 # define PL_nexttoke (PL_parser->nexttoke)
107 # define PL_curforce (PL_parser->curforce)
109 # define PL_nexttoke (PL_parser->nexttoke)
110 # define PL_nexttype (PL_parser->nexttype)
111 # define PL_nextval (PL_parser->nextval)
114 /* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
117 S_pending_ident(pTHX);
119 static const char ident_too_long[] = "Identifier too long";
122 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
123 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
125 # define CURMAD(slot,sv)
126 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
129 #define XENUMMASK 0x3f
130 #define XFAKEEOF 0x40
131 #define XFAKEBRACK 0x80
133 #ifdef USE_UTF8_SCRIPTS
134 # define UTF (!IN_BYTES)
136 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
139 /* The maximum number of characters preceding the unrecognized one to display */
140 #define UNRECOGNIZED_PRECEDE_COUNT 10
142 /* In variables named $^X, these are the legal values for X.
143 * 1999-02-27 mjd-perl-patch@plover.com */
144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
148 /* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
150 * can get by with a single comparison (if the compiler is smart enough).
153 /* #define LEX_NOTPARSING 11 is done in perl.h. */
155 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
156 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
161 /* at end of code, eg "$x" followed by: */
162 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
165 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167 #define LEX_INTERPCONST 2 /* NOT USED */
168 #define LEX_FORMLINE 1 /* expecting a format line */
169 #define LEX_KNOWNEXT 0 /* next token known; just return it */
173 static const char* const lex_state_names[] = {
192 #include "keywords.h"
194 /* CLINE is a macro that ensures PL_copline has a sane value */
199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
202 # define SKIPSPACE0(s) skipspace0(s)
203 # define SKIPSPACE1(s) skipspace1(s)
204 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205 # define PEEKSPACE(s) skipspace2(s,0)
207 # define SKIPSPACE0(s) skipspace(s)
208 # define SKIPSPACE1(s) skipspace(s)
209 # define SKIPSPACE2(s,tsv) skipspace(s)
210 # define PEEKSPACE(s) skipspace(s)
214 * Convenience functions to return different tokens and prime the
215 * lexer for the next token. They all take an argument.
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
227 * FUN0OP : zero-argument function, with its op created in this file
228 * FUN1 : not used, except for not, which isn't a UNIOP
229 * BOop : bitwise or or xor
231 * SHop : shift operator
232 * PWop : power operator
233 * PMop : pattern-matching operator
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
237 * Rop : relational operator <= != gt
239 * Also see LOP and lop() below.
242 #ifdef DEBUGGING /* Serve -DT. */
243 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
245 # define REPORT(retval) (retval)
248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
258 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
259 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
270 /* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
275 #define UNI2(f,x) { \
276 pl_yylval.ival = f; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
282 return REPORT( (int)FUNC1 ); \
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
286 #define UNI(f) UNI2(f,XTERM)
287 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
289 #define UNIBRACK(f) { \
290 pl_yylval.ival = f; \
292 PL_last_uni = PL_oldbufptr; \
294 return REPORT( (int)FUNC1 ); \
296 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
299 /* grandfather return to old style */
302 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
303 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
304 pl_yylval.ival = (f); \
312 /* how to interpret the pl_yylval associated with the token */
316 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
322 static struct debug_tokens {
324 enum token_type type;
326 } const debug_tokens[] =
328 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
329 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
330 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
331 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
332 { ARROW, TOKENTYPE_NONE, "ARROW" },
333 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
334 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
335 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
336 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
337 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
338 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
339 { DO, TOKENTYPE_NONE, "DO" },
340 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
341 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
342 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
343 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
344 { ELSE, TOKENTYPE_NONE, "ELSE" },
345 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
346 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
347 { FOR, TOKENTYPE_IVAL, "FOR" },
348 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_PVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
373 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
374 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
375 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
376 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
377 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
378 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
379 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
380 { PREINC, TOKENTYPE_NONE, "PREINC" },
381 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
385 { SUB, TOKENTYPE_NONE, "SUB" },
386 { THING, TOKENTYPE_OPVAL, "THING" },
387 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
388 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
389 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
390 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
391 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
392 { USE, TOKENTYPE_IVAL, "USE" },
393 { WHEN, TOKENTYPE_IVAL, "WHEN" },
394 { WHILE, TOKENTYPE_IVAL, "WHILE" },
395 { WORD, TOKENTYPE_OPVAL, "WORD" },
396 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
397 { 0, TOKENTYPE_NONE, NULL }
400 /* dump the returned token in rv, plus any optional arg in pl_yylval */
403 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
407 PERL_ARGS_ASSERT_TOKEREPORT;
410 const char *name = NULL;
411 enum token_type type = TOKENTYPE_NONE;
412 const struct debug_tokens *p;
413 SV* const report = newSVpvs("<== ");
415 for (p = debug_tokens; p->token; p++) {
416 if (p->token == (int)rv) {
423 Perl_sv_catpv(aTHX_ report, name);
424 else if ((char)rv > ' ' && (char)rv < '~')
425 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
427 sv_catpvs(report, "EOF");
429 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
432 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
435 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
437 case TOKENTYPE_OPNUM:
438 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
439 PL_op_name[lvalp->ival]);
442 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
444 case TOKENTYPE_OPVAL:
446 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
447 PL_op_name[lvalp->opval->op_type]);
448 if (lvalp->opval->op_type == OP_CONST) {
449 Perl_sv_catpvf(aTHX_ report, " %s",
450 SvPEEK(cSVOPx_sv(lvalp->opval)));
455 sv_catpvs(report, "(opval=null)");
458 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
464 /* print the buffer with suitable escapes */
467 S_printbuf(pTHX_ const char *const fmt, const char *const s)
469 SV* const tmp = newSVpvs("");
471 PERL_ARGS_ASSERT_PRINTBUF;
473 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
480 S_deprecate_commaless_var_list(pTHX) {
482 deprecate("comma-less variable list");
483 return REPORT(','); /* grandfather non-comma-format format */
489 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
490 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
494 S_ao(pTHX_ int toketype)
497 if (*PL_bufptr == '=') {
499 if (toketype == ANDAND)
500 pl_yylval.ival = OP_ANDASSIGN;
501 else if (toketype == OROR)
502 pl_yylval.ival = OP_ORASSIGN;
503 else if (toketype == DORDOR)
504 pl_yylval.ival = OP_DORASSIGN;
512 * When Perl expects an operator and finds something else, no_op
513 * prints the warning. It always prints "<something> found where
514 * operator expected. It prints "Missing semicolon on previous line?"
515 * if the surprise occurs at the start of the line. "do you need to
516 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
517 * where the compiler doesn't know if foo is a method call or a function.
518 * It prints "Missing operator before end of line" if there's nothing
519 * after the missing operator, or "... before <...>" if there is something
520 * after the missing operator.
524 S_no_op(pTHX_ const char *const what, char *s)
527 char * const oldbp = PL_bufptr;
528 const bool is_first = (PL_oldbufptr == PL_linestart);
530 PERL_ARGS_ASSERT_NO_OP;
536 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
537 if (ckWARN_d(WARN_SYNTAX)) {
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
540 "\t(Missing semicolon on previous line?)\n");
541 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
543 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Do you need to predeclare %.*s?)\n",
548 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
553 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
561 * Complain about missing quote/regexp/heredoc terminator.
562 * If it's called with NULL then it cauterizes the line buffer.
563 * If we're in a delimited string and the delimiter is a control
564 * character, it's reformatted into a two-char sequence like ^C.
569 S_missingterm(pTHX_ char *s)
575 char * const nl = strrchr(s,'\n');
579 else if (isCNTRL(PL_multi_close)) {
581 tmpbuf[1] = (char)toCTRL(PL_multi_close);
586 *tmpbuf = (char)PL_multi_close;
590 q = strchr(s,'"') ? '\'' : '"';
591 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
595 * Check whether the named feature is enabled.
598 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
601 HV * const hinthv = GvHV(PL_hintgv);
602 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
604 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
606 if (namelen > MAX_FEATURE_LEN)
608 memcpy(&he_name[8], name, namelen);
610 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
614 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
615 * utf16-to-utf8-reversed.
618 #ifdef PERL_CR_FILTER
622 register const char *s = SvPVX_const(sv);
623 register const char * const e = s + SvCUR(sv);
625 PERL_ARGS_ASSERT_STRIP_RETURN;
627 /* outer loop optimized to do nothing if there are no CR-LFs */
629 if (*s++ == '\r' && *s == '\n') {
630 /* hit a CR-LF, need to copy the rest */
631 register char *d = s - 1;
634 if (*s == '\r' && s[1] == '\n')
645 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
647 const I32 count = FILTER_READ(idx+1, sv, maxlen);
648 if (count > 0 && !maxlen)
655 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
657 Creates and initialises a new lexer/parser state object, supplying
658 a context in which to lex and parse from a new source of Perl code.
659 A pointer to the new state object is placed in L</PL_parser>. An entry
660 is made on the save stack so that upon unwinding the new state object
661 will be destroyed and the former value of L</PL_parser> will be restored.
662 Nothing else need be done to clean up the parsing context.
664 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
665 non-null, provides a string (in SV form) containing code to be parsed.
666 A copy of the string is made, so subsequent modification of I<line>
667 does not affect parsing. I<rsfp>, if non-null, provides an input stream
668 from which code will be read to be parsed. If both are non-null, the
669 code in I<line> comes first and must consist of complete lines of input,
670 and I<rsfp> supplies the remainder of the source.
672 The I<flags> parameter is reserved for future use, and must always
673 be zero, except for one flag that is currently reserved for perl's internal
679 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
680 can share filters with the current parser. */
683 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
686 const char *s = NULL;
688 yy_parser *parser, *oparser;
689 if (flags && flags != LEX_START_SAME_FILTER)
690 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
692 /* create and initialise a parser */
693 Newxz(parser, 1, yy_parser);
694 parser->old_parser = oparser = PL_parser;
697 parser->stack = NULL;
699 parser->stack_size = 0;
701 /* on scope exit, free this parser and restore any outer one */
703 parser->saved_curcop = PL_curcop;
705 /* initialise lexer state */
708 parser->curforce = -1;
710 parser->nexttoke = 0;
712 parser->error_count = oparser ? oparser->error_count : 0;
713 parser->copline = NOLINE;
714 parser->lex_state = LEX_NORMAL;
715 parser->expect = XSTATE;
717 parser->rsfp_filters =
718 !(flags & LEX_START_SAME_FILTER) || !oparser
720 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
722 Newx(parser->lex_brackstack, 120, char);
723 Newx(parser->lex_casestack, 12, char);
724 *parser->lex_casestack = '\0';
727 s = SvPV_const(line, len);
733 parser->linestr = newSVpvs("\n;");
735 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
737 sv_catpvs(parser->linestr, "\n;");
739 parser->oldoldbufptr =
742 parser->linestart = SvPVX(parser->linestr);
743 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
744 parser->last_lop = parser->last_uni = NULL;
750 /* delete a parser object */
753 Perl_parser_free(pTHX_ const yy_parser *parser)
755 PERL_ARGS_ASSERT_PARSER_FREE;
757 PL_curcop = parser->saved_curcop;
758 SvREFCNT_dec(parser->linestr);
760 if (parser->rsfp == PerlIO_stdin())
761 PerlIO_clearerr(parser->rsfp);
762 else if (parser->rsfp && (!parser->old_parser ||
763 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
764 PerlIO_close(parser->rsfp);
765 SvREFCNT_dec(parser->rsfp_filters);
767 Safefree(parser->lex_brackstack);
768 Safefree(parser->lex_casestack);
769 PL_parser = parser->old_parser;
775 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
777 Buffer scalar containing the chunk currently under consideration of the
778 text currently being lexed. This is always a plain string scalar (for
779 which C<SvPOK> is true). It is not intended to be used as a scalar by
780 normal scalar means; instead refer to the buffer directly by the pointer
781 variables described below.
783 The lexer maintains various C<char*> pointers to things in the
784 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
785 reallocated, all of these pointers must be updated. Don't attempt to
786 do this manually, but rather use L</lex_grow_linestr> if you need to
787 reallocate the buffer.
789 The content of the text chunk in the buffer is commonly exactly one
790 complete line of input, up to and including a newline terminator,
791 but there are situations where it is otherwise. The octets of the
792 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
793 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
794 flag on this scalar, which may disagree with it.
796 For direct examination of the buffer, the variable
797 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
798 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
799 of these pointers is usually preferable to examination of the scalar
800 through normal scalar means.
802 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
804 Direct pointer to the end of the chunk of text currently being lexed, the
805 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
806 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
807 always located at the end of the buffer, and does not count as part of
808 the buffer's contents.
810 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
812 Points to the current position of lexing inside the lexer buffer.
813 Characters around this point may be freely examined, within
814 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
815 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
816 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
818 Lexing code (whether in the Perl core or not) moves this pointer past
819 the characters that it consumes. It is also expected to perform some
820 bookkeeping whenever a newline character is consumed. This movement
821 can be more conveniently performed by the function L</lex_read_to>,
822 which handles newlines appropriately.
824 Interpretation of the buffer's octets can be abstracted out by
825 using the slightly higher-level functions L</lex_peek_unichar> and
826 L</lex_read_unichar>.
828 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
830 Points to the start of the current line inside the lexer buffer.
831 This is useful for indicating at which column an error occurred, and
832 not much else. This must be updated by any lexing code that consumes
833 a newline; the function L</lex_read_to> handles this detail.
839 =for apidoc Amx|bool|lex_bufutf8
841 Indicates whether the octets in the lexer buffer
842 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
843 of Unicode characters. If not, they should be interpreted as Latin-1
844 characters. This is analogous to the C<SvUTF8> flag for scalars.
846 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
847 contains valid UTF-8. Lexing code must be robust in the face of invalid
850 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
851 is significant, but not the whole story regarding the input character
852 encoding. Normally, when a file is being read, the scalar contains octets
853 and its C<SvUTF8> flag is off, but the octets should be interpreted as
854 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
855 however, the scalar may have the C<SvUTF8> flag on, and in this case its
856 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
857 is in effect. This logic may change in the future; use this function
858 instead of implementing the logic yourself.
864 Perl_lex_bufutf8(pTHX)
870 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
872 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
873 at least I<len> octets (including terminating NUL). Returns a
874 pointer to the reallocated buffer. This is necessary before making
875 any direct modification of the buffer that would increase its length.
876 L</lex_stuff_pvn> provides a more convenient way to insert text into
879 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
880 this function updates all of the lexer's variables that point directly
887 Perl_lex_grow_linestr(pTHX_ STRLEN len)
891 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
892 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
893 linestr = PL_parser->linestr;
894 buf = SvPVX(linestr);
895 if (len <= SvLEN(linestr))
897 bufend_pos = PL_parser->bufend - buf;
898 bufptr_pos = PL_parser->bufptr - buf;
899 oldbufptr_pos = PL_parser->oldbufptr - buf;
900 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
901 linestart_pos = PL_parser->linestart - buf;
902 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
903 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
904 buf = sv_grow(linestr, len);
905 PL_parser->bufend = buf + bufend_pos;
906 PL_parser->bufptr = buf + bufptr_pos;
907 PL_parser->oldbufptr = buf + oldbufptr_pos;
908 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
909 PL_parser->linestart = buf + linestart_pos;
910 if (PL_parser->last_uni)
911 PL_parser->last_uni = buf + last_uni_pos;
912 if (PL_parser->last_lop)
913 PL_parser->last_lop = buf + last_lop_pos;
918 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
920 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
921 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
922 reallocating the buffer if necessary. This means that lexing code that
923 runs later will see the characters as if they had appeared in the input.
924 It is not recommended to do this as part of normal parsing, and most
925 uses of this facility run the risk of the inserted characters being
926 interpreted in an unintended manner.
928 The string to be inserted is represented by I<len> octets starting
929 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
930 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
931 The characters are recoded for the lexer buffer, according to how the
932 buffer is currently being interpreted (L</lex_bufutf8>). If a string
933 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
934 function is more convenient.
940 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
944 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
945 if (flags & ~(LEX_STUFF_UTF8))
946 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
948 if (flags & LEX_STUFF_UTF8) {
952 const char *p, *e = pv+len;
953 for (p = pv; p != e; p++)
954 highhalf += !!(((U8)*p) & 0x80);
957 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
958 bufptr = PL_parser->bufptr;
959 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
960 SvCUR_set(PL_parser->linestr,
961 SvCUR(PL_parser->linestr) + len+highhalf);
962 PL_parser->bufend += len+highhalf;
963 for (p = pv; p != e; p++) {
966 *bufptr++ = (char)(0xc0 | (c >> 6));
967 *bufptr++ = (char)(0x80 | (c & 0x3f));
974 if (flags & LEX_STUFF_UTF8) {
976 const char *p, *e = pv+len;
977 for (p = pv; p != e; p++) {
980 Perl_croak(aTHX_ "Lexing code attempted to stuff "
981 "non-Latin-1 character into Latin-1 input");
982 } else if (c >= 0xc2 && p+1 != e &&
983 (((U8)p[1]) & 0xc0) == 0x80) {
986 } else if (c >= 0x80) {
987 /* malformed UTF-8 */
989 SAVESPTR(PL_warnhook);
990 PL_warnhook = PERL_WARNHOOK_FATAL;
991 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
997 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
998 bufptr = PL_parser->bufptr;
999 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1000 SvCUR_set(PL_parser->linestr,
1001 SvCUR(PL_parser->linestr) + len-highhalf);
1002 PL_parser->bufend += len-highhalf;
1003 for (p = pv; p != e; p++) {
1006 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1009 *bufptr++ = (char)c;
1014 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1015 bufptr = PL_parser->bufptr;
1016 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1017 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1018 PL_parser->bufend += len;
1019 Copy(pv, bufptr, len, char);
1025 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1028 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1029 reallocating the buffer if necessary. This means that lexing code that
1030 runs later will see the characters as if they had appeared in the input.
1031 It is not recommended to do this as part of normal parsing, and most
1032 uses of this facility run the risk of the inserted characters being
1033 interpreted in an unintended manner.
1035 The string to be inserted is represented by octets starting at I<pv>
1036 and continuing to the first nul. These octets are interpreted as either
1037 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1038 in I<flags>. The characters are recoded for the lexer buffer, according
1039 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1040 If it is not convenient to nul-terminate a string to be inserted, the
1041 L</lex_stuff_pvn> function is more appropriate.
1047 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1050 lex_stuff_pvn(pv, strlen(pv), flags);
1054 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1057 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1058 reallocating the buffer if necessary. This means that lexing code that
1059 runs later will see the characters as if they had appeared in the input.
1060 It is not recommended to do this as part of normal parsing, and most
1061 uses of this facility run the risk of the inserted characters being
1062 interpreted in an unintended manner.
1064 The string to be inserted is the string value of I<sv>. The characters
1065 are recoded for the lexer buffer, according to how the buffer is currently
1066 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1067 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1068 need to construct a scalar.
1074 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1078 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1086 =for apidoc Amx|void|lex_unstuff|char *ptr
1088 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1089 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1090 This hides the discarded text from any lexing code that runs later,
1091 as if the text had never appeared.
1093 This is not the normal way to consume lexed text. For that, use
1100 Perl_lex_unstuff(pTHX_ char *ptr)
1104 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1105 buf = PL_parser->bufptr;
1107 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1110 bufend = PL_parser->bufend;
1112 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1113 unstuff_len = ptr - buf;
1114 Move(ptr, buf, bufend+1-ptr, char);
1115 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1116 PL_parser->bufend = bufend - unstuff_len;
1120 =for apidoc Amx|void|lex_read_to|char *ptr
1122 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1123 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1124 performing the correct bookkeeping whenever a newline character is passed.
1125 This is the normal way to consume lexed text.
1127 Interpretation of the buffer's octets can be abstracted out by
1128 using the slightly higher-level functions L</lex_peek_unichar> and
1129 L</lex_read_unichar>.
1135 Perl_lex_read_to(pTHX_ char *ptr)
1138 PERL_ARGS_ASSERT_LEX_READ_TO;
1139 s = PL_parser->bufptr;
1140 if (ptr < s || ptr > PL_parser->bufend)
1141 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1142 for (; s != ptr; s++)
1144 CopLINE_inc(PL_curcop);
1145 PL_parser->linestart = s+1;
1147 PL_parser->bufptr = ptr;
1151 =for apidoc Amx|void|lex_discard_to|char *ptr
1153 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1154 up to I<ptr>. The remaining content of the buffer will be moved, and
1155 all pointers into the buffer updated appropriately. I<ptr> must not
1156 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1157 it is not permitted to discard text that has yet to be lexed.
1159 Normally it is not necessarily to do this directly, because it suffices to
1160 use the implicit discarding behaviour of L</lex_next_chunk> and things
1161 based on it. However, if a token stretches across multiple lines,
1162 and the lexing code has kept multiple lines of text in the buffer for
1163 that purpose, then after completion of the token it would be wise to
1164 explicitly discard the now-unneeded earlier lines, to avoid future
1165 multi-line tokens growing the buffer without bound.
1171 Perl_lex_discard_to(pTHX_ char *ptr)
1175 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1176 buf = SvPVX(PL_parser->linestr);
1178 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1181 if (ptr > PL_parser->bufptr)
1182 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1183 discard_len = ptr - buf;
1184 if (PL_parser->oldbufptr < ptr)
1185 PL_parser->oldbufptr = ptr;
1186 if (PL_parser->oldoldbufptr < ptr)
1187 PL_parser->oldoldbufptr = ptr;
1188 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1189 PL_parser->last_uni = NULL;
1190 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1191 PL_parser->last_lop = NULL;
1192 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1193 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1194 PL_parser->bufend -= discard_len;
1195 PL_parser->bufptr -= discard_len;
1196 PL_parser->oldbufptr -= discard_len;
1197 PL_parser->oldoldbufptr -= discard_len;
1198 if (PL_parser->last_uni)
1199 PL_parser->last_uni -= discard_len;
1200 if (PL_parser->last_lop)
1201 PL_parser->last_lop -= discard_len;
1205 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1207 Reads in the next chunk of text to be lexed, appending it to
1208 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1209 looked to the end of the current chunk and wants to know more. It is
1210 usual, but not necessary, for lexing to have consumed the entirety of
1211 the current chunk at this time.
1213 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1214 chunk (i.e., the current chunk has been entirely consumed), normally the
1215 current chunk will be discarded at the same time that the new chunk is
1216 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1217 will not be discarded. If the current chunk has not been entirely
1218 consumed, then it will not be discarded regardless of the flag.
1220 Returns true if some new text was added to the buffer, or false if the
1221 buffer has reached the end of the input text.
1226 #define LEX_FAKE_EOF 0x80000000
1229 Perl_lex_next_chunk(pTHX_ U32 flags)
1233 STRLEN old_bufend_pos, new_bufend_pos;
1234 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1235 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1236 bool got_some_for_debugger = 0;
1238 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1239 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1240 linestr = PL_parser->linestr;
1241 buf = SvPVX(linestr);
1242 if (!(flags & LEX_KEEP_PREVIOUS) &&
1243 PL_parser->bufptr == PL_parser->bufend) {
1244 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246 if (PL_parser->last_uni != PL_parser->bufend)
1247 PL_parser->last_uni = NULL;
1248 if (PL_parser->last_lop != PL_parser->bufend)
1249 PL_parser->last_lop = NULL;
1250 last_uni_pos = last_lop_pos = 0;
1254 old_bufend_pos = PL_parser->bufend - buf;
1255 bufptr_pos = PL_parser->bufptr - buf;
1256 oldbufptr_pos = PL_parser->oldbufptr - buf;
1257 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1258 linestart_pos = PL_parser->linestart - buf;
1259 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1260 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262 if (flags & LEX_FAKE_EOF) {
1264 } else if (!PL_parser->rsfp) {
1266 } else if (filter_gets(linestr, old_bufend_pos)) {
1268 got_some_for_debugger = 1;
1270 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1271 sv_setpvs(linestr, "");
1273 /* End of real input. Close filehandle (unless it was STDIN),
1274 * then add implicit termination.
1276 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1277 PerlIO_clearerr(PL_parser->rsfp);
1278 else if (PL_parser->rsfp)
1279 (void)PerlIO_close(PL_parser->rsfp);
1280 PL_parser->rsfp = NULL;
1281 PL_parser->in_pod = 0;
1283 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1286 if (!PL_in_eval && PL_minus_p) {
1288 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1289 PL_minus_n = PL_minus_p = 0;
1290 } else if (!PL_in_eval && PL_minus_n) {
1291 sv_catpvs(linestr, /*{*/";}");
1294 sv_catpvs(linestr, ";");
1297 buf = SvPVX(linestr);
1298 new_bufend_pos = SvCUR(linestr);
1299 PL_parser->bufend = buf + new_bufend_pos;
1300 PL_parser->bufptr = buf + bufptr_pos;
1301 PL_parser->oldbufptr = buf + oldbufptr_pos;
1302 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1303 PL_parser->linestart = buf + linestart_pos;
1304 if (PL_parser->last_uni)
1305 PL_parser->last_uni = buf + last_uni_pos;
1306 if (PL_parser->last_lop)
1307 PL_parser->last_lop = buf + last_lop_pos;
1308 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1309 PL_curstash != PL_debstash) {
1310 /* debugger active and we're not compiling the debugger code,
1311 * so store the line into the debugger's array of lines
1313 update_debugger_info(NULL, buf+old_bufend_pos,
1314 new_bufend_pos-old_bufend_pos);
1320 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322 Looks ahead one (Unicode) character in the text currently being lexed.
1323 Returns the codepoint (unsigned integer value) of the next character,
1324 or -1 if lexing has reached the end of the input text. To consume the
1325 peeked character, use L</lex_read_unichar>.
1327 If the next character is in (or extends into) the next chunk of input
1328 text, the next chunk will be read in. Normally the current chunk will be
1329 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1330 then the current chunk will not be discarded.
1332 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1333 is encountered, an exception is generated.
1339 Perl_lex_peek_unichar(pTHX_ U32 flags)
1343 if (flags & ~(LEX_KEEP_PREVIOUS))
1344 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1345 s = PL_parser->bufptr;
1346 bufend = PL_parser->bufend;
1352 if (!lex_next_chunk(flags))
1354 s = PL_parser->bufptr;
1355 bufend = PL_parser->bufend;
1361 len = PL_utf8skip[head];
1362 while ((STRLEN)(bufend-s) < len) {
1363 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365 s = PL_parser->bufptr;
1366 bufend = PL_parser->bufend;
1369 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1370 if (retlen == (STRLEN)-1) {
1371 /* malformed UTF-8 */
1373 SAVESPTR(PL_warnhook);
1374 PL_warnhook = PERL_WARNHOOK_FATAL;
1375 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1381 if (!lex_next_chunk(flags))
1383 s = PL_parser->bufptr;
1390 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1392 Reads the next (Unicode) character in the text currently being lexed.
1393 Returns the codepoint (unsigned integer value) of the character read,
1394 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1395 if lexing has reached the end of the input text. To non-destructively
1396 examine the next character, use L</lex_peek_unichar> instead.
1398 If the next character is in (or extends into) the next chunk of input
1399 text, the next chunk will be read in. Normally the current chunk will be
1400 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1401 then the current chunk will not be discarded.
1403 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1404 is encountered, an exception is generated.
1410 Perl_lex_read_unichar(pTHX_ U32 flags)
1413 if (flags & ~(LEX_KEEP_PREVIOUS))
1414 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1415 c = lex_peek_unichar(flags);
1418 CopLINE_inc(PL_curcop);
1420 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422 ++(PL_parser->bufptr);
1428 =for apidoc Amx|void|lex_read_space|U32 flags
1430 Reads optional spaces, in Perl style, in the text currently being
1431 lexed. The spaces may include ordinary whitespace characters and
1432 Perl-style comments. C<#line> directives are processed if encountered.
1433 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1434 at a non-space character (or the end of the input text).
1436 If spaces extend into the next chunk of input text, the next chunk will
1437 be read in. Normally the current chunk will be discarded at the same
1438 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1439 chunk will not be discarded.
1444 #define LEX_NO_NEXT_CHUNK 0x80000000
1447 Perl_lex_read_space(pTHX_ U32 flags)
1450 bool need_incline = 0;
1451 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1452 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1455 sv_free(PL_skipwhite);
1456 PL_skipwhite = NULL;
1459 PL_skipwhite = newSVpvs("");
1460 #endif /* PERL_MAD */
1461 s = PL_parser->bufptr;
1462 bufend = PL_parser->bufend;
1468 } while (!(c == '\n' || (c == 0 && s == bufend)));
1469 } else if (c == '\n') {
1471 PL_parser->linestart = s;
1476 } else if (isSPACE(c)) {
1478 } else if (c == 0 && s == bufend) {
1482 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1483 #endif /* PERL_MAD */
1484 if (flags & LEX_NO_NEXT_CHUNK)
1486 PL_parser->bufptr = s;
1487 CopLINE_inc(PL_curcop);
1488 got_more = lex_next_chunk(flags);
1489 CopLINE_dec(PL_curcop);
1490 s = PL_parser->bufptr;
1491 bufend = PL_parser->bufend;
1494 if (need_incline && PL_parser->rsfp) {
1504 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1505 #endif /* PERL_MAD */
1506 PL_parser->bufptr = s;
1511 * This subroutine has nothing to do with tilting, whether at windmills
1512 * or pinball tables. Its name is short for "increment line". It
1513 * increments the current line number in CopLINE(PL_curcop) and checks
1514 * to see whether the line starts with a comment of the form
1515 * # line 500 "foo.pm"
1516 * If so, it sets the current line number and file to the values in the comment.
1520 S_incline(pTHX_ const char *s)
1528 PERL_ARGS_ASSERT_INCLINE;
1530 CopLINE_inc(PL_curcop);
1533 while (SPACE_OR_TAB(*s))
1535 if (strnEQ(s, "line", 4))
1539 if (SPACE_OR_TAB(*s))
1543 while (SPACE_OR_TAB(*s))
1551 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1553 while (SPACE_OR_TAB(*s))
1555 if (*s == '"' && (t = strchr(s+1, '"'))) {
1561 while (!isSPACE(*t))
1565 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1567 if (*e != '\n' && *e != '\0')
1568 return; /* false alarm */
1570 line_num = atoi(n)-1;
1573 const STRLEN len = t - s;
1574 SV *const temp_sv = CopFILESV(PL_curcop);
1579 cf = SvPVX(temp_sv);
1580 tmplen = SvCUR(temp_sv);
1586 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1587 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1588 * to *{"::_<newfilename"} */
1589 /* However, the long form of evals is only turned on by the
1590 debugger - usually they're "(eval %lu)" */
1594 STRLEN tmplen2 = len;
1595 if (tmplen + 2 <= sizeof smallbuf)
1598 Newx(tmpbuf, tmplen + 2, char);
1601 memcpy(tmpbuf + 2, cf, tmplen);
1603 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1608 if (tmplen2 + 2 <= sizeof smallbuf)
1611 Newx(tmpbuf2, tmplen2 + 2, char);
1613 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1614 /* Either they malloc'd it, or we malloc'd it,
1615 so no prefix is present in ours. */
1620 memcpy(tmpbuf2 + 2, s, tmplen2);
1623 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1625 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1626 /* adjust ${"::_<newfilename"} to store the new file name */
1627 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1628 /* The line number may differ. If that is the case,
1629 alias the saved lines that are in the array.
1630 Otherwise alias the whole array. */
1631 if (CopLINE(PL_curcop) == line_num) {
1632 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1633 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1635 else if (GvAV(*gvp)) {
1636 AV * const av = GvAV(*gvp);
1637 const I32 start = CopLINE(PL_curcop)+1;
1638 I32 items = AvFILLp(av) - start;
1640 AV * const av2 = GvAVn(gv2);
1641 SV **svp = AvARRAY(av) + start;
1642 I32 l = (I32)line_num+1;
1644 av_store(av2, l++, SvREFCNT_inc(*svp++));
1649 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1651 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1653 CopFILE_free(PL_curcop);
1654 CopFILE_setn(PL_curcop, s, len);
1656 CopLINE_set(PL_curcop, line_num);
1660 /* skip space before PL_thistoken */
1663 S_skipspace0(pTHX_ register char *s)
1665 PERL_ARGS_ASSERT_SKIPSPACE0;
1672 PL_thiswhite = newSVpvs("");
1673 sv_catsv(PL_thiswhite, PL_skipwhite);
1674 sv_free(PL_skipwhite);
1677 PL_realtokenstart = s - SvPVX(PL_linestr);
1681 /* skip space after PL_thistoken */
1684 S_skipspace1(pTHX_ register char *s)
1686 const char *start = s;
1687 I32 startoff = start - SvPVX(PL_linestr);
1689 PERL_ARGS_ASSERT_SKIPSPACE1;
1694 start = SvPVX(PL_linestr) + startoff;
1695 if (!PL_thistoken && PL_realtokenstart >= 0) {
1696 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1697 PL_thistoken = newSVpvn(tstart, start - tstart);
1699 PL_realtokenstart = -1;
1702 PL_nextwhite = newSVpvs("");
1703 sv_catsv(PL_nextwhite, PL_skipwhite);
1704 sv_free(PL_skipwhite);
1711 S_skipspace2(pTHX_ register char *s, SV **svp)
1714 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1715 const I32 startoff = s - SvPVX(PL_linestr);
1717 PERL_ARGS_ASSERT_SKIPSPACE2;
1720 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1721 if (!PL_madskills || !svp)
1723 start = SvPVX(PL_linestr) + startoff;
1724 if (!PL_thistoken && PL_realtokenstart >= 0) {
1725 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1726 PL_thistoken = newSVpvn(tstart, start - tstart);
1727 PL_realtokenstart = -1;
1731 *svp = newSVpvs("");
1732 sv_setsv(*svp, PL_skipwhite);
1733 sv_free(PL_skipwhite);
1742 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1744 AV *av = CopFILEAVx(PL_curcop);
1746 SV * const sv = newSV_type(SVt_PVMG);
1748 sv_setsv(sv, orig_sv);
1750 sv_setpvn(sv, buf, len);
1753 av_store(av, (I32)CopLINE(PL_curcop), sv);
1759 * Called to gobble the appropriate amount and type of whitespace.
1760 * Skips comments as well.
1764 S_skipspace(pTHX_ register char *s)
1768 #endif /* PERL_MAD */
1769 PERL_ARGS_ASSERT_SKIPSPACE;
1772 sv_free(PL_skipwhite);
1773 PL_skipwhite = NULL;
1775 #endif /* PERL_MAD */
1776 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1777 while (s < PL_bufend && SPACE_OR_TAB(*s))
1780 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1782 lex_read_space(LEX_KEEP_PREVIOUS |
1783 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1784 LEX_NO_NEXT_CHUNK : 0));
1786 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1787 if (PL_linestart > PL_bufptr)
1788 PL_bufptr = PL_linestart;
1793 PL_skipwhite = newSVpvn(start, s-start);
1794 #endif /* PERL_MAD */
1800 * Check the unary operators to ensure there's no ambiguity in how they're
1801 * used. An ambiguous piece of code would be:
1803 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1804 * the +5 is its argument.
1814 if (PL_oldoldbufptr != PL_last_uni)
1816 while (isSPACE(*PL_last_uni))
1819 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1821 if ((t = strchr(s, '(')) && t < PL_bufptr)
1824 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1825 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1826 (int)(s - PL_last_uni), PL_last_uni);
1830 * LOP : macro to build a list operator. Its behaviour has been replaced
1831 * with a subroutine, S_lop() for which LOP is just another name.
1834 #define LOP(f,x) return lop(f,x,s)
1838 * Build a list operator (or something that might be one). The rules:
1839 * - if we have a next token, then it's a list operator [why?]
1840 * - if the next thing is an opening paren, then it's a function
1841 * - else it's a list operator
1845 S_lop(pTHX_ I32 f, int x, char *s)
1849 PERL_ARGS_ASSERT_LOP;
1855 PL_last_lop = PL_oldbufptr;
1856 PL_last_lop_op = (OPCODE)f;
1865 return REPORT(FUNC);
1868 return REPORT(FUNC);
1871 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1872 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1873 return REPORT(LSTOP);
1880 * Sets up for an eventual force_next(). start_force(0) basically does
1881 * an unshift, while start_force(-1) does a push. yylex removes items
1886 S_start_force(pTHX_ int where)
1890 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1891 where = PL_lasttoke;
1892 assert(PL_curforce < 0 || PL_curforce == where);
1893 if (PL_curforce != where) {
1894 for (i = PL_lasttoke; i > where; --i) {
1895 PL_nexttoke[i] = PL_nexttoke[i-1];
1899 if (PL_curforce < 0) /* in case of duplicate start_force() */
1900 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1901 PL_curforce = where;
1904 curmad('^', newSVpvs(""));
1905 CURMAD('_', PL_nextwhite);
1910 S_curmad(pTHX_ char slot, SV *sv)
1916 if (PL_curforce < 0)
1917 where = &PL_thismad;
1919 where = &PL_nexttoke[PL_curforce].next_mad;
1925 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1927 else if (PL_encoding) {
1928 sv_recode_to_utf8(sv, PL_encoding);
1933 /* keep a slot open for the head of the list? */
1934 if (slot != '_' && *where && (*where)->mad_key == '^') {
1935 (*where)->mad_key = slot;
1936 sv_free(MUTABLE_SV(((*where)->mad_val)));
1937 (*where)->mad_val = (void*)sv;
1940 addmad(newMADsv(slot, sv), where, 0);
1943 # define start_force(where) NOOP
1944 # define curmad(slot, sv) NOOP
1949 * When the lexer realizes it knows the next token (for instance,
1950 * it is reordering tokens for the parser) then it can call S_force_next
1951 * to know what token to return the next time the lexer is called. Caller
1952 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1953 * and possibly PL_expect to ensure the lexer handles the token correctly.
1957 S_force_next(pTHX_ I32 type)
1962 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1963 tokereport(type, &NEXTVAL_NEXTTOKE);
1967 if (PL_curforce < 0)
1968 start_force(PL_lasttoke);
1969 PL_nexttoke[PL_curforce].next_type = type;
1970 if (PL_lex_state != LEX_KNOWNEXT)
1971 PL_lex_defer = PL_lex_state;
1972 PL_lex_state = LEX_KNOWNEXT;
1973 PL_lex_expect = PL_expect;
1976 PL_nexttype[PL_nexttoke] = type;
1978 if (PL_lex_state != LEX_KNOWNEXT) {
1979 PL_lex_defer = PL_lex_state;
1980 PL_lex_expect = PL_expect;
1981 PL_lex_state = LEX_KNOWNEXT;
1989 int yyc = PL_parser->yychar;
1990 if (yyc != YYEMPTY) {
1993 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1994 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1995 PL_lex_allbrackets--;
1997 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1998 } else if (yyc == '('/*)*/) {
1999 PL_lex_allbrackets--;
2004 PL_parser->yychar = YYEMPTY;
2009 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2012 SV * const sv = newSVpvn_utf8(start, len,
2015 && !is_ascii_string((const U8*)start, len)
2016 && is_utf8_string((const U8*)start, len));
2022 * When the lexer knows the next thing is a word (for instance, it has
2023 * just seen -> and it knows that the next char is a word char, then
2024 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2028 * char *start : buffer position (must be within PL_linestr)
2029 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2030 * int check_keyword : if true, Perl checks to make sure the word isn't
2031 * a keyword (do this if the word is a label, e.g. goto FOO)
2032 * int allow_pack : if true, : characters will also be allowed (require,
2033 * use, etc. do this)
2034 * int allow_initial_tick : used by the "sub" lexer only.
2038 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2044 PERL_ARGS_ASSERT_FORCE_WORD;
2046 start = SKIPSPACE1(start);
2048 if (isIDFIRST_lazy_if(s,UTF) ||
2049 (allow_pack && *s == ':') ||
2050 (allow_initial_tick && *s == '\'') )
2052 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2053 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2055 start_force(PL_curforce);
2057 curmad('X', newSVpvn(start,s-start));
2058 if (token == METHOD) {
2063 PL_expect = XOPERATOR;
2067 curmad('g', newSVpvs( "forced" ));
2068 NEXTVAL_NEXTTOKE.opval
2069 = (OP*)newSVOP(OP_CONST,0,
2070 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2071 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2079 * Called when the lexer wants $foo *foo &foo etc, but the program
2080 * text only contains the "foo" portion. The first argument is a pointer
2081 * to the "foo", and the second argument is the type symbol to prefix.
2082 * Forces the next token to be a "WORD".
2083 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2087 S_force_ident(pTHX_ register const char *s, int kind)
2091 PERL_ARGS_ASSERT_FORCE_IDENT;
2094 const STRLEN len = strlen(s);
2095 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2096 UTF ? SVf_UTF8 : 0));
2097 start_force(PL_curforce);
2098 NEXTVAL_NEXTTOKE.opval = o;
2101 o->op_private = OPpCONST_ENTERED;
2102 /* XXX see note in pp_entereval() for why we forgo typo
2103 warnings if the symbol must be introduced in an eval.
2105 gv_fetchpvn_flags(s, len,
2106 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2107 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2108 kind == '$' ? SVt_PV :
2109 kind == '@' ? SVt_PVAV :
2110 kind == '%' ? SVt_PVHV :
2118 Perl_str_to_version(pTHX_ SV *sv)
2123 const char *start = SvPV_const(sv,len);
2124 const char * const end = start + len;
2125 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2127 PERL_ARGS_ASSERT_STR_TO_VERSION;
2129 while (start < end) {
2133 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2138 retval += ((NV)n)/nshift;
2147 * Forces the next token to be a version number.
2148 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2149 * and if "guessing" is TRUE, then no new token is created (and the caller
2150 * must use an alternative parsing method).
2154 S_force_version(pTHX_ char *s, int guessing)
2160 I32 startoff = s - SvPVX(PL_linestr);
2163 PERL_ARGS_ASSERT_FORCE_VERSION;
2171 while (isDIGIT(*d) || *d == '_' || *d == '.')
2175 start_force(PL_curforce);
2176 curmad('X', newSVpvn(s,d-s));
2179 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2181 #ifdef USE_LOCALE_NUMERIC
2182 char *loc = setlocale(LC_NUMERIC, "C");
2184 s = scan_num(s, &pl_yylval);
2185 #ifdef USE_LOCALE_NUMERIC
2186 setlocale(LC_NUMERIC, loc);
2188 version = pl_yylval.opval;
2189 ver = cSVOPx(version)->op_sv;
2190 if (SvPOK(ver) && !SvNIOK(ver)) {
2191 SvUPGRADE(ver, SVt_PVNV);
2192 SvNV_set(ver, str_to_version(ver));
2193 SvNOK_on(ver); /* hint that it is a version */
2196 else if (guessing) {
2199 sv_free(PL_nextwhite); /* let next token collect whitespace */
2201 s = SvPVX(PL_linestr) + startoff;
2209 if (PL_madskills && !version) {
2210 sv_free(PL_nextwhite); /* let next token collect whitespace */
2212 s = SvPVX(PL_linestr) + startoff;
2215 /* NOTE: The parser sees the package name and the VERSION swapped */
2216 start_force(PL_curforce);
2217 NEXTVAL_NEXTTOKE.opval = version;
2224 * S_force_strict_version
2225 * Forces the next token to be a version number using strict syntax rules.
2229 S_force_strict_version(pTHX_ char *s)
2234 I32 startoff = s - SvPVX(PL_linestr);
2236 const char *errstr = NULL;
2238 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2240 while (isSPACE(*s)) /* leading whitespace */
2243 if (is_STRICT_VERSION(s,&errstr)) {
2245 s = (char *)scan_version(s, ver, 0);
2246 version = newSVOP(OP_CONST, 0, ver);
2248 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2249 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2253 yyerror(errstr); /* version required */
2258 if (PL_madskills && !version) {
2259 sv_free(PL_nextwhite); /* let next token collect whitespace */
2261 s = SvPVX(PL_linestr) + startoff;
2264 /* NOTE: The parser sees the package name and the VERSION swapped */
2265 start_force(PL_curforce);
2266 NEXTVAL_NEXTTOKE.opval = version;
2274 * Tokenize a quoted string passed in as an SV. It finds the next
2275 * chunk, up to end of string or a backslash. It may make a new
2276 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2281 S_tokeq(pTHX_ SV *sv)
2285 register char *send;
2290 PERL_ARGS_ASSERT_TOKEQ;
2295 s = SvPV_force(sv, len);
2296 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2299 /* This is relying on the SV being "well formed" with a trailing '\0' */
2300 while (s < send && !(*s == '\\' && s[1] == '\\'))
2305 if ( PL_hints & HINT_NEW_STRING ) {
2306 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2310 if (s + 1 < send && (s[1] == '\\'))
2311 s++; /* all that, just for this */
2316 SvCUR_set(sv, d - SvPVX_const(sv));
2318 if ( PL_hints & HINT_NEW_STRING )
2319 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2324 * Now come three functions related to double-quote context,
2325 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2326 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2327 * interact with PL_lex_state, and create fake ( ... ) argument lists
2328 * to handle functions and concatenation.
2329 * They assume that whoever calls them will be setting up a fake
2330 * join call, because each subthing puts a ',' after it. This lets
2333 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2335 * (I'm not sure whether the spurious commas at the end of lcfirst's
2336 * arguments and join's arguments are created or not).
2341 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2343 * Pattern matching will set PL_lex_op to the pattern-matching op to
2344 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2346 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2348 * Everything else becomes a FUNC.
2350 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2351 * had an OP_CONST or OP_READLINE). This just sets us up for a
2352 * call to S_sublex_push().
2356 S_sublex_start(pTHX)
2359 register const I32 op_type = pl_yylval.ival;
2361 if (op_type == OP_NULL) {
2362 pl_yylval.opval = PL_lex_op;
2366 if (op_type == OP_CONST || op_type == OP_READLINE) {
2367 SV *sv = tokeq(PL_lex_stuff);
2369 if (SvTYPE(sv) == SVt_PVIV) {
2370 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2372 const char * const p = SvPV_const(sv, len);
2373 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2377 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2378 PL_lex_stuff = NULL;
2379 /* Allow <FH> // "foo" */
2380 if (op_type == OP_READLINE)
2381 PL_expect = XTERMORDORDOR;
2384 else if (op_type == OP_BACKTICK && PL_lex_op) {
2385 /* readpipe() vas overriden */
2386 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2387 pl_yylval.opval = PL_lex_op;
2389 PL_lex_stuff = NULL;
2393 PL_sublex_info.super_state = PL_lex_state;
2394 PL_sublex_info.sub_inwhat = (U16)op_type;
2395 PL_sublex_info.sub_op = PL_lex_op;
2396 PL_lex_state = LEX_INTERPPUSH;
2400 pl_yylval.opval = PL_lex_op;
2410 * Create a new scope to save the lexing state. The scope will be
2411 * ended in S_sublex_done. Returns a '(', starting the function arguments
2412 * to the uc, lc, etc. found before.
2413 * Sets PL_lex_state to LEX_INTERPCONCAT.
2422 PL_lex_state = PL_sublex_info.super_state;
2423 SAVEBOOL(PL_lex_dojoin);
2424 SAVEI32(PL_lex_brackets);
2425 SAVEI32(PL_lex_allbrackets);
2426 SAVEI8(PL_lex_fakeeof);
2427 SAVEI32(PL_lex_casemods);
2428 SAVEI32(PL_lex_starts);
2429 SAVEI8(PL_lex_state);
2430 SAVEVPTR(PL_lex_inpat);
2431 SAVEI16(PL_lex_inwhat);
2432 SAVECOPLINE(PL_curcop);
2433 SAVEPPTR(PL_bufptr);
2434 SAVEPPTR(PL_bufend);
2435 SAVEPPTR(PL_oldbufptr);
2436 SAVEPPTR(PL_oldoldbufptr);
2437 SAVEPPTR(PL_last_lop);
2438 SAVEPPTR(PL_last_uni);
2439 SAVEPPTR(PL_linestart);
2440 SAVESPTR(PL_linestr);
2441 SAVEGENERICPV(PL_lex_brackstack);
2442 SAVEGENERICPV(PL_lex_casestack);
2444 PL_linestr = PL_lex_stuff;
2445 PL_lex_stuff = NULL;
2447 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2448 = SvPVX(PL_linestr);
2449 PL_bufend += SvCUR(PL_linestr);
2450 PL_last_lop = PL_last_uni = NULL;
2451 SAVEFREESV(PL_linestr);
2453 PL_lex_dojoin = FALSE;
2454 PL_lex_brackets = 0;
2455 PL_lex_allbrackets = 0;
2456 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2457 Newx(PL_lex_brackstack, 120, char);
2458 Newx(PL_lex_casestack, 12, char);
2459 PL_lex_casemods = 0;
2460 *PL_lex_casestack = '\0';
2462 PL_lex_state = LEX_INTERPCONCAT;
2463 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2465 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2466 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2467 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2468 PL_lex_inpat = PL_sublex_info.sub_op;
2470 PL_lex_inpat = NULL;
2477 * Restores lexer state after a S_sublex_push.
2484 if (!PL_lex_starts++) {
2485 SV * const sv = newSVpvs("");
2486 if (SvUTF8(PL_linestr))
2488 PL_expect = XOPERATOR;
2489 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2493 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2494 PL_lex_state = LEX_INTERPCASEMOD;
2498 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2499 assert(PL_lex_inwhat != OP_TRANSR);
2500 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2501 PL_linestr = PL_lex_repl;
2503 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2504 PL_bufend += SvCUR(PL_linestr);
2505 PL_last_lop = PL_last_uni = NULL;
2506 SAVEFREESV(PL_linestr);
2507 PL_lex_dojoin = FALSE;
2508 PL_lex_brackets = 0;
2509 PL_lex_allbrackets = 0;
2510 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2511 PL_lex_casemods = 0;
2512 *PL_lex_casestack = '\0';
2514 if (SvEVALED(PL_lex_repl)) {
2515 PL_lex_state = LEX_INTERPNORMAL;
2517 /* we don't clear PL_lex_repl here, so that we can check later
2518 whether this is an evalled subst; that means we rely on the
2519 logic to ensure sublex_done() is called again only via the
2520 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2523 PL_lex_state = LEX_INTERPCONCAT;
2533 PL_endwhite = newSVpvs("");
2534 sv_catsv(PL_endwhite, PL_thiswhite);
2538 sv_setpvs(PL_thistoken,"");
2540 PL_realtokenstart = -1;
2544 PL_bufend = SvPVX(PL_linestr);
2545 PL_bufend += SvCUR(PL_linestr);
2546 PL_expect = XOPERATOR;
2547 PL_sublex_info.sub_inwhat = 0;
2555 Extracts a pattern, double-quoted string, or transliteration. This
2558 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2559 processing a pattern (PL_lex_inpat is true), a transliteration
2560 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2562 Returns a pointer to the character scanned up to. If this is
2563 advanced from the start pointer supplied (i.e. if anything was
2564 successfully parsed), will leave an OP for the substring scanned
2565 in pl_yylval. Caller must intuit reason for not parsing further
2566 by looking at the next characters herself.
2570 constants: \N{NAME} only
2571 case and quoting: \U \Q \E
2572 stops on @ and $, but not for $ as tail anchor
2574 In transliterations:
2575 characters are VERY literal, except for - not at the start or end
2576 of the string, which indicates a range. If the range is in bytes,
2577 scan_const expands the range to the full set of intermediate
2578 characters. If the range is in utf8, the hyphen is replaced with
2579 a certain range mark which will be handled by pmtrans() in op.c.
2581 In double-quoted strings:
2583 double-quoted style: \r and \n
2584 constants: \x31, etc.
2585 deprecated backrefs: \1 (in substitution replacements)
2586 case and quoting: \U \Q \E
2589 scan_const does *not* construct ops to handle interpolated strings.
2590 It stops processing as soon as it finds an embedded $ or @ variable
2591 and leaves it to the caller to work out what's going on.
2593 embedded arrays (whether in pattern or not) could be:
2594 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2596 $ in double-quoted strings must be the symbol of an embedded scalar.
2598 $ in pattern could be $foo or could be tail anchor. Assumption:
2599 it's a tail anchor if $ is the last thing in the string, or if it's
2600 followed by one of "()| \r\n\t"
2602 \1 (backreferences) are turned into $1
2604 The structure of the code is
2605 while (there's a character to process) {
2606 handle transliteration ranges
2607 skip regexp comments /(?#comment)/ and codes /(?{code})/
2608 skip #-initiated comments in //x patterns
2609 check for embedded arrays
2610 check for embedded scalars
2612 deprecate \1 in substitution replacements
2613 handle string-changing backslashes \l \U \Q \E, etc.
2614 switch (what was escaped) {
2615 handle \- in a transliteration (becomes a literal -)
2616 if a pattern and not \N{, go treat as regular character
2617 handle \132 (octal characters)
2618 handle \x15 and \x{1234} (hex characters)
2619 handle \N{name} (named characters, also \N{3,5} in a pattern)
2620 handle \cV (control characters)
2621 handle printf-style backslashes (\f, \r, \n, etc)
2624 } (end if backslash)
2625 handle regular character
2626 } (end while character to read)
2631 S_scan_const(pTHX_ char *start)
2634 register char *send = PL_bufend; /* end of the constant */
2635 SV *sv = newSV(send - start); /* sv for the constant. See
2636 note below on sizing. */
2637 register char *s = start; /* start of the constant */
2638 register char *d = SvPVX(sv); /* destination for copies */
2639 bool dorange = FALSE; /* are we in a translit range? */
2640 bool didrange = FALSE; /* did we just finish a range? */
2641 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2642 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2643 to be UTF8? But, this can
2644 show as true when the source
2645 isn't utf8, as for example
2646 when it is entirely composed
2649 /* Note on sizing: The scanned constant is placed into sv, which is
2650 * initialized by newSV() assuming one byte of output for every byte of
2651 * input. This routine expects newSV() to allocate an extra byte for a
2652 * trailing NUL, which this routine will append if it gets to the end of
2653 * the input. There may be more bytes of input than output (eg., \N{LATIN
2654 * CAPITAL LETTER A}), or more output than input if the constant ends up
2655 * recoded to utf8, but each time a construct is found that might increase
2656 * the needed size, SvGROW() is called. Its size parameter each time is
2657 * based on the best guess estimate at the time, namely the length used so
2658 * far, plus the length the current construct will occupy, plus room for
2659 * the trailing NUL, plus one byte for every input byte still unscanned */
2663 UV literal_endpoint = 0;
2664 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2667 PERL_ARGS_ASSERT_SCAN_CONST;
2669 assert(PL_lex_inwhat != OP_TRANSR);
2670 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2671 /* If we are doing a trans and we know we want UTF8 set expectation */
2672 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2673 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2677 while (s < send || dorange) {
2679 /* get transliterations out of the way (they're most literal) */
2680 if (PL_lex_inwhat == OP_TRANS) {
2681 /* expand a range A-Z to the full set of characters. AIE! */
2683 I32 i; /* current expanded character */
2684 I32 min; /* first character in range */
2685 I32 max; /* last character in range */
2696 char * const c = (char*)utf8_hop((U8*)d, -1);
2700 *c = (char)UTF_TO_NATIVE(0xff);
2701 /* mark the range as done, and continue */
2707 i = d - SvPVX_const(sv); /* remember current offset */
2710 SvLEN(sv) + (has_utf8 ?
2711 (512 - UTF_CONTINUATION_MARK +
2714 /* How many two-byte within 0..255: 128 in UTF-8,
2715 * 96 in UTF-8-mod. */
2717 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2719 d = SvPVX(sv) + i; /* refresh d after realloc */
2723 for (j = 0; j <= 1; j++) {
2724 char * const c = (char*)utf8_hop((U8*)d, -1);
2725 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2731 max = (U8)0xff; /* only to \xff */
2732 uvmax = uv; /* \x{100} to uvmax */
2734 d = c; /* eat endpoint chars */
2739 d -= 2; /* eat the first char and the - */
2740 min = (U8)*d; /* first char in range */
2741 max = (U8)d[1]; /* last char in range */
2748 "Invalid range \"%c-%c\" in transliteration operator",
2749 (char)min, (char)max);
2753 if (literal_endpoint == 2 &&
2754 ((isLOWER(min) && isLOWER(max)) ||
2755 (isUPPER(min) && isUPPER(max)))) {
2757 for (i = min; i <= max; i++)
2759 *d++ = NATIVE_TO_NEED(has_utf8,i);
2761 for (i = min; i <= max; i++)
2763 *d++ = NATIVE_TO_NEED(has_utf8,i);
2768 for (i = min; i <= max; i++)
2771 const U8 ch = (U8)NATIVE_TO_UTF(i);
2772 if (UNI_IS_INVARIANT(ch))
2775 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2776 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2785 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2787 *d++ = (char)UTF_TO_NATIVE(0xff);
2789 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2793 /* mark the range as done, and continue */
2797 literal_endpoint = 0;
2802 /* range begins (ignore - as first or last char) */
2803 else if (*s == '-' && s+1 < send && s != start) {
2805 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2812 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2822 literal_endpoint = 0;
2823 native_range = TRUE;
2828 /* if we get here, we're not doing a transliteration */
2830 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2831 except for the last char, which will be done separately. */
2832 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2834 while (s+1 < send && *s != ')')
2835 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2837 else if (s[2] == '{' /* This should match regcomp.c */
2838 || (s[2] == '?' && s[3] == '{'))
2841 char *regparse = s + (s[2] == '{' ? 3 : 4);
2844 while (count && (c = *regparse)) {
2845 if (c == '\\' && regparse[1])
2853 if (*regparse != ')')
2854 regparse--; /* Leave one char for continuation. */
2855 while (s < regparse)
2856 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2860 /* likewise skip #-initiated comments in //x patterns */
2861 else if (*s == '#' && PL_lex_inpat &&
2862 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2863 while (s+1 < send && *s != '\n')
2864 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2867 /* check for embedded arrays
2868 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2870 else if (*s == '@' && s[1]) {
2871 if (isALNUM_lazy_if(s+1,UTF))
2873 if (strchr(":'{$", s[1]))
2875 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2876 break; /* in regexp, neither @+ nor @- are interpolated */
2879 /* check for embedded scalars. only stop if we're sure it's a
2882 else if (*s == '$') {
2883 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2885 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2887 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2888 "Possible unintended interpolation of $\\ in regex");
2890 break; /* in regexp, $ might be tail anchor */
2894 /* End of else if chain - OP_TRANS rejoin rest */
2897 if (*s == '\\' && s+1 < send) {
2898 char* e; /* Can be used for ending '}', etc. */
2902 /* warn on \1 - \9 in substitution replacements, but note that \11
2903 * is an octal; and \19 is \1 followed by '9' */
2904 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2905 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2907 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2912 /* string-change backslash escapes */
2913 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2917 /* In a pattern, process \N, but skip any other backslash escapes.
2918 * This is because we don't want to translate an escape sequence
2919 * into a meta symbol and have the regex compiler use the meta
2920 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2921 * in spite of this, we do have to process \N here while the proper
2922 * charnames handler is in scope. See bugs #56444 and #62056.
2923 * There is a complication because \N in a pattern may also stand
2924 * for 'match a non-nl', and not mean a charname, in which case its
2925 * processing should be deferred to the regex compiler. To be a
2926 * charname it must be followed immediately by a '{', and not look
2927 * like \N followed by a curly quantifier, i.e., not something like
2928 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2930 else if (PL_lex_inpat
2933 || regcurly(s + 1)))
2935 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2936 goto default_action;
2941 /* quoted - in transliterations */
2943 if (PL_lex_inwhat == OP_TRANS) {
2950 if ((isALPHA(*s) || isDIGIT(*s)))
2951 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2952 "Unrecognized escape \\%c passed through",
2954 /* default action is to copy the quoted character */
2955 goto default_action;
2958 /* eg. \132 indicates the octal constant 0132 */
2959 case '0': case '1': case '2': case '3':
2960 case '4': case '5': case '6': case '7':
2964 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2967 goto NUM_ESCAPE_INSERT;
2969 /* eg. \o{24} indicates the octal constant \024 */
2975 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2981 goto NUM_ESCAPE_INSERT;
2984 /* eg. \x24 indicates the hex constant 0x24 */
2988 char* const e = strchr(s, '}');
2989 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2990 PERL_SCAN_DISALLOW_PREFIX;
2995 yyerror("Missing right brace on \\x{}");
2999 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3005 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3006 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3012 /* Insert oct or hex escaped character. There will always be
3013 * enough room in sv since such escapes will be longer than any
3014 * UTF-8 sequence they can end up as, except if they force us
3015 * to recode the rest of the string into utf8 */
3017 /* Here uv is the ordinal of the next character being added in
3018 * unicode (converted from native). */
3019 if (!UNI_IS_INVARIANT(uv)) {
3020 if (!has_utf8 && uv > 255) {
3021 /* Might need to recode whatever we have accumulated so
3022 * far if it contains any chars variant in utf8 or
3025 SvCUR_set(sv, d - SvPVX_const(sv));
3028 /* See Note on sizing above. */
3029 sv_utf8_upgrade_flags_grow(sv,
3030 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3031 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3032 d = SvPVX(sv) + SvCUR(sv);
3037 d = (char*)uvuni_to_utf8((U8*)d, uv);
3038 if (PL_lex_inwhat == OP_TRANS &&
3039 PL_sublex_info.sub_op) {
3040 PL_sublex_info.sub_op->op_private |=
3041 (PL_lex_repl ? OPpTRANS_FROM_UTF
3045 if (uv > 255 && !dorange)
3046 native_range = FALSE;
3059 /* In a non-pattern \N must be a named character, like \N{LATIN
3060 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3061 * mean to match a non-newline. For non-patterns, named
3062 * characters are converted to their string equivalents. In
3063 * patterns, named characters are not converted to their
3064 * ultimate forms for the same reasons that other escapes
3065 * aren't. Instead, they are converted to the \N{U+...} form
3066 * to get the value from the charnames that is in effect right
3067 * now, while preserving the fact that it was a named character
3068 * so that the regex compiler knows this */
3070 /* This section of code doesn't generally use the
3071 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3072 * a close examination of this macro and determined it is a
3073 * no-op except on utfebcdic variant characters. Every
3074 * character generated by this that would normally need to be
3075 * enclosed by this macro is invariant, so the macro is not
3076 * needed, and would complicate use of copy(). XXX There are
3077 * other parts of this file where the macro is used
3078 * inconsistently, but are saved by it being a no-op */
3080 /* The structure of this section of code (besides checking for
3081 * errors and upgrading to utf8) is:
3082 * Further disambiguate between the two meanings of \N, and if
3083 * not a charname, go process it elsewhere
3084 * If of form \N{U+...}, pass it through if a pattern;
3085 * otherwise convert to utf8
3086 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3087 * pattern; otherwise convert to utf8 */
3089 /* Here, s points to the 'N'; the test below is guaranteed to
3090 * succeed if we are being called on a pattern as we already
3091 * know from a test above that the next character is a '{'.
3092 * On a non-pattern \N must mean 'named sequence, which
3093 * requires braces */
3096 yyerror("Missing braces on \\N{}");
3101 /* If there is no matching '}', it is an error. */
3102 if (! (e = strchr(s, '}'))) {
3103 if (! PL_lex_inpat) {
3104 yyerror("Missing right brace on \\N{}");
3106 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3111 /* Here it looks like a named character */
3115 /* XXX This block is temporary code. \N{} implies that the
3116 * pattern is to have Unicode semantics, and therefore
3117 * currently has to be encoded in utf8. By putting it in
3118 * utf8 now, we save a whole pass in the regular expression
3119 * compiler. Once that code is changed so Unicode
3120 * semantics doesn't necessarily have to be in utf8, this
3121 * block should be removed. However, the code that parses
3122 * the output of this would have to be changed to not
3123 * necessarily expect utf8 */
3125 SvCUR_set(sv, d - SvPVX_const(sv));
3128 /* See Note on sizing above. */
3129 sv_utf8_upgrade_flags_grow(sv,
3130 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3131 /* 5 = '\N{' + cur char + NUL */
3132 (STRLEN)(send - s) + 5);
3133 d = SvPVX(sv) + SvCUR(sv);
3138 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3139 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3140 | PERL_SCAN_DISALLOW_PREFIX;
3143 /* For \N{U+...}, the '...' is a unicode value even on
3144 * EBCDIC machines */
3145 s += 2; /* Skip to next char after the 'U+' */
3147 uv = grok_hex(s, &len, &flags, NULL);
3148 if (len == 0 || len != (STRLEN)(e - s)) {
3149 yyerror("Invalid hexadecimal number in \\N{U+...}");
3156 /* On non-EBCDIC platforms, pass through to the regex
3157 * compiler unchanged. The reason we evaluated the
3158 * number above is to make sure there wasn't a syntax
3159 * error. But on EBCDIC we convert to native so
3160 * downstream code can continue to assume it's native
3162 s -= 5; /* Include the '\N{U+' */
3164 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3167 (unsigned int) UNI_TO_NATIVE(uv));
3169 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3173 else { /* Not a pattern: convert the hex to string */
3175 /* If destination is not in utf8, unconditionally
3176 * recode it to be so. This is because \N{} implies
3177 * Unicode semantics, and scalars have to be in utf8
3178 * to guarantee those semantics */
3180 SvCUR_set(sv, d - SvPVX_const(sv));
3183 /* See Note on sizing above. */
3184 sv_utf8_upgrade_flags_grow(
3186 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3187 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3188 d = SvPVX(sv) + SvCUR(sv);
3192 /* Add the string to the output */
3193 if (UNI_IS_INVARIANT(uv)) {
3196 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3199 else { /* Here is \N{NAME} but not \N{U+...}. */
3201 SV *res; /* result from charnames */
3202 const char *str; /* the string in 'res' */
3203 STRLEN len; /* its length */
3205 /* Get the value for NAME */
3206 res = newSVpvn(s, e - s);
3207 res = new_constant( NULL, 0, "charnames",
3208 /* includes all of: \N{...} */
3209 res, NULL, s - 3, e - s + 4 );
3211 /* Most likely res will be in utf8 already since the
3212 * standard charnames uses pack U, but a custom translator
3213 * can leave it otherwise, so make sure. XXX This can be
3214 * revisited to not have charnames use utf8 for characters
3215 * that don't need it when regexes don't have to be in utf8
3216 * for Unicode semantics. If doing so, remember EBCDIC */
3217 sv_utf8_upgrade(res);
3218 str = SvPV_const(res, len);
3220 /* Don't accept malformed input */
3221 if (! is_utf8_string((U8 *) str, len)) {
3222 yyerror("Malformed UTF-8 returned by \\N");
3224 else if (PL_lex_inpat) {
3226 if (! len) { /* The name resolved to an empty string */
3227 Copy("\\N{}", d, 4, char);
3231 /* In order to not lose information for the regex
3232 * compiler, pass the result in the specially made
3233 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3234 * the code points in hex of each character
3235 * returned by charnames */
3237 const char *str_end = str + len;
3238 STRLEN char_length; /* cur char's byte length */
3239 STRLEN output_length; /* and the number of bytes
3240 after this is translated
3242 const STRLEN off = d - SvPVX_const(sv);
3244 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3245 * max('U+', '.'); and 1 for NUL */
3246 char hex_string[2 * UTF8_MAXBYTES + 5];
3248 /* Get the first character of the result. */
3249 U32 uv = utf8n_to_uvuni((U8 *) str,
3254 /* The call to is_utf8_string() above hopefully
3255 * guarantees that there won't be an error. But
3256 * it's easy here to make sure. The function just
3257 * above warns and returns 0 if invalid utf8, but
3258 * it can also return 0 if the input is validly a
3259 * NUL. Disambiguate */
3260 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3261 uv = UNICODE_REPLACEMENT;
3264 /* Convert first code point to hex, including the
3265 * boiler plate before it. For all these, we
3266 * convert to native format so that downstream code
3267 * can continue to assume the input is native */
3269 my_snprintf(hex_string, sizeof(hex_string),
3271 (unsigned int) UNI_TO_NATIVE(uv));
3273 /* Make sure there is enough space to hold it */
3274 d = off + SvGROW(sv, off
3276 + (STRLEN)(send - e)
3277 + 2); /* '}' + NUL */
3279 Copy(hex_string, d, output_length, char);
3282 /* For each subsequent character, append dot and
3283 * its ordinal in hex */
3284 while ((str += char_length) < str_end) {
3285 const STRLEN off = d - SvPVX_const(sv);
3286 U32 uv = utf8n_to_uvuni((U8 *) str,
3290 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3291 uv = UNICODE_REPLACEMENT;
3295 my_snprintf(hex_string, sizeof(hex_string),
3297 (unsigned int) UNI_TO_NATIVE(uv));
3299 d = off + SvGROW(sv, off
3301 + (STRLEN)(send - e)
3302 + 2); /* '}' + NUL */
3303 Copy(hex_string, d, output_length, char);
3307 *d++ = '}'; /* Done. Add the trailing brace */
3310 else { /* Here, not in a pattern. Convert the name to a
3313 /* If destination is not in utf8, unconditionally
3314 * recode it to be so. This is because \N{} implies
3315 * Unicode semantics, and scalars have to be in utf8
3316 * to guarantee those semantics */
3318 SvCUR_set(sv, d - SvPVX_const(sv));
3321 /* See Note on sizing above. */
3322 sv_utf8_upgrade_flags_grow(sv,
3323 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3324 len + (STRLEN)(send - s) + 1);
3325 d = SvPVX(sv) + SvCUR(sv);
3327 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3329 /* See Note on sizing above. (NOTE: SvCUR() is not
3330 * set correctly here). */
3331 const STRLEN off = d - SvPVX_const(sv);
3332 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3334 Copy(str, d, len, char);
3339 /* Deprecate non-approved name syntax */
3340 if (ckWARN_d(WARN_DEPRECATED)) {
3341 bool problematic = FALSE;
3344 /* For non-ut8 input, look to see that the first
3345 * character is an alpha, then loop through the rest
3346 * checking that each is a continuation */
3348 if (! isALPHAU(*i)) problematic = TRUE;
3349 else for (i = s + 1; i < e; i++) {
3350 if (isCHARNAME_CONT(*i)) continue;
3356 /* Similarly for utf8. For invariants can check
3357 * directly. We accept anything above the latin1
3358 * range because it is immaterial to Perl if it is
3359 * correct or not, and is expensive to check. But
3360 * it is fairly easy in the latin1 range to convert
3361 * the variants into a single character and check
3363 if (UTF8_IS_INVARIANT(*i)) {
3364 if (! isALPHAU(*i)) problematic = TRUE;
3365 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3366 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3372 if (! problematic) for (i = s + UTF8SKIP(s);
3376 if (UTF8_IS_INVARIANT(*i)) {
3377 if (isCHARNAME_CONT(*i)) continue;
3378 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3380 } else if (isCHARNAME_CONT(
3382 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3391 /* The e-i passed to the final %.*s makes sure that
3392 * should the trailing NUL be missing that this
3393 * print won't run off the end of the string */
3394 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3395 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3396 (int)(i - s + 1), s, (int)(e - i), i + 1);
3399 } /* End \N{NAME} */
3402 native_range = FALSE; /* \N{} is defined to be Unicode */
3404 s = e + 1; /* Point to just after the '}' */
3407 /* \c is a control character */
3411 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3414 yyerror("Missing control char name in \\c");
3418 /* printf-style backslashes, formfeeds, newlines, etc */
3420 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3423 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3426 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3429 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3432 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3435 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3438 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3444 } /* end if (backslash) */
3451 /* If we started with encoded form, or already know we want it,
3452 then encode the next character */
3453 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3457 /* One might think that it is wasted effort in the case of the
3458 * source being utf8 (this_utf8 == TRUE) to take the next character
3459 * in the source, convert it to an unsigned value, and then convert
3460 * it back again. But the source has not been validated here. The
3461 * routine that does the conversion checks for errors like
3464 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3465 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3467 SvCUR_set(sv, d - SvPVX_const(sv));
3470 /* See Note on sizing above. */
3471 sv_utf8_upgrade_flags_grow(sv,
3472 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3473 need + (STRLEN)(send - s) + 1);
3474 d = SvPVX(sv) + SvCUR(sv);
3476 } else if (need > len) {
3477 /* encoded value larger than old, may need extra space (NOTE:
3478 * SvCUR() is not set correctly here). See Note on sizing
3480 const STRLEN off = d - SvPVX_const(sv);
3481 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3485 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3487 if (uv > 255 && !dorange)
3488 native_range = FALSE;
3492 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3494 } /* while loop to process each character */
3496 /* terminate the string and set up the sv */
3498 SvCUR_set(sv, d - SvPVX_const(sv));
3499 if (SvCUR(sv) >= SvLEN(sv))
3500 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3503 if (PL_encoding && !has_utf8) {
3504 sv_recode_to_utf8(sv, PL_encoding);
3510 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3511 PL_sublex_info.sub_op->op_private |=
3512 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3516 /* shrink the sv if we allocated more than we used */
3517 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3518 SvPV_shrink_to_cur(sv);
3521 /* return the substring (via pl_yylval) only if we parsed anything */
3522 if (s > PL_bufptr) {
3523 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3524 const char *const key = PL_lex_inpat ? "qr" : "q";
3525 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3529 if (PL_lex_inwhat == OP_TRANS) {
3532 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3540 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3543 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3550 * Returns TRUE if there's more to the expression (e.g., a subscript),
3553 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3555 * ->[ and ->{ return TRUE
3556 * { and [ outside a pattern are always subscripts, so return TRUE
3557 * if we're outside a pattern and it's not { or [, then return FALSE
3558 * if we're in a pattern and the first char is a {
3559 * {4,5} (any digits around the comma) returns FALSE
3560 * if we're in a pattern and the first char is a [
3562 * [SOMETHING] has a funky algorithm to decide whether it's a
3563 * character class or not. It has to deal with things like
3564 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3565 * anything else returns TRUE
3568 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3571 S_intuit_more(pTHX_ register char *s)
3575 PERL_ARGS_ASSERT_INTUIT_MORE;
3577 if (PL_lex_brackets)
3579 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3581 if (*s != '{' && *s != '[')
3586 /* In a pattern, so maybe we have {n,m}. */
3594 /* On the other hand, maybe we have a character class */
3597 if (*s == ']' || *s == '^')
3600 /* this is terrifying, and it works */
3601 int weight = 2; /* let's weigh the evidence */
3603 unsigned char un_char = 255, last_un_char;
3604 const char * const send = strchr(s,']');
3605 char tmpbuf[sizeof PL_tokenbuf * 4];
3607 if (!send) /* has to be an expression */
3610 Zero(seen,256,char);
3613 else if (isDIGIT(*s)) {
3615 if (isDIGIT(s[1]) && s[2] == ']')
3621 for (; s < send; s++) {
3622 last_un_char = un_char;
3623 un_char = (unsigned char)*s;
3628 weight -= seen[un_char] * 10;
3629 if (isALNUM_lazy_if(s+1,UTF)) {
3631 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3632 len = (int)strlen(tmpbuf);
3633 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3634 UTF ? SVf_UTF8 : 0, SVt_PV))
3639 else if (*s == '$' && s[1] &&
3640 strchr("[#!%*<>()-=",s[1])) {
3641 if (/*{*/ strchr("])} =",s[2]))
3650 if (strchr("wds]",s[1]))
3652 else if (seen[(U8)'\''] || seen[(U8)'"'])
3654 else if (strchr("rnftbxcav",s[1]))
3656 else if (isDIGIT(s[1])) {
3658 while (s[1] && isDIGIT(s[1]))
3668 if (strchr("aA01! ",last_un_char))
3670 if (strchr("zZ79~",s[1]))
3672 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3673 weight -= 5; /* cope with negative subscript */
3676 if (!isALNUM(last_un_char)
3677 && !(last_un_char == '$' || last_un_char == '@'
3678 || last_un_char == '&')
3679 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3684 if (keyword(tmpbuf, d - tmpbuf, 0))
3687 if (un_char == last_un_char + 1)
3689 weight -= seen[un_char];
3694 if (weight >= 0) /* probably a character class */
3704 * Does all the checking to disambiguate
3706 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3707 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3709 * First argument is the stuff after the first token, e.g. "bar".
3711 * Not a method if bar is a filehandle.
3712 * Not a method if foo is a subroutine prototyped to take a filehandle.
3713 * Not a method if it's really "Foo $bar"
3714 * Method if it's "foo $bar"
3715 * Not a method if it's really "print foo $bar"
3716 * Method if it's really "foo package::" (interpreted as package->foo)
3717 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3718 * Not a method if bar is a filehandle or package, but is quoted with
3723 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3726 char *s = start + (*start == '$');
3727 char tmpbuf[sizeof PL_tokenbuf];
3734 PERL_ARGS_ASSERT_INTUIT_METHOD;
3737 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3741 const char *proto = CvPROTO(cv);
3752 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3753 /* start is the beginning of the possible filehandle/object,
3754 * and s is the end of it
3755 * tmpbuf is a copy of it
3758 if (*start == '$') {
3759 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3760 isUPPER(*PL_tokenbuf))
3763 len = start - SvPVX(PL_linestr);
3767 start = SvPVX(PL_linestr) + len;
3771 return *s == '(' ? FUNCMETH : METHOD;
3773 if (!keyword(tmpbuf, len, 0)) {
3774 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3778 soff = s - SvPVX(PL_linestr);
3782 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3783 if (indirgv && GvCVu(indirgv))
3785 /* filehandle or package name makes it a method */
3786 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3788 soff = s - SvPVX(PL_linestr);
3791 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3792 return 0; /* no assumptions -- "=>" quotes bareword */
3794 start_force(PL_curforce);
3795 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3796 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3797 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3799 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3800 ( UTF ? SVf_UTF8 : 0 )));
3805 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3807 return *s == '(' ? FUNCMETH : METHOD;
3813 /* Encoded script support. filter_add() effectively inserts a
3814 * 'pre-processing' function into the current source input stream.
3815 * Note that the filter function only applies to the current source file
3816 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3818 * The datasv parameter (which may be NULL) can be used to pass
3819 * private data to this instance of the filter. The filter function
3820 * can recover the SV using the FILTER_DATA macro and use it to
3821 * store private buffers and state information.
3823 * The supplied datasv parameter is upgraded to a PVIO type
3824 * and the IoDIRP/IoANY field is used to store the function pointer,
3825 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3826 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3827 * private use must be set using malloc'd pointers.
3831 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3840 if (!PL_rsfp_filters)
3841 PL_rsfp_filters = newAV();
3844 SvUPGRADE(datasv, SVt_PVIO);
3845 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3846 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3847 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3848 FPTR2DPTR(void *, IoANY(datasv)),
3849 SvPV_nolen(datasv)));
3850 av_unshift(PL_rsfp_filters, 1);
3851 av_store(PL_rsfp_filters, 0, datasv) ;
3856 /* Delete most recently added instance of this filter function. */
3858 Perl_filter_del(pTHX_ filter_t funcp)
3863 PERL_ARGS_ASSERT_FILTER_DEL;
3866 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3867 FPTR2DPTR(void*, funcp)));
3869 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3871 /* if filter is on top of stack (usual case) just pop it off */
3872 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3873 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3874 sv_free(av_pop(PL_rsfp_filters));
3878 /* we need to search for the correct entry and clear it */
3879 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3883 /* Invoke the idxth filter function for the current rsfp. */
3884 /* maxlen 0 = read one text line */
3886 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3891 /* This API is bad. It should have been using unsigned int for maxlen.
3892 Not sure if we want to change the API, but if not we should sanity
3893 check the value here. */
3894 const unsigned int correct_length
3903 PERL_ARGS_ASSERT_FILTER_READ;
3905 if (!PL_parser || !PL_rsfp_filters)
3907 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3908 /* Provide a default input filter to make life easy. */
3909 /* Note that we append to the line. This is handy. */
3910 DEBUG_P(PerlIO_printf(Perl_debug_log,
3911 "filter_read %d: from rsfp\n", idx));
3912 if (correct_length) {
3915 const int old_len = SvCUR(buf_sv);
3917 /* ensure buf_sv is large enough */
3918 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3919 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3920 correct_length)) <= 0) {
3921 if (PerlIO_error(PL_rsfp))
3922 return -1; /* error */
3924 return 0 ; /* end of file */
3926 SvCUR_set(buf_sv, old_len + len) ;
3927 SvPVX(buf_sv)[old_len + len] = '\0';
3930 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3931 if (PerlIO_error(PL_rsfp))
3932 return -1; /* error */
3934 return 0 ; /* end of file */
3937 return SvCUR(buf_sv);
3939 /* Skip this filter slot if filter has been deleted */
3940 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3941 DEBUG_P(PerlIO_printf(Perl_debug_log,
3942 "filter_read %d: skipped (filter deleted)\n",
3944 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3946 /* Get function pointer hidden within datasv */
3947 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3948 DEBUG_P(PerlIO_printf(Perl_debug_log,
3949 "filter_read %d: via function %p (%s)\n",
3950 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3951 /* Call function. The function is expected to */
3952 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3953 /* Return: <0:error, =0:eof, >0:not eof */
3954 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3958 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3962 PERL_ARGS_ASSERT_FILTER_GETS;
3964 #ifdef PERL_CR_FILTER
3965 if (!PL_rsfp_filters) {
3966 filter_add(S_cr_textfilter,NULL);
3969 if (PL_rsfp_filters) {
3971 SvCUR_set(sv, 0); /* start with empty line */
3972 if (FILTER_READ(0, sv, 0) > 0)
3973 return ( SvPVX(sv) ) ;
3978 return (sv_gets(sv, PL_rsfp, append));
3982 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3987 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3989 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3993 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3994 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
3996 return GvHV(gv); /* Foo:: */
3999 /* use constant CLASS => 'MyClass' */
4000 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4001 if (gv && GvCV(gv)) {
4002 SV * const sv = cv_const_sv(GvCV(gv));
4004 pkgname = SvPV_const(sv, len);
4007 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4011 * S_readpipe_override
4012 * Check whether readpipe() is overridden, and generates the appropriate
4013 * optree, provided sublex_start() is called afterwards.
4016 S_readpipe_override(pTHX)
4019 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4020 pl_yylval.ival = OP_BACKTICK;
4022 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4024 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4025 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4026 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4028 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4029 op_append_elem(OP_LIST,
4030 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4031 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4038 * The intent of this yylex wrapper is to minimize the changes to the
4039 * tokener when we aren't interested in collecting madprops. It remains
4040 * to be seen how successful this strategy will be...
4047 char *s = PL_bufptr;
4049 /* make sure PL_thiswhite is initialized */
4053 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4054 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4055 return S_pending_ident(aTHX);
4057 /* previous token ate up our whitespace? */
4058 if (!PL_lasttoke && PL_nextwhite) {
4059 PL_thiswhite = PL_nextwhite;
4063 /* isolate the token, and figure out where it is without whitespace */
4064 PL_realtokenstart = -1;
4068 assert(PL_curforce < 0);
4070 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4071 if (!PL_thistoken) {
4072 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4073 PL_thistoken = newSVpvs("");
4075 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4076 PL_thistoken = newSVpvn(tstart, s - tstart);
4079 if (PL_thismad) /* install head */
4080 CURMAD('X', PL_thistoken);
4083 /* last whitespace of a sublex? */
4084 if (optype == ')' && PL_endwhite) {
4085 CURMAD('X', PL_endwhite);
4090 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4091 if (!PL_thiswhite && !PL_endwhite && !optype) {
4092 sv_free(PL_thistoken);
4097 /* put off final whitespace till peg */
4098 if (optype == ';' && !PL_rsfp) {
4099 PL_nextwhite = PL_thiswhite;
4102 else if (PL_thisopen) {
4103 CURMAD('q', PL_thisopen);
4105 sv_free(PL_thistoken);
4109 /* Store actual token text as madprop X */
4110 CURMAD('X', PL_thistoken);
4114 /* add preceding whitespace as madprop _ */
4115 CURMAD('_', PL_thiswhite);
4119 /* add quoted material as madprop = */
4120 CURMAD('=', PL_thisstuff);
4124 /* add terminating quote as madprop Q */
4125 CURMAD('Q', PL_thisclose);
4129 /* special processing based on optype */
4133 /* opval doesn't need a TOKEN since it can already store mp */
4143 if (pl_yylval.opval)
4144 append_madprops(PL_thismad, pl_yylval.opval, 0);
4152 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4161 /* remember any fake bracket that lexer is about to discard */
4162 if (PL_lex_brackets == 1 &&
4163 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4166 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4169 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4170 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4173 break; /* don't bother looking for trailing comment */
4182 /* attach a trailing comment to its statement instead of next token */
4186 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4188 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4190 if (*s == '\n' || *s == '#') {
4191 while (s < PL_bufend && *s != '\n')
4195 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4196 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4213 /* Create new token struct. Note: opvals return early above. */
4214 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4221 S_tokenize_use(pTHX_ int is_use, char *s) {
4224 PERL_ARGS_ASSERT_TOKENIZE_USE;
4226 if (PL_expect != XSTATE)
4227 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4228 is_use ? "use" : "no"));
4230 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4231 s = force_version(s, TRUE);
4232 if (*s == ';' || *s == '}'
4233 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4234 start_force(PL_curforce);
4235 NEXTVAL_NEXTTOKE.opval = NULL;
4238 else if (*s == 'v') {
4239 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4240 s = force_version(s, FALSE);
4244 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4245 s = force_version(s, FALSE);
4247 pl_yylval.ival = is_use;
4251 static const char* const exp_name[] =
4252 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4253 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4257 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4259 S_word_takes_any_delimeter(char *p, STRLEN len)
4261 return (len == 1 && strchr("msyq", p[0])) ||
4263 (p[0] == 't' && p[1] == 'r') ||
4264 (p[0] == 'q' && strchr("qwxr", p[1]))));
4270 Works out what to call the token just pulled out of the input
4271 stream. The yacc parser takes care of taking the ops we return and
4272 stitching them into a tree.
4278 if read an identifier
4279 if we're in a my declaration
4280 croak if they tried to say my($foo::bar)
4281 build the ops for a my() declaration
4282 if it's an access to a my() variable
4283 are we in a sort block?
4284 croak if my($a); $a <=> $b
4285 build ops for access to a my() variable
4286 if in a dq string, and they've said @foo and we can't find @foo
4288 build ops for a bareword
4289 if we already built the token before, use it.
4294 #pragma segment Perl_yylex
4300 register char *s = PL_bufptr;
4306 /* orig_keyword, gvp, and gv are initialized here because
4307 * jump to the label just_a_word_zero can bypass their
4308 * initialization later. */
4309 I32 orig_keyword = 0;
4314 SV* tmp = newSVpvs("");
4315 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4316 (IV)CopLINE(PL_curcop),
4317 lex_state_names[PL_lex_state],
4318 exp_name[PL_expect],
4319 pv_display(tmp, s, strlen(s), 0, 60));
4322 /* check if there's an identifier for us to look at */
4323 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4324 return REPORT(S_pending_ident(aTHX));
4326 /* no identifier pending identification */
4328 switch (PL_lex_state) {
4330 case LEX_NORMAL: /* Some compilers will produce faster */
4331 case LEX_INTERPNORMAL: /* code if we comment these out. */
4335 /* when we've already built the next token, just pull it out of the queue */
4339 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4341 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4342 PL_nexttoke[PL_lasttoke].next_mad = 0;
4343 if (PL_thismad && PL_thismad->mad_key == '_') {
4344 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4345 PL_thismad->mad_val = 0;
4346 mad_free(PL_thismad);
4351 PL_lex_state = PL_lex_defer;
4352 PL_expect = PL_lex_expect;
4353 PL_lex_defer = LEX_NORMAL;
4354 if (!PL_nexttoke[PL_lasttoke].next_type)
4359 pl_yylval = PL_nextval[PL_nexttoke];
4361 PL_lex_state = PL_lex_defer;
4362 PL_expect = PL_lex_expect;
4363 PL_lex_defer = LEX_NORMAL;
4369 next_type = PL_nexttoke[PL_lasttoke].next_type;
4371 next_type = PL_nexttype[PL_nexttoke];
4373 if (next_type & (7<<24)) {
4374 if (next_type & (1<<24)) {
4375 if (PL_lex_brackets > 100)
4376 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4377 PL_lex_brackstack[PL_lex_brackets++] =
4378 (char) ((next_type >> 16) & 0xff);
4380 if (next_type & (2<<24))
4381 PL_lex_allbrackets++;
4382 if (next_type & (4<<24))
4383 PL_lex_allbrackets--;
4384 next_type &= 0xffff;
4387 /* FIXME - can these be merged? */
4390 return REPORT(next_type);
4394 /* interpolated case modifiers like \L \U, including \Q and \E.
4395 when we get here, PL_bufptr is at the \
4397 case LEX_INTERPCASEMOD:
4399 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4400 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4402 /* handle \E or end of string */
4403 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4405 if (PL_lex_casemods) {
4406 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4407 PL_lex_casestack[PL_lex_casemods] = '\0';
4409 if (PL_bufptr != PL_bufend
4410 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4412 PL_lex_state = LEX_INTERPCONCAT;
4415 PL_thistoken = newSVpvs("\\E");
4418 PL_lex_allbrackets--;
4422 while (PL_bufptr != PL_bufend &&
4423 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4425 PL_thiswhite = newSVpvs("");
4426 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4430 if (PL_bufptr != PL_bufend)
4433 PL_lex_state = LEX_INTERPCONCAT;
4437 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4438 "### Saw case modifier\n"); });
4440 if (s[1] == '\\' && s[2] == 'E') {
4443 PL_thiswhite = newSVpvs("");
4444 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4447 PL_lex_state = LEX_INTERPCONCAT;
4452 if (!PL_madskills) /* when just compiling don't need correct */
4453 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4454 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4455 if ((*s == 'L' || *s == 'U') &&
4456 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4457 PL_lex_casestack[--PL_lex_casemods] = '\0';
4458 PL_lex_allbrackets--;
4461 if (PL_lex_casemods > 10)
4462 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4463 PL_lex_casestack[PL_lex_casemods++] = *s;
4464 PL_lex_casestack[PL_lex_casemods] = '\0';
4465 PL_lex_state = LEX_INTERPCONCAT;
4466 start_force(PL_curforce);
4467 NEXTVAL_NEXTTOKE.ival = 0;
4468 force_next((2<<24)|'(');
4469 start_force(PL_curforce);
4471 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4473 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4475 NEXTVAL_NEXTTOKE.ival = OP_LC;
4477 NEXTVAL_NEXTTOKE.ival = OP_UC;
4479 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4481 Perl_croak(aTHX_ "panic: yylex");
4483 SV* const tmpsv = newSVpvs("\\ ");
4484 /* replace the space with the character we want to escape
4486 SvPVX(tmpsv)[1] = *s;
4492 if (PL_lex_starts) {
4498 sv_free(PL_thistoken);
4499 PL_thistoken = newSVpvs("");
4502 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4503 if (PL_lex_casemods == 1 && PL_lex_inpat)
4512 case LEX_INTERPPUSH:
4513 return REPORT(sublex_push());
4515 case LEX_INTERPSTART:
4516 if (PL_bufptr == PL_bufend)
4517 return REPORT(sublex_done());
4518 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4519 "### Interpolated variable\n"); });
4521 PL_lex_dojoin = (*PL_bufptr == '@');
4522 PL_lex_state = LEX_INTERPNORMAL;
4523 if (PL_lex_dojoin) {
4524 start_force(PL_curforce);
4525 NEXTVAL_NEXTTOKE.ival = 0;
4527 start_force(PL_curforce);
4528 force_ident("\"", '$');
4529 start_force(PL_curforce);
4530 NEXTVAL_NEXTTOKE.ival = 0;
4532 start_force(PL_curforce);
4533 NEXTVAL_NEXTTOKE.ival = 0;
4534 force_next((2<<24)|'(');
4535 start_force(PL_curforce);
4536 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4539 if (PL_lex_starts++) {
4544 sv_free(PL_thistoken);
4545 PL_thistoken = newSVpvs("");
4548 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4549 if (!PL_lex_casemods && PL_lex_inpat)
4556 case LEX_INTERPENDMAYBE:
4557 if (intuit_more(PL_bufptr)) {
4558 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4564 if (PL_lex_dojoin) {
4565 PL_lex_dojoin = FALSE;
4566 PL_lex_state = LEX_INTERPCONCAT;
4570 sv_free(PL_thistoken);
4571 PL_thistoken = newSVpvs("");
4574 PL_lex_allbrackets--;
4577 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4578 && SvEVALED(PL_lex_repl))
4580 if (PL_bufptr != PL_bufend)
4581 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4585 case LEX_INTERPCONCAT:
4587 if (PL_lex_brackets)
4588 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4590 if (PL_bufptr == PL_bufend)
4591 return REPORT(sublex_done());
4593 if (SvIVX(PL_linestr) == '\'') {
4594 SV *sv = newSVsv(PL_linestr);
4597 else if ( PL_hints & HINT_NEW_RE )
4598 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4599 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4603 s = scan_const(PL_bufptr);
4605 PL_lex_state = LEX_INTERPCASEMOD;
4607 PL_lex_state = LEX_INTERPSTART;
4610 if (s != PL_bufptr) {
4611 start_force(PL_curforce);
4613 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4615 NEXTVAL_NEXTTOKE = pl_yylval;
4618 if (PL_lex_starts++) {
4622 sv_free(PL_thistoken);
4623 PL_thistoken = newSVpvs("");
4626 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4627 if (!PL_lex_casemods && PL_lex_inpat)
4640 PL_lex_state = LEX_NORMAL;
4641 s = scan_formline(PL_bufptr);
4642 if (!PL_lex_formbrack)
4648 PL_oldoldbufptr = PL_oldbufptr;
4654 sv_free(PL_thistoken);
4657 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4661 if (isIDFIRST_lazy_if(s,UTF))
4664 unsigned char c = *s;
4665 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4666 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4667 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4672 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4676 goto fake_eof; /* emulate EOF on ^D or ^Z */
4685 if (PL_lex_brackets &&
4686 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4687 yyerror((const char *)
4689 ? "Format not terminated"
4690 : "Missing right curly or square bracket"));
4692 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4693 "### Tokener got EOF\n");
4697 if (s++ < PL_bufend)
4698 goto retry; /* ignore stray nulls */
4701 if (!PL_in_eval && !PL_preambled) {
4702 PL_preambled = TRUE;
4708 /* Generate a string of Perl code to load the debugger.
4709 * If PERL5DB is set, it will return the contents of that,
4710 * otherwise a compile-time require of perl5db.pl. */
4712 const char * const pdb = PerlEnv_getenv("PERL5DB");
4715 sv_setpv(PL_linestr, pdb);
4716 sv_catpvs(PL_linestr,";");
4718 SETERRNO(0,SS_NORMAL);
4719 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4722 sv_setpvs(PL_linestr,"");
4723 if (PL_preambleav) {
4724 SV **svp = AvARRAY(PL_preambleav);
4725 SV **const end = svp + AvFILLp(PL_preambleav);
4727 sv_catsv(PL_linestr, *svp);
4729 sv_catpvs(PL_linestr, ";");
4731 sv_free(MUTABLE_SV(PL_preambleav));
4732 PL_preambleav = NULL;
4735 sv_catpvs(PL_linestr,
4736 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4737 if (PL_minus_n || PL_minus_p) {
4738 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4740 sv_catpvs(PL_linestr,"chomp;");
4743 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4744 || *PL_splitstr == '"')
4745 && strchr(PL_splitstr + 1, *PL_splitstr))
4746 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4748 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4749 bytes can be used as quoting characters. :-) */
4750 const char *splits = PL_splitstr;
4751 sv_catpvs(PL_linestr, "our @F=split(q\0");
4754 if (*splits == '\\')
4755 sv_catpvn(PL_linestr, splits, 1);
4756 sv_catpvn(PL_linestr, splits, 1);
4757 } while (*splits++);
4758 /* This loop will embed the trailing NUL of
4759 PL_linestr as the last thing it does before
4761 sv_catpvs(PL_linestr, ");");
4765 sv_catpvs(PL_linestr,"our @F=split(' ');");
4768 sv_catpvs(PL_linestr, "\n");
4769 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4770 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4771 PL_last_lop = PL_last_uni = NULL;
4772 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4773 update_debugger_info(PL_linestr, NULL, 0);
4778 bof = PL_rsfp ? TRUE : FALSE;
4781 fake_eof = LEX_FAKE_EOF;
4783 PL_bufptr = PL_bufend;
4784 CopLINE_inc(PL_curcop);
4785 if (!lex_next_chunk(fake_eof)) {
4786 CopLINE_dec(PL_curcop);
4788 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4790 CopLINE_dec(PL_curcop);
4793 PL_realtokenstart = -1;
4796 /* If it looks like the start of a BOM or raw UTF-16,
4797 * check if it in fact is. */
4798 if (bof && PL_rsfp &&
4803 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4804 bof = (offset == (Off_t)SvCUR(PL_linestr));
4805 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4806 /* offset may include swallowed CR */
4808 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4811 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4812 s = swallow_bom((U8*)s);
4815 if (PL_parser->in_pod) {
4816 /* Incest with pod. */
4819 sv_catsv(PL_thiswhite, PL_linestr);
4821 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4822 sv_setpvs(PL_linestr, "");
4823 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4824 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4825 PL_last_lop = PL_last_uni = NULL;
4826 PL_parser->in_pod = 0;
4831 } while (PL_parser->in_pod);
4832 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4833 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4834 PL_last_lop = PL_last_uni = NULL;
4835 if (CopLINE(PL_curcop) == 1) {
4836 while (s < PL_bufend && isSPACE(*s))
4838 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4842 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4846 if (*s == '#' && *(s+1) == '!')
4848 #ifdef ALTERNATE_SHEBANG
4850 static char const as[] = ALTERNATE_SHEBANG;
4851 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4852 d = s + (sizeof(as) - 1);
4854 #endif /* ALTERNATE_SHEBANG */
4863 while (*d && !isSPACE(*d))
4867 #ifdef ARG_ZERO_IS_SCRIPT
4868 if (ipathend > ipath) {
4870 * HP-UX (at least) sets argv[0] to the script name,
4871 * which makes $^X incorrect. And Digital UNIX and Linux,
4872 * at least, set argv[0] to the basename of the Perl
4873 * interpreter. So, having found "#!", we'll set it right.
4875 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4877 assert(SvPOK(x) || SvGMAGICAL(x));
4878 if (sv_eq(x, CopFILESV(PL_curcop))) {
4879 sv_setpvn(x, ipath, ipathend - ipath);
4885 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4886 const char * const lstart = SvPV_const(x,llen);
4888 bstart += blen - llen;
4889 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4890 sv_setpvn(x, ipath, ipathend - ipath);
4895 TAINT_NOT; /* $^X is always tainted, but that's OK */
4897 #endif /* ARG_ZERO_IS_SCRIPT */
4902 d = instr(s,"perl -");
4904 d = instr(s,"perl");
4906 /* avoid getting into infinite loops when shebang
4907 * line contains "Perl" rather than "perl" */
4909 for (d = ipathend-4; d >= ipath; --d) {
4910 if ((*d == 'p' || *d == 'P')
4911 && !ibcmp(d, "perl", 4))
4921 #ifdef ALTERNATE_SHEBANG
4923 * If the ALTERNATE_SHEBANG on this system starts with a
4924 * character that can be part of a Perl expression, then if
4925 * we see it but not "perl", we're probably looking at the
4926 * start of Perl code, not a request to hand off to some
4927 * other interpreter. Similarly, if "perl" is there, but
4928 * not in the first 'word' of the line, we assume the line
4929 * contains the start of the Perl program.
4931 if (d && *s != '#') {
4932 const char *c = ipath;
4933 while (*c && !strchr("; \t\r\n\f\v#", *c))
4936 d = NULL; /* "perl" not in first word; ignore */
4938 *s = '#'; /* Don't try to parse shebang line */
4940 #endif /* ALTERNATE_SHEBANG */
4945 !instr(s,"indir") &&
4946 instr(PL_origargv[0],"perl"))
4953 while (s < PL_bufend && isSPACE(*s))
4955 if (s < PL_bufend) {
4956 Newx(newargv,PL_origargc+3,char*);
4958 while (s < PL_bufend && !isSPACE(*s))
4961 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4964 newargv = PL_origargv;
4967 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4969 Perl_croak(aTHX_ "Can't exec %s", ipath);
4972 while (*d && !isSPACE(*d))
4974 while (SPACE_OR_TAB(*d))
4978 const bool switches_done = PL_doswitches;
4979 const U32 oldpdb = PL_perldb;
4980 const bool oldn = PL_minus_n;
4981 const bool oldp = PL_minus_p;
4985 bool baduni = FALSE;
4987 const char *d2 = d1 + 1;
4988 if (parse_unicode_opts((const char **)&d2)
4992 if (baduni || *d1 == 'M' || *d1 == 'm') {
4993 const char * const m = d1;
4994 while (*d1 && !isSPACE(*d1))
4996 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4999 d1 = moreswitches(d1);
5001 if (PL_doswitches && !switches_done) {
5002 int argc = PL_origargc;
5003 char **argv = PL_origargv;
5006 } while (argc && argv[0][0] == '-' && argv[0][1]);
5007 init_argv_symbols(argc,argv);
5009 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5010 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5011 /* if we have already added "LINE: while (<>) {",
5012 we must not do it again */
5014 sv_setpvs(PL_linestr, "");
5015 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5016 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5017 PL_last_lop = PL_last_uni = NULL;
5018 PL_preambled = FALSE;
5019 if (PERLDB_LINE || PERLDB_SAVESRC)
5020 (void)gv_fetchfile(PL_origfilename);
5027 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5029 PL_lex_state = LEX_FORMLINE;
5034 #ifdef PERL_STRICT_CR
5035 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5037 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5039 case ' ': case '\t': case '\f': case 013:
5041 PL_realtokenstart = -1;
5043 PL_thiswhite = newSVpvs("");
5044 sv_catpvn(PL_thiswhite, s, 1);
5051 PL_realtokenstart = -1;
5055 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5056 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5057 /* handle eval qq[#line 1 "foo"\n ...] */
5058 CopLINE_dec(PL_curcop);
5061 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5063 if (!PL_in_eval || PL_rsfp)
5068 while (d < PL_bufend && *d != '\n')
5072 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5073 Perl_croak(aTHX_ "panic: input overflow");
5076 PL_thiswhite = newSVpvn(s, d - s);
5081 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5083 PL_lex_state = LEX_FORMLINE;
5089 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5090 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5093 TOKEN(PEG); /* make sure any #! line is accessible */
5098 /* if (PL_madskills && PL_lex_formbrack) { */
5100 while (d < PL_bufend && *d != '\n')
5104 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5105 Perl_croak(aTHX_ "panic: input overflow");
5106 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5108 PL_thiswhite = newSVpvs("");
5109 if (CopLINE(PL_curcop) == 1) {
5110 sv_setpvs(PL_thiswhite, "");
5113 sv_catpvn(PL_thiswhite, s, d - s);
5127 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5135 while (s < PL_bufend && SPACE_OR_TAB(*s))
5138 if (strnEQ(s,"=>",2)) {
5139 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5140 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5141 OPERATOR('-'); /* unary minus */
5143 PL_last_uni = PL_oldbufptr;
5145 case 'r': ftst = OP_FTEREAD; break;
5146 case 'w': ftst = OP_FTEWRITE; break;
5147 case 'x': ftst = OP_FTEEXEC; break;
5148 case 'o': ftst = OP_FTEOWNED; break;
5149 case 'R': ftst = OP_FTRREAD; break;
5150 case 'W': ftst = OP_FTRWRITE; break;
5151 case 'X': ftst = OP_FTREXEC; break;
5152 case 'O': ftst = OP_FTROWNED; break;
5153 case 'e': ftst = OP_FTIS; break;
5154 case 'z': ftst = OP_FTZERO; break;
5155 case 's': ftst = OP_FTSIZE; break;
5156 case 'f': ftst = OP_FTFILE; break;
5157 case 'd': ftst = OP_FTDIR; break;
5158 case 'l': ftst = OP_FTLINK; break;
5159 case 'p': ftst = OP_FTPIPE; break;
5160 case 'S': ftst = OP_FTSOCK; break;
5161 case 'u': ftst = OP_FTSUID; break;
5162 case 'g': ftst = OP_FTSGID; break;
5163 case 'k': ftst = OP_FTSVTX; break;
5164 case 'b': ftst = OP_FTBLK; break;
5165 case 'c': ftst = OP_FTCHR; break;
5166 case 't': ftst = OP_FTTTY; break;
5167 case 'T': ftst = OP_FTTEXT; break;
5168 case 'B': ftst = OP_FTBINARY; break;
5169 case 'M': case 'A': case 'C':
5170 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5172 case 'M': ftst = OP_FTMTIME; break;
5173 case 'A': ftst = OP_FTATIME; break;
5174 case 'C': ftst = OP_FTCTIME; break;
5182 PL_last_lop_op = (OPCODE)ftst;
5183 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5184 "### Saw file test %c\n", (int)tmp);
5189 /* Assume it was a minus followed by a one-letter named
5190 * subroutine call (or a -bareword), then. */
5191 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5192 "### '-%c' looked like a file test but was not\n",
5199 const char tmp = *s++;
5202 if (PL_expect == XOPERATOR)
5207 else if (*s == '>') {
5210 if (isIDFIRST_lazy_if(s,UTF)) {
5211 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5219 if (PL_expect == XOPERATOR) {
5220 if (*s == '=' && !PL_lex_allbrackets &&
5221 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5228 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5230 OPERATOR('-'); /* unary minus */
5236 const char tmp = *s++;
5239 if (PL_expect == XOPERATOR)
5244 if (PL_expect == XOPERATOR) {
5245 if (*s == '=' && !PL_lex_allbrackets &&
5246 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5253 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5260 if (PL_expect != XOPERATOR) {
5261 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5262 PL_expect = XOPERATOR;
5263 force_ident(PL_tokenbuf, '*');
5271 if (*s == '=' && !PL_lex_allbrackets &&
5272 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5278 if (*s == '=' && !PL_lex_allbrackets &&
5279 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5286 if (PL_expect == XOPERATOR) {
5287 if (s[1] == '=' && !PL_lex_allbrackets &&
5288 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5293 PL_tokenbuf[0] = '%';
5294 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5295 sizeof PL_tokenbuf - 1, FALSE);
5296 if (!PL_tokenbuf[1]) {
5299 PL_pending_ident = '%';
5303 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5304 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5309 if (PL_lex_brackets > 100)
5310 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5311 PL_lex_brackstack[PL_lex_brackets++] = 0;
5312 PL_lex_allbrackets++;
5314 const char tmp = *s++;
5319 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5321 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5327 if (Perl_feature_is_enabled(aTHX_ "dot", 3)) {
5328 /* Perl_warn("feature dot enabled, ~ becomes concat\n"); */
5331 /* Perl_warn("feature dot not enabled, ~ stays ~\n"); */
5336 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5343 goto just_a_word_zero_gv;
5346 switch (PL_expect) {
5352 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5354 PL_bufptr = s; /* update in case we back off */
5357 "Use of := for an empty attribute list is not allowed");
5364 PL_expect = XTERMBLOCK;
5367 stuffstart = s - SvPVX(PL_linestr) - 1;
5371 while (isIDFIRST_lazy_if(s,UTF)) {
5374 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5375 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5376 if (tmp < 0) tmp = -tmp;
5391 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5393 d = scan_str(d,TRUE,TRUE);
5395 /* MUST advance bufptr here to avoid bogus
5396 "at end of line" context messages from yyerror().
5398 PL_bufptr = s + len;
5399 yyerror("Unterminated attribute parameter in attribute list");
5403 return REPORT(0); /* EOF indicator */
5407 sv_catsv(sv, PL_lex_stuff);
5408 attrs = op_append_elem(OP_LIST, attrs,
5409 newSVOP(OP_CONST, 0, sv));
5410 SvREFCNT_dec(PL_lex_stuff);
5411 PL_lex_stuff = NULL;
5414 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5416 if (PL_in_my == KEY_our) {
5417 deprecate(":unique");
5420 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5423 /* NOTE: any CV attrs applied here need to be part of
5424 the CVf_BUILTIN_ATTRS define in cv.h! */
5425 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5427 CvLVALUE_on(PL_compcv);
5429 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5431 deprecate(":locked");
5433 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5435 CvMETHOD_on(PL_compcv);
5437 /* After we've set the flags, it could be argued that
5438 we don't need to do the attributes.pm-based setting
5439 process, and shouldn't bother appending recognized
5440 flags. To experiment with that, uncomment the
5441 following "else". (Note that's already been
5442 uncommented. That keeps the above-applied built-in
5443 attributes from being intercepted (and possibly
5444 rejected) by a package's attribute routines, but is
5445 justified by the performance win for the common case
5446 of applying only built-in attributes.) */
5448 attrs = op_append_elem(OP_LIST, attrs,
5449 newSVOP(OP_CONST, 0,
5453 if (*s == ':' && s[1] != ':')
5456 break; /* require real whitespace or :'s */
5457 /* XXX losing whitespace on sequential attributes here */
5461 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5462 if (*s != ';' && *s != '}' && *s != tmp
5463 && (tmp != '=' || *s != ')')) {
5464 const char q = ((*s == '\'') ? '"' : '\'');
5465 /* If here for an expression, and parsed no attrs, back
5467 if (tmp == '=' && !attrs) {
5471 /* MUST advance bufptr here to avoid bogus "at end of line"
5472 context messages from yyerror().
5475 yyerror( (const char *)
5477 ? Perl_form(aTHX_ "Invalid separator character "
5478 "%c%c%c in attribute list", q, *s, q)
5479 : "Unterminated attribute list" ) );
5487 start_force(PL_curforce);
5488 NEXTVAL_NEXTTOKE.opval = attrs;
5489 CURMAD('_', PL_nextwhite);
5494 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5495 (s - SvPVX(PL_linestr)) - stuffstart);
5500 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5504 PL_lex_allbrackets--;
5508 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5509 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5513 PL_lex_allbrackets++;
5516 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5522 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5525 PL_lex_allbrackets--;
5531 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5534 if (PL_lex_brackets <= 0)
5535 yyerror("Unmatched right square bracket");
5538 PL_lex_allbrackets--;
5539 if (PL_lex_state == LEX_INTERPNORMAL) {
5540 if (PL_lex_brackets == 0) {
5541 if (*s == '-' && s[1] == '>')
5542 PL_lex_state = LEX_INTERPENDMAYBE;
5543 else if (*s != '[' && *s != '{')
5544 PL_lex_state = LEX_INTERPEND;
5551 if (PL_lex_brackets > 100) {
5552 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5554 switch (PL_expect) {
5556 if (PL_lex_formbrack) {
5560 if (PL_oldoldbufptr == PL_last_lop)
5561 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5563 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5564 PL_lex_allbrackets++;
5565 OPERATOR(HASHBRACK);
5567 while (s < PL_bufend && SPACE_OR_TAB(*s))
5570 PL_tokenbuf[0] = '\0';
5571 if (d < PL_bufend && *d == '-') {
5572 PL_tokenbuf[0] = '-';
5574 while (d < PL_bufend && SPACE_OR_TAB(*d))
5577 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5578 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5580 while (d < PL_bufend && SPACE_OR_TAB(*d))
5583 const char minus = (PL_tokenbuf[0] == '-');
5584 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5592 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5593 PL_lex_allbrackets++;
5598 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5599 PL_lex_allbrackets++;
5604 if (PL_oldoldbufptr == PL_last_lop)
5605 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5607 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5608 PL_lex_allbrackets++;
5611 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5613 /* This hack is to get the ${} in the message. */
5615 yyerror("syntax error");
5618 OPERATOR(HASHBRACK);
5620 /* This hack serves to disambiguate a pair of curlies
5621 * as being a block or an anon hash. Normally, expectation
5622 * determines that, but in cases where we're not in a
5623 * position to expect anything in particular (like inside
5624 * eval"") we have to resolve the ambiguity. This code
5625 * covers the case where the first term in the curlies is a
5626 * quoted string. Most other cases need to be explicitly
5627 * disambiguated by prepending a "+" before the opening
5628 * curly in order to force resolution as an anon hash.
5630 * XXX should probably propagate the outer expectation
5631 * into eval"" to rely less on this hack, but that could
5632 * potentially break current behavior of eval"".
5636 if (*s == '\'' || *s == '"' || *s == '`') {
5637 /* common case: get past first string, handling escapes */
5638 for (t++; t < PL_bufend && *t != *s;)
5639 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5643 else if (*s == 'q') {
5646 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5649 /* skip q//-like construct */
5651 char open, close, term;
5654 while (t < PL_bufend && isSPACE(*t))
5656 /* check for q => */
5657 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5658 OPERATOR(HASHBRACK);
5662 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5666 for (t++; t < PL_bufend; t++) {
5667 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5669 else if (*t == open)
5673 for (t++; t < PL_bufend; t++) {
5674 if (*t == '\\' && t+1 < PL_bufend)
5676 else if (*t == close && --brackets <= 0)
5678 else if (*t == open)
5685 /* skip plain q word */
5686 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5689 else if (isALNUM_lazy_if(t,UTF)) {
5691 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5694 while (t < PL_bufend && isSPACE(*t))
5696 /* if comma follows first term, call it an anon hash */
5697 /* XXX it could be a comma expression with loop modifiers */
5698 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5699 || (*t == '=' && t[1] == '>')))
5700 OPERATOR(HASHBRACK);
5701 if (PL_expect == XREF)
5704 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5710 pl_yylval.ival = CopLINE(PL_curcop);
5711 if (isSPACE(*s) || *s == '#')
5712 PL_copline = NOLINE; /* invalidate current command line number */
5715 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5719 if (PL_lex_brackets <= 0)
5720 yyerror("Unmatched right curly bracket");
5722 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5723 PL_lex_allbrackets--;
5724 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5725 PL_lex_formbrack = 0;
5726 if (PL_lex_state == LEX_INTERPNORMAL) {
5727 if (PL_lex_brackets == 0) {
5728 if (PL_expect & XFAKEBRACK) {
5729 PL_expect &= XENUMMASK;
5730 PL_lex_state = LEX_INTERPEND;
5735 PL_thiswhite = newSVpvs("");
5736 sv_catpvs(PL_thiswhite,"}");
5739 return yylex(); /* ignore fake brackets */
5741 if (*s == '-' && s[1] == '>')
5742 PL_lex_state = LEX_INTERPENDMAYBE;
5743 else if (*s != '[' && *s != '{')
5744 PL_lex_state = LEX_INTERPEND;
5747 if (PL_expect & XFAKEBRACK) {
5748 PL_expect &= XENUMMASK;
5750 return yylex(); /* ignore fake brackets */
5752 start_force(PL_curforce);
5754 curmad('X', newSVpvn(s-1,1));
5755 CURMAD('_', PL_thiswhite);
5760 PL_thistoken = newSVpvs("");
5766 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5767 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5774 if (PL_expect == XOPERATOR) {
5775 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5776 && isIDFIRST_lazy_if(s,UTF))
5778 CopLINE_dec(PL_curcop);
5779 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5780 CopLINE_inc(PL_curcop);
5782 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5783 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5790 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5792 PL_expect = XOPERATOR;
5793 force_ident(PL_tokenbuf, '&');
5797 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5803 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5804 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5811 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5812 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5820 const char tmp = *s++;
5822 if (!PL_lex_allbrackets &&
5823 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5830 if (!PL_lex_allbrackets &&
5831 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5839 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5840 && strchr("+-*/%.^&|<",tmp))
5841 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5842 "Reversed %c= operator",(int)tmp);
5844 if (PL_expect == XSTATE && isALPHA(tmp) &&
5845 (s == PL_linestart+1 || s[-2] == '\n') )
5847 if (PL_in_eval && !PL_rsfp) {
5852 if (strnEQ(s,"=cut",4)) {