3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_pending_ident (PL_parser->pending_ident)
70 #define PL_preambled (PL_parser->preambled)
71 #define PL_sublex_info (PL_parser->sublex_info)
72 #define PL_linestr (PL_parser->linestr)
73 #define PL_expect (PL_parser->expect)
74 #define PL_copline (PL_parser->copline)
75 #define PL_bufptr (PL_parser->bufptr)
76 #define PL_oldbufptr (PL_parser->oldbufptr)
77 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78 #define PL_linestart (PL_parser->linestart)
79 #define PL_bufend (PL_parser->bufend)
80 #define PL_last_uni (PL_parser->last_uni)
81 #define PL_last_lop (PL_parser->last_lop)
82 #define PL_last_lop_op (PL_parser->last_lop_op)
83 #define PL_lex_state (PL_parser->lex_state)
84 #define PL_rsfp (PL_parser->rsfp)
85 #define PL_rsfp_filters (PL_parser->rsfp_filters)
86 #define PL_in_my (PL_parser->in_my)
87 #define PL_in_my_stash (PL_parser->in_my_stash)
88 #define PL_tokenbuf (PL_parser->tokenbuf)
89 #define PL_multi_end (PL_parser->multi_end)
90 #define PL_error_count (PL_parser->error_count)
93 # define PL_endwhite (PL_parser->endwhite)
94 # define PL_faketokens (PL_parser->faketokens)
95 # define PL_lasttoke (PL_parser->lasttoke)
96 # define PL_nextwhite (PL_parser->nextwhite)
97 # define PL_realtokenstart (PL_parser->realtokenstart)
98 # define PL_skipwhite (PL_parser->skipwhite)
99 # define PL_thisclose (PL_parser->thisclose)
100 # define PL_thismad (PL_parser->thismad)
101 # define PL_thisopen (PL_parser->thisopen)
102 # define PL_thisstuff (PL_parser->thisstuff)
103 # define PL_thistoken (PL_parser->thistoken)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_thiswhite (PL_parser->thiswhite)
106 # define PL_nexttoke (PL_parser->nexttoke)
107 # define PL_curforce (PL_parser->curforce)
109 # define PL_nexttoke (PL_parser->nexttoke)
110 # define PL_nexttype (PL_parser->nexttype)
111 # define PL_nextval (PL_parser->nextval)
114 /* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
117 S_pending_ident(pTHX);
119 static const char ident_too_long[] = "Identifier too long";
122 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
123 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
125 # define CURMAD(slot,sv)
126 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
129 #define XENUMMASK 0x3f
130 #define XFAKEEOF 0x40
131 #define XFAKEBRACK 0x80
133 #ifdef USE_UTF8_SCRIPTS
134 # define UTF (!IN_BYTES)
136 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
139 /* The maximum number of characters preceding the unrecognized one to display */
140 #define UNRECOGNIZED_PRECEDE_COUNT 10
142 /* In variables named $^X, these are the legal values for X.
143 * 1999-02-27 mjd-perl-patch@plover.com */
144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
148 /* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
150 * can get by with a single comparison (if the compiler is smart enough).
153 /* #define LEX_NOTPARSING 11 is done in perl.h. */
155 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
156 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
161 /* at end of code, eg "$x" followed by: */
162 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
165 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167 #define LEX_INTERPCONST 2 /* NOT USED */
168 #define LEX_FORMLINE 1 /* expecting a format line */
169 #define LEX_KNOWNEXT 0 /* next token known; just return it */
173 static const char* const lex_state_names[] = {
192 #include "keywords.h"
194 /* CLINE is a macro that ensures PL_copline has a sane value */
199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
202 # define SKIPSPACE0(s) skipspace0(s)
203 # define SKIPSPACE1(s) skipspace1(s)
204 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205 # define PEEKSPACE(s) skipspace2(s,0)
207 # define SKIPSPACE0(s) skipspace(s)
208 # define SKIPSPACE1(s) skipspace(s)
209 # define SKIPSPACE2(s,tsv) skipspace(s)
210 # define PEEKSPACE(s) skipspace(s)
214 * Convenience functions to return different tokens and prime the
215 * lexer for the next token. They all take an argument.
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
227 * FUN1 : not used, except for not, which isn't a UNIOP
228 * BOop : bitwise or or xor
230 * SHop : shift operator
231 * PWop : power operator
232 * PMop : pattern-matching operator
233 * Aop : addition-level operator
234 * Mop : multiplication-level operator
235 * Eop : equality-testing operator
236 * Rop : relational operator <= != gt
238 * Also see LOP and lop() below.
241 #ifdef DEBUGGING /* Serve -DT. */
242 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
244 # define REPORT(retval) (retval)
247 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
248 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
249 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
250 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
251 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
252 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
253 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
254 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
255 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
256 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
257 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
258 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
259 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
260 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
261 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
262 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
263 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
264 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
265 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
266 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
268 /* This bit of chicanery makes a unary function followed by
269 * a parenthesis into a function with one argument, highest precedence.
270 * The UNIDOR macro is for unary functions that can be followed by the //
271 * operator (such as C<shift // 0>).
273 #define UNI2(f,x) { \
274 pl_yylval.ival = f; \
277 PL_last_uni = PL_oldbufptr; \
278 PL_last_lop_op = f; \
280 return REPORT( (int)FUNC1 ); \
282 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
284 #define UNI(f) UNI2(f,XTERM)
285 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
287 #define UNIBRACK(f) { \
288 pl_yylval.ival = f; \
290 PL_last_uni = PL_oldbufptr; \
292 return REPORT( (int)FUNC1 ); \
294 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
297 /* grandfather return to old style */
300 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
302 pl_yylval.ival = (f); \
310 /* how to interpret the pl_yylval associated with the token */
314 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
320 static struct debug_tokens {
322 enum token_type type;
324 } const debug_tokens[] =
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
347 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
348 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
349 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
350 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
351 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
352 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
353 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
354 { IF, TOKENTYPE_IVAL, "IF" },
355 { LABEL, TOKENTYPE_PVAL, "LABEL" },
356 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
357 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
358 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
359 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
360 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
361 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
362 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
363 { MY, TOKENTYPE_IVAL, "MY" },
364 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
365 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
366 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
367 { OROP, TOKENTYPE_IVAL, "OROP" },
368 { OROR, TOKENTYPE_NONE, "OROR" },
369 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
370 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
371 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
372 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
373 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
374 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
375 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
376 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
377 { PREINC, TOKENTYPE_NONE, "PREINC" },
378 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
379 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
380 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
381 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
382 { SUB, TOKENTYPE_NONE, "SUB" },
383 { THING, TOKENTYPE_OPVAL, "THING" },
384 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
385 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
386 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
387 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
388 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
389 { USE, TOKENTYPE_IVAL, "USE" },
390 { WHEN, TOKENTYPE_IVAL, "WHEN" },
391 { WHILE, TOKENTYPE_IVAL, "WHILE" },
392 { WORD, TOKENTYPE_OPVAL, "WORD" },
393 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
394 { 0, TOKENTYPE_NONE, NULL }
397 /* dump the returned token in rv, plus any optional arg in pl_yylval */
400 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
404 PERL_ARGS_ASSERT_TOKEREPORT;
407 const char *name = NULL;
408 enum token_type type = TOKENTYPE_NONE;
409 const struct debug_tokens *p;
410 SV* const report = newSVpvs("<== ");
412 for (p = debug_tokens; p->token; p++) {
413 if (p->token == (int)rv) {
420 Perl_sv_catpv(aTHX_ report, name);
421 else if ((char)rv > ' ' && (char)rv < '~')
422 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
424 sv_catpvs(report, "EOF");
426 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
429 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
432 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
434 case TOKENTYPE_OPNUM:
435 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
436 PL_op_name[lvalp->ival]);
439 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
441 case TOKENTYPE_OPVAL:
443 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
444 PL_op_name[lvalp->opval->op_type]);
445 if (lvalp->opval->op_type == OP_CONST) {
446 Perl_sv_catpvf(aTHX_ report, " %s",
447 SvPEEK(cSVOPx_sv(lvalp->opval)));
452 sv_catpvs(report, "(opval=null)");
455 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
461 /* print the buffer with suitable escapes */
464 S_printbuf(pTHX_ const char *const fmt, const char *const s)
466 SV* const tmp = newSVpvs("");
468 PERL_ARGS_ASSERT_PRINTBUF;
470 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
477 S_deprecate_commaless_var_list(pTHX) {
479 deprecate("comma-less variable list");
480 return REPORT(','); /* grandfather non-comma-format format */
486 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
487 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
491 S_ao(pTHX_ int toketype)
494 if (*PL_bufptr == '=') {
496 if (toketype == ANDAND)
497 pl_yylval.ival = OP_ANDASSIGN;
498 else if (toketype == OROR)
499 pl_yylval.ival = OP_ORASSIGN;
500 else if (toketype == DORDOR)
501 pl_yylval.ival = OP_DORASSIGN;
509 * When Perl expects an operator and finds something else, no_op
510 * prints the warning. It always prints "<something> found where
511 * operator expected. It prints "Missing semicolon on previous line?"
512 * if the surprise occurs at the start of the line. "do you need to
513 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
514 * where the compiler doesn't know if foo is a method call or a function.
515 * It prints "Missing operator before end of line" if there's nothing
516 * after the missing operator, or "... before <...>" if there is something
517 * after the missing operator.
521 S_no_op(pTHX_ const char *const what, char *s)
524 char * const oldbp = PL_bufptr;
525 const bool is_first = (PL_oldbufptr == PL_linestart);
527 PERL_ARGS_ASSERT_NO_OP;
533 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
534 if (ckWARN_d(WARN_SYNTAX)) {
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Missing semicolon on previous line?)\n");
538 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
540 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
542 if (t < PL_bufptr && isSPACE(*t))
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Do you need to predeclare %.*s?)\n",
545 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
550 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
558 * Complain about missing quote/regexp/heredoc terminator.
559 * If it's called with NULL then it cauterizes the line buffer.
560 * If we're in a delimited string and the delimiter is a control
561 * character, it's reformatted into a two-char sequence like ^C.
566 S_missingterm(pTHX_ char *s)
572 char * const nl = strrchr(s,'\n');
576 else if (isCNTRL(PL_multi_close)) {
578 tmpbuf[1] = (char)toCTRL(PL_multi_close);
583 *tmpbuf = (char)PL_multi_close;
587 q = strchr(s,'"') ? '\'' : '"';
588 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
591 #define FEATURE_IS_ENABLED(name) \
592 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
593 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
594 /* The longest string we pass in. */
595 #define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
598 * S_feature_is_enabled
599 * Check whether the named feature is enabled.
602 S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
605 HV * const hinthv = GvHV(PL_hintgv);
606 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
608 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
610 assert(namelen <= MAX_FEATURE_LEN);
611 memcpy(&he_name[8], name, namelen);
613 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
617 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
618 * utf16-to-utf8-reversed.
621 #ifdef PERL_CR_FILTER
625 register const char *s = SvPVX_const(sv);
626 register const char * const e = s + SvCUR(sv);
628 PERL_ARGS_ASSERT_STRIP_RETURN;
630 /* outer loop optimized to do nothing if there are no CR-LFs */
632 if (*s++ == '\r' && *s == '\n') {
633 /* hit a CR-LF, need to copy the rest */
634 register char *d = s - 1;
637 if (*s == '\r' && s[1] == '\n')
648 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
650 const I32 count = FILTER_READ(idx+1, sv, maxlen);
651 if (count > 0 && !maxlen)
658 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
660 Creates and initialises a new lexer/parser state object, supplying
661 a context in which to lex and parse from a new source of Perl code.
662 A pointer to the new state object is placed in L</PL_parser>. An entry
663 is made on the save stack so that upon unwinding the new state object
664 will be destroyed and the former value of L</PL_parser> will be restored.
665 Nothing else need be done to clean up the parsing context.
667 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
668 non-null, provides a string (in SV form) containing code to be parsed.
669 A copy of the string is made, so subsequent modification of I<line>
670 does not affect parsing. I<rsfp>, if non-null, provides an input stream
671 from which code will be read to be parsed. If both are non-null, the
672 code in I<line> comes first and must consist of complete lines of input,
673 and I<rsfp> supplies the remainder of the source.
675 The I<flags> parameter is reserved for future use, and must always
682 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
685 const char *s = NULL;
687 yy_parser *parser, *oparser;
689 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
691 /* create and initialise a parser */
693 Newxz(parser, 1, yy_parser);
694 parser->old_parser = oparser = PL_parser;
697 parser->stack = NULL;
699 parser->stack_size = 0;
701 /* on scope exit, free this parser and restore any outer one */
703 parser->saved_curcop = PL_curcop;
705 /* initialise lexer state */
708 parser->curforce = -1;
710 parser->nexttoke = 0;
712 parser->error_count = oparser ? oparser->error_count : 0;
713 parser->copline = NOLINE;
714 parser->lex_state = LEX_NORMAL;
715 parser->expect = XSTATE;
717 parser->rsfp_filters = newAV();
719 Newx(parser->lex_brackstack, 120, char);
720 Newx(parser->lex_casestack, 12, char);
721 *parser->lex_casestack = '\0';
724 s = SvPV_const(line, len);
730 parser->linestr = newSVpvs("\n;");
732 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
734 sv_catpvs(parser->linestr, "\n;");
736 parser->oldoldbufptr =
739 parser->linestart = SvPVX(parser->linestr);
740 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
741 parser->last_lop = parser->last_uni = NULL;
747 /* delete a parser object */
750 Perl_parser_free(pTHX_ const yy_parser *parser)
752 PERL_ARGS_ASSERT_PARSER_FREE;
754 PL_curcop = parser->saved_curcop;
755 SvREFCNT_dec(parser->linestr);
757 if (parser->rsfp == PerlIO_stdin())
758 PerlIO_clearerr(parser->rsfp);
759 else if (parser->rsfp && (!parser->old_parser ||
760 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
761 PerlIO_close(parser->rsfp);
762 SvREFCNT_dec(parser->rsfp_filters);
764 Safefree(parser->lex_brackstack);
765 Safefree(parser->lex_casestack);
766 PL_parser = parser->old_parser;
772 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
774 Buffer scalar containing the chunk currently under consideration of the
775 text currently being lexed. This is always a plain string scalar (for
776 which C<SvPOK> is true). It is not intended to be used as a scalar by
777 normal scalar means; instead refer to the buffer directly by the pointer
778 variables described below.
780 The lexer maintains various C<char*> pointers to things in the
781 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
782 reallocated, all of these pointers must be updated. Don't attempt to
783 do this manually, but rather use L</lex_grow_linestr> if you need to
784 reallocate the buffer.
786 The content of the text chunk in the buffer is commonly exactly one
787 complete line of input, up to and including a newline terminator,
788 but there are situations where it is otherwise. The octets of the
789 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
790 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
791 flag on this scalar, which may disagree with it.
793 For direct examination of the buffer, the variable
794 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
795 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
796 of these pointers is usually preferable to examination of the scalar
797 through normal scalar means.
799 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
801 Direct pointer to the end of the chunk of text currently being lexed, the
802 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
803 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
804 always located at the end of the buffer, and does not count as part of
805 the buffer's contents.
807 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
809 Points to the current position of lexing inside the lexer buffer.
810 Characters around this point may be freely examined, within
811 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
812 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
813 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
815 Lexing code (whether in the Perl core or not) moves this pointer past
816 the characters that it consumes. It is also expected to perform some
817 bookkeeping whenever a newline character is consumed. This movement
818 can be more conveniently performed by the function L</lex_read_to>,
819 which handles newlines appropriately.
821 Interpretation of the buffer's octets can be abstracted out by
822 using the slightly higher-level functions L</lex_peek_unichar> and
823 L</lex_read_unichar>.
825 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
827 Points to the start of the current line inside the lexer buffer.
828 This is useful for indicating at which column an error occurred, and
829 not much else. This must be updated by any lexing code that consumes
830 a newline; the function L</lex_read_to> handles this detail.
836 =for apidoc Amx|bool|lex_bufutf8
838 Indicates whether the octets in the lexer buffer
839 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
840 of Unicode characters. If not, they should be interpreted as Latin-1
841 characters. This is analogous to the C<SvUTF8> flag for scalars.
843 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
844 contains valid UTF-8. Lexing code must be robust in the face of invalid
847 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
848 is significant, but not the whole story regarding the input character
849 encoding. Normally, when a file is being read, the scalar contains octets
850 and its C<SvUTF8> flag is off, but the octets should be interpreted as
851 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
852 however, the scalar may have the C<SvUTF8> flag on, and in this case its
853 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
854 is in effect. This logic may change in the future; use this function
855 instead of implementing the logic yourself.
861 Perl_lex_bufutf8(pTHX)
867 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
869 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
870 at least I<len> octets (including terminating NUL). Returns a
871 pointer to the reallocated buffer. This is necessary before making
872 any direct modification of the buffer that would increase its length.
873 L</lex_stuff_pvn> provides a more convenient way to insert text into
876 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
877 this function updates all of the lexer's variables that point directly
884 Perl_lex_grow_linestr(pTHX_ STRLEN len)
888 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
889 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
890 linestr = PL_parser->linestr;
891 buf = SvPVX(linestr);
892 if (len <= SvLEN(linestr))
894 bufend_pos = PL_parser->bufend - buf;
895 bufptr_pos = PL_parser->bufptr - buf;
896 oldbufptr_pos = PL_parser->oldbufptr - buf;
897 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
898 linestart_pos = PL_parser->linestart - buf;
899 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
900 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
901 buf = sv_grow(linestr, len);
902 PL_parser->bufend = buf + bufend_pos;
903 PL_parser->bufptr = buf + bufptr_pos;
904 PL_parser->oldbufptr = buf + oldbufptr_pos;
905 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
906 PL_parser->linestart = buf + linestart_pos;
907 if (PL_parser->last_uni)
908 PL_parser->last_uni = buf + last_uni_pos;
909 if (PL_parser->last_lop)
910 PL_parser->last_lop = buf + last_lop_pos;
915 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
917 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
918 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
919 reallocating the buffer if necessary. This means that lexing code that
920 runs later will see the characters as if they had appeared in the input.
921 It is not recommended to do this as part of normal parsing, and most
922 uses of this facility run the risk of the inserted characters being
923 interpreted in an unintended manner.
925 The string to be inserted is represented by I<len> octets starting
926 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
927 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
928 The characters are recoded for the lexer buffer, according to how the
929 buffer is currently being interpreted (L</lex_bufutf8>). If a string
930 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
931 function is more convenient.
937 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
941 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
942 if (flags & ~(LEX_STUFF_UTF8))
943 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
945 if (flags & LEX_STUFF_UTF8) {
949 const char *p, *e = pv+len;
950 for (p = pv; p != e; p++)
951 highhalf += !!(((U8)*p) & 0x80);
954 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
955 bufptr = PL_parser->bufptr;
956 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
957 SvCUR_set(PL_parser->linestr,
958 SvCUR(PL_parser->linestr) + len+highhalf);
959 PL_parser->bufend += len+highhalf;
960 for (p = pv; p != e; p++) {
963 *bufptr++ = (char)(0xc0 | (c >> 6));
964 *bufptr++ = (char)(0x80 | (c & 0x3f));
971 if (flags & LEX_STUFF_UTF8) {
973 const char *p, *e = pv+len;
974 for (p = pv; p != e; p++) {
977 Perl_croak(aTHX_ "Lexing code attempted to stuff "
978 "non-Latin-1 character into Latin-1 input");
979 } else if (c >= 0xc2 && p+1 != e &&
980 (((U8)p[1]) & 0xc0) == 0x80) {
983 } else if (c >= 0x80) {
984 /* malformed UTF-8 */
986 SAVESPTR(PL_warnhook);
987 PL_warnhook = PERL_WARNHOOK_FATAL;
988 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
994 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
995 bufptr = PL_parser->bufptr;
996 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
997 SvCUR_set(PL_parser->linestr,
998 SvCUR(PL_parser->linestr) + len-highhalf);
999 PL_parser->bufend += len-highhalf;
1000 for (p = pv; p != e; p++) {
1003 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006 *bufptr++ = (char)c;
1011 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1012 bufptr = PL_parser->bufptr;
1013 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1014 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1015 PL_parser->bufend += len;
1016 Copy(pv, bufptr, len, char);
1022 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1024 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1025 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1026 reallocating the buffer if necessary. This means that lexing code that
1027 runs later will see the characters as if they had appeared in the input.
1028 It is not recommended to do this as part of normal parsing, and most
1029 uses of this facility run the risk of the inserted characters being
1030 interpreted in an unintended manner.
1032 The string to be inserted is represented by octets starting at I<pv>
1033 and continuing to the first nul. These octets are interpreted as either
1034 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1035 in I<flags>. The characters are recoded for the lexer buffer, according
1036 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1037 If it is not convenient to nul-terminate a string to be inserted, the
1038 L</lex_stuff_pvn> function is more appropriate.
1044 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1046 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1047 lex_stuff_pvn(pv, strlen(pv), flags);
1051 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1053 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1054 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1055 reallocating the buffer if necessary. This means that lexing code that
1056 runs later will see the characters as if they had appeared in the input.
1057 It is not recommended to do this as part of normal parsing, and most
1058 uses of this facility run the risk of the inserted characters being
1059 interpreted in an unintended manner.
1061 The string to be inserted is the string value of I<sv>. The characters
1062 are recoded for the lexer buffer, according to how the buffer is currently
1063 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1064 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1065 need to construct a scalar.
1071 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1075 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1077 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1079 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1083 =for apidoc Amx|void|lex_unstuff|char *ptr
1085 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1086 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1087 This hides the discarded text from any lexing code that runs later,
1088 as if the text had never appeared.
1090 This is not the normal way to consume lexed text. For that, use
1097 Perl_lex_unstuff(pTHX_ char *ptr)
1101 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1102 buf = PL_parser->bufptr;
1104 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1107 bufend = PL_parser->bufend;
1109 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1110 unstuff_len = ptr - buf;
1111 Move(ptr, buf, bufend+1-ptr, char);
1112 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1113 PL_parser->bufend = bufend - unstuff_len;
1117 =for apidoc Amx|void|lex_read_to|char *ptr
1119 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1120 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1121 performing the correct bookkeeping whenever a newline character is passed.
1122 This is the normal way to consume lexed text.
1124 Interpretation of the buffer's octets can be abstracted out by
1125 using the slightly higher-level functions L</lex_peek_unichar> and
1126 L</lex_read_unichar>.
1132 Perl_lex_read_to(pTHX_ char *ptr)
1135 PERL_ARGS_ASSERT_LEX_READ_TO;
1136 s = PL_parser->bufptr;
1137 if (ptr < s || ptr > PL_parser->bufend)
1138 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1139 for (; s != ptr; s++)
1141 CopLINE_inc(PL_curcop);
1142 PL_parser->linestart = s+1;
1144 PL_parser->bufptr = ptr;
1148 =for apidoc Amx|void|lex_discard_to|char *ptr
1150 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1151 up to I<ptr>. The remaining content of the buffer will be moved, and
1152 all pointers into the buffer updated appropriately. I<ptr> must not
1153 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1154 it is not permitted to discard text that has yet to be lexed.
1156 Normally it is not necessarily to do this directly, because it suffices to
1157 use the implicit discarding behaviour of L</lex_next_chunk> and things
1158 based on it. However, if a token stretches across multiple lines,
1159 and the lexing code has kept multiple lines of text in the buffer for
1160 that purpose, then after completion of the token it would be wise to
1161 explicitly discard the now-unneeded earlier lines, to avoid future
1162 multi-line tokens growing the buffer without bound.
1168 Perl_lex_discard_to(pTHX_ char *ptr)
1172 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1173 buf = SvPVX(PL_parser->linestr);
1175 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1178 if (ptr > PL_parser->bufptr)
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180 discard_len = ptr - buf;
1181 if (PL_parser->oldbufptr < ptr)
1182 PL_parser->oldbufptr = ptr;
1183 if (PL_parser->oldoldbufptr < ptr)
1184 PL_parser->oldoldbufptr = ptr;
1185 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1186 PL_parser->last_uni = NULL;
1187 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1188 PL_parser->last_lop = NULL;
1189 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1190 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1191 PL_parser->bufend -= discard_len;
1192 PL_parser->bufptr -= discard_len;
1193 PL_parser->oldbufptr -= discard_len;
1194 PL_parser->oldoldbufptr -= discard_len;
1195 if (PL_parser->last_uni)
1196 PL_parser->last_uni -= discard_len;
1197 if (PL_parser->last_lop)
1198 PL_parser->last_lop -= discard_len;
1202 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1204 Reads in the next chunk of text to be lexed, appending it to
1205 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1206 looked to the end of the current chunk and wants to know more. It is
1207 usual, but not necessary, for lexing to have consumed the entirety of
1208 the current chunk at this time.
1210 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1211 chunk (i.e., the current chunk has been entirely consumed), normally the
1212 current chunk will be discarded at the same time that the new chunk is
1213 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1214 will not be discarded. If the current chunk has not been entirely
1215 consumed, then it will not be discarded regardless of the flag.
1217 Returns true if some new text was added to the buffer, or false if the
1218 buffer has reached the end of the input text.
1223 #define LEX_FAKE_EOF 0x80000000
1226 Perl_lex_next_chunk(pTHX_ U32 flags)
1230 STRLEN old_bufend_pos, new_bufend_pos;
1231 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1232 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1233 bool got_some_for_debugger = 0;
1235 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1236 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1237 linestr = PL_parser->linestr;
1238 buf = SvPVX(linestr);
1239 if (!(flags & LEX_KEEP_PREVIOUS) &&
1240 PL_parser->bufptr == PL_parser->bufend) {
1241 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1243 if (PL_parser->last_uni != PL_parser->bufend)
1244 PL_parser->last_uni = NULL;
1245 if (PL_parser->last_lop != PL_parser->bufend)
1246 PL_parser->last_lop = NULL;
1247 last_uni_pos = last_lop_pos = 0;
1251 old_bufend_pos = PL_parser->bufend - buf;
1252 bufptr_pos = PL_parser->bufptr - buf;
1253 oldbufptr_pos = PL_parser->oldbufptr - buf;
1254 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1255 linestart_pos = PL_parser->linestart - buf;
1256 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1257 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1259 if (flags & LEX_FAKE_EOF) {
1261 } else if (!PL_parser->rsfp) {
1263 } else if (filter_gets(linestr, old_bufend_pos)) {
1265 got_some_for_debugger = 1;
1267 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1268 sv_setpvs(linestr, "");
1270 /* End of real input. Close filehandle (unless it was STDIN),
1271 * then add implicit termination.
1273 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1274 PerlIO_clearerr(PL_parser->rsfp);
1275 else if (PL_parser->rsfp)
1276 (void)PerlIO_close(PL_parser->rsfp);
1277 PL_parser->rsfp = NULL;
1278 PL_parser->in_pod = 0;
1280 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1283 if (!PL_in_eval && PL_minus_p) {
1285 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1286 PL_minus_n = PL_minus_p = 0;
1287 } else if (!PL_in_eval && PL_minus_n) {
1288 sv_catpvs(linestr, /*{*/";}");
1291 sv_catpvs(linestr, ";");
1294 buf = SvPVX(linestr);
1295 new_bufend_pos = SvCUR(linestr);
1296 PL_parser->bufend = buf + new_bufend_pos;
1297 PL_parser->bufptr = buf + bufptr_pos;
1298 PL_parser->oldbufptr = buf + oldbufptr_pos;
1299 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1300 PL_parser->linestart = buf + linestart_pos;
1301 if (PL_parser->last_uni)
1302 PL_parser->last_uni = buf + last_uni_pos;
1303 if (PL_parser->last_lop)
1304 PL_parser->last_lop = buf + last_lop_pos;
1305 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1306 PL_curstash != PL_debstash) {
1307 /* debugger active and we're not compiling the debugger code,
1308 * so store the line into the debugger's array of lines
1310 update_debugger_info(NULL, buf+old_bufend_pos,
1311 new_bufend_pos-old_bufend_pos);
1317 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1319 Looks ahead one (Unicode) character in the text currently being lexed.
1320 Returns the codepoint (unsigned integer value) of the next character,
1321 or -1 if lexing has reached the end of the input text. To consume the
1322 peeked character, use L</lex_read_unichar>.
1324 If the next character is in (or extends into) the next chunk of input
1325 text, the next chunk will be read in. Normally the current chunk will be
1326 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1327 then the current chunk will not be discarded.
1329 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1330 is encountered, an exception is generated.
1336 Perl_lex_peek_unichar(pTHX_ U32 flags)
1340 if (flags & ~(LEX_KEEP_PREVIOUS))
1341 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1342 s = PL_parser->bufptr;
1343 bufend = PL_parser->bufend;
1349 if (!lex_next_chunk(flags))
1351 s = PL_parser->bufptr;
1352 bufend = PL_parser->bufend;
1358 len = PL_utf8skip[head];
1359 while ((STRLEN)(bufend-s) < len) {
1360 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1362 s = PL_parser->bufptr;
1363 bufend = PL_parser->bufend;
1366 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1367 if (retlen == (STRLEN)-1) {
1368 /* malformed UTF-8 */
1370 SAVESPTR(PL_warnhook);
1371 PL_warnhook = PERL_WARNHOOK_FATAL;
1372 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1378 if (!lex_next_chunk(flags))
1380 s = PL_parser->bufptr;
1387 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1389 Reads the next (Unicode) character in the text currently being lexed.
1390 Returns the codepoint (unsigned integer value) of the character read,
1391 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1392 if lexing has reached the end of the input text. To non-destructively
1393 examine the next character, use L</lex_peek_unichar> instead.
1395 If the next character is in (or extends into) the next chunk of input
1396 text, the next chunk will be read in. Normally the current chunk will be
1397 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1398 then the current chunk will not be discarded.
1400 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1401 is encountered, an exception is generated.
1407 Perl_lex_read_unichar(pTHX_ U32 flags)
1410 if (flags & ~(LEX_KEEP_PREVIOUS))
1411 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1412 c = lex_peek_unichar(flags);
1415 CopLINE_inc(PL_curcop);
1416 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422 =for apidoc Amx|void|lex_read_space|U32 flags
1424 Reads optional spaces, in Perl style, in the text currently being
1425 lexed. The spaces may include ordinary whitespace characters and
1426 Perl-style comments. C<#line> directives are processed if encountered.
1427 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1428 at a non-space character (or the end of the input text).
1430 If spaces extend into the next chunk of input text, the next chunk will
1431 be read in. Normally the current chunk will be discarded at the same
1432 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1433 chunk will not be discarded.
1438 #define LEX_NO_NEXT_CHUNK 0x80000000
1441 Perl_lex_read_space(pTHX_ U32 flags)
1444 bool need_incline = 0;
1445 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1446 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1449 sv_free(PL_skipwhite);
1450 PL_skipwhite = NULL;
1453 PL_skipwhite = newSVpvs("");
1454 #endif /* PERL_MAD */
1455 s = PL_parser->bufptr;
1456 bufend = PL_parser->bufend;
1462 } while (!(c == '\n' || (c == 0 && s == bufend)));
1463 } else if (c == '\n') {
1465 PL_parser->linestart = s;
1470 } else if (isSPACE(c)) {
1472 } else if (c == 0 && s == bufend) {
1476 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1477 #endif /* PERL_MAD */
1478 if (flags & LEX_NO_NEXT_CHUNK)
1480 PL_parser->bufptr = s;
1481 CopLINE_inc(PL_curcop);
1482 got_more = lex_next_chunk(flags);
1483 CopLINE_dec(PL_curcop);
1484 s = PL_parser->bufptr;
1485 bufend = PL_parser->bufend;
1488 if (need_incline && PL_parser->rsfp) {
1498 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1499 #endif /* PERL_MAD */
1500 PL_parser->bufptr = s;
1505 * This subroutine has nothing to do with tilting, whether at windmills
1506 * or pinball tables. Its name is short for "increment line". It
1507 * increments the current line number in CopLINE(PL_curcop) and checks
1508 * to see whether the line starts with a comment of the form
1509 * # line 500 "foo.pm"
1510 * If so, it sets the current line number and file to the values in the comment.
1514 S_incline(pTHX_ const char *s)
1521 PERL_ARGS_ASSERT_INCLINE;
1523 CopLINE_inc(PL_curcop);
1526 while (SPACE_OR_TAB(*s))
1528 if (strnEQ(s, "line", 4))
1532 if (SPACE_OR_TAB(*s))
1536 while (SPACE_OR_TAB(*s))
1544 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1546 while (SPACE_OR_TAB(*s))
1548 if (*s == '"' && (t = strchr(s+1, '"'))) {
1554 while (!isSPACE(*t))
1558 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1560 if (*e != '\n' && *e != '\0')
1561 return; /* false alarm */
1564 const STRLEN len = t - s;
1565 #ifndef USE_ITHREADS
1566 SV *const temp_sv = CopFILESV(PL_curcop);
1571 cf = SvPVX(temp_sv);
1572 tmplen = SvCUR(temp_sv);
1578 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
1579 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1580 * to *{"::_<newfilename"} */
1581 /* However, the long form of evals is only turned on by the
1582 debugger - usually they're "(eval %lu)" */
1586 STRLEN tmplen2 = len;
1587 if (tmplen + 2 <= sizeof smallbuf)
1590 Newx(tmpbuf, tmplen + 2, char);
1593 memcpy(tmpbuf + 2, cf, tmplen);
1595 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1600 if (tmplen2 + 2 <= sizeof smallbuf)
1603 Newx(tmpbuf2, tmplen2 + 2, char);
1605 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1606 /* Either they malloc'd it, or we malloc'd it,
1607 so no prefix is present in ours. */
1612 memcpy(tmpbuf2 + 2, s, tmplen2);
1615 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1617 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1618 /* adjust ${"::_<newfilename"} to store the new file name */
1619 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1620 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1621 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1624 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1626 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1629 CopFILE_free(PL_curcop);
1630 CopFILE_setn(PL_curcop, s, len);
1632 CopLINE_set(PL_curcop, atoi(n)-1);
1636 /* skip space before PL_thistoken */
1639 S_skipspace0(pTHX_ register char *s)
1641 PERL_ARGS_ASSERT_SKIPSPACE0;
1648 PL_thiswhite = newSVpvs("");
1649 sv_catsv(PL_thiswhite, PL_skipwhite);
1650 sv_free(PL_skipwhite);
1653 PL_realtokenstart = s - SvPVX(PL_linestr);
1657 /* skip space after PL_thistoken */
1660 S_skipspace1(pTHX_ register char *s)
1662 const char *start = s;
1663 I32 startoff = start - SvPVX(PL_linestr);
1665 PERL_ARGS_ASSERT_SKIPSPACE1;
1670 start = SvPVX(PL_linestr) + startoff;
1671 if (!PL_thistoken && PL_realtokenstart >= 0) {
1672 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1673 PL_thistoken = newSVpvn(tstart, start - tstart);
1675 PL_realtokenstart = -1;
1678 PL_nextwhite = newSVpvs("");
1679 sv_catsv(PL_nextwhite, PL_skipwhite);
1680 sv_free(PL_skipwhite);
1687 S_skipspace2(pTHX_ register char *s, SV **svp)
1690 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1691 const I32 startoff = s - SvPVX(PL_linestr);
1693 PERL_ARGS_ASSERT_SKIPSPACE2;
1696 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1697 if (!PL_madskills || !svp)
1699 start = SvPVX(PL_linestr) + startoff;
1700 if (!PL_thistoken && PL_realtokenstart >= 0) {
1701 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1702 PL_thistoken = newSVpvn(tstart, start - tstart);
1703 PL_realtokenstart = -1;
1707 *svp = newSVpvs("");
1708 sv_setsv(*svp, PL_skipwhite);
1709 sv_free(PL_skipwhite);
1718 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1720 AV *av = CopFILEAVx(PL_curcop);
1722 SV * const sv = newSV_type(SVt_PVMG);
1724 sv_setsv(sv, orig_sv);
1726 sv_setpvn(sv, buf, len);
1729 av_store(av, (I32)CopLINE(PL_curcop), sv);
1735 * Called to gobble the appropriate amount and type of whitespace.
1736 * Skips comments as well.
1740 S_skipspace(pTHX_ register char *s)
1744 #endif /* PERL_MAD */
1745 PERL_ARGS_ASSERT_SKIPSPACE;
1748 sv_free(PL_skipwhite);
1749 PL_skipwhite = NULL;
1751 #endif /* PERL_MAD */
1752 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1753 while (s < PL_bufend && SPACE_OR_TAB(*s))
1756 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1758 lex_read_space(LEX_KEEP_PREVIOUS |
1759 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1760 LEX_NO_NEXT_CHUNK : 0));
1762 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1763 if (PL_linestart > PL_bufptr)
1764 PL_bufptr = PL_linestart;
1769 PL_skipwhite = newSVpvn(start, s-start);
1770 #endif /* PERL_MAD */
1776 * Check the unary operators to ensure there's no ambiguity in how they're
1777 * used. An ambiguous piece of code would be:
1779 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1780 * the +5 is its argument.
1790 if (PL_oldoldbufptr != PL_last_uni)
1792 while (isSPACE(*PL_last_uni))
1795 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1797 if ((t = strchr(s, '(')) && t < PL_bufptr)
1800 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1801 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1802 (int)(s - PL_last_uni), PL_last_uni);
1806 * LOP : macro to build a list operator. Its behaviour has been replaced
1807 * with a subroutine, S_lop() for which LOP is just another name.
1810 #define LOP(f,x) return lop(f,x,s)
1814 * Build a list operator (or something that might be one). The rules:
1815 * - if we have a next token, then it's a list operator [why?]
1816 * - if the next thing is an opening paren, then it's a function
1817 * - else it's a list operator
1821 S_lop(pTHX_ I32 f, int x, char *s)
1825 PERL_ARGS_ASSERT_LOP;
1831 PL_last_lop = PL_oldbufptr;
1832 PL_last_lop_op = (OPCODE)f;
1841 return REPORT(FUNC);
1844 return REPORT(FUNC);
1847 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1848 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1849 return REPORT(LSTOP);
1856 * Sets up for an eventual force_next(). start_force(0) basically does
1857 * an unshift, while start_force(-1) does a push. yylex removes items
1862 S_start_force(pTHX_ int where)
1866 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1867 where = PL_lasttoke;
1868 assert(PL_curforce < 0 || PL_curforce == where);
1869 if (PL_curforce != where) {
1870 for (i = PL_lasttoke; i > where; --i) {
1871 PL_nexttoke[i] = PL_nexttoke[i-1];
1875 if (PL_curforce < 0) /* in case of duplicate start_force() */
1876 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1877 PL_curforce = where;
1880 curmad('^', newSVpvs(""));
1881 CURMAD('_', PL_nextwhite);
1886 S_curmad(pTHX_ char slot, SV *sv)
1892 if (PL_curforce < 0)
1893 where = &PL_thismad;
1895 where = &PL_nexttoke[PL_curforce].next_mad;
1901 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1903 else if (PL_encoding) {
1904 sv_recode_to_utf8(sv, PL_encoding);
1909 /* keep a slot open for the head of the list? */
1910 if (slot != '_' && *where && (*where)->mad_key == '^') {
1911 (*where)->mad_key = slot;
1912 sv_free(MUTABLE_SV(((*where)->mad_val)));
1913 (*where)->mad_val = (void*)sv;
1916 addmad(newMADsv(slot, sv), where, 0);
1919 # define start_force(where) NOOP
1920 # define curmad(slot, sv) NOOP
1925 * When the lexer realizes it knows the next token (for instance,
1926 * it is reordering tokens for the parser) then it can call S_force_next
1927 * to know what token to return the next time the lexer is called. Caller
1928 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1929 * and possibly PL_expect to ensure the lexer handles the token correctly.
1933 S_force_next(pTHX_ I32 type)
1938 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1939 tokereport(type, &NEXTVAL_NEXTTOKE);
1943 if (PL_curforce < 0)
1944 start_force(PL_lasttoke);
1945 PL_nexttoke[PL_curforce].next_type = type;
1946 if (PL_lex_state != LEX_KNOWNEXT)
1947 PL_lex_defer = PL_lex_state;
1948 PL_lex_state = LEX_KNOWNEXT;
1949 PL_lex_expect = PL_expect;
1952 PL_nexttype[PL_nexttoke] = type;
1954 if (PL_lex_state != LEX_KNOWNEXT) {
1955 PL_lex_defer = PL_lex_state;
1956 PL_lex_expect = PL_expect;
1957 PL_lex_state = LEX_KNOWNEXT;
1965 int yyc = PL_parser->yychar;
1966 if (yyc != YYEMPTY) {
1969 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1970 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1971 PL_lex_allbrackets--;
1973 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1974 } else if (yyc == '('/*)*/) {
1975 PL_lex_allbrackets--;
1980 PL_parser->yychar = YYEMPTY;
1985 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1988 SV * const sv = newSVpvn_utf8(start, len,
1991 && !is_ascii_string((const U8*)start, len)
1992 && is_utf8_string((const U8*)start, len));
1998 * When the lexer knows the next thing is a word (for instance, it has
1999 * just seen -> and it knows that the next char is a word char, then
2000 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2004 * char *start : buffer position (must be within PL_linestr)
2005 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2006 * int check_keyword : if true, Perl checks to make sure the word isn't
2007 * a keyword (do this if the word is a label, e.g. goto FOO)
2008 * int allow_pack : if true, : characters will also be allowed (require,
2009 * use, etc. do this)
2010 * int allow_initial_tick : used by the "sub" lexer only.
2014 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2020 PERL_ARGS_ASSERT_FORCE_WORD;
2022 start = SKIPSPACE1(start);
2024 if (isIDFIRST_lazy_if(s,UTF) ||
2025 (allow_pack && *s == ':') ||
2026 (allow_initial_tick && *s == '\'') )
2028 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2029 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2031 start_force(PL_curforce);
2033 curmad('X', newSVpvn(start,s-start));
2034 if (token == METHOD) {
2039 PL_expect = XOPERATOR;
2043 curmad('g', newSVpvs( "forced" ));
2044 NEXTVAL_NEXTTOKE.opval
2045 = (OP*)newSVOP(OP_CONST,0,
2046 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2047 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2055 * Called when the lexer wants $foo *foo &foo etc, but the program
2056 * text only contains the "foo" portion. The first argument is a pointer
2057 * to the "foo", and the second argument is the type symbol to prefix.
2058 * Forces the next token to be a "WORD".
2059 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2063 S_force_ident(pTHX_ register const char *s, int kind)
2067 PERL_ARGS_ASSERT_FORCE_IDENT;
2070 const STRLEN len = strlen(s);
2071 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
2072 start_force(PL_curforce);
2073 NEXTVAL_NEXTTOKE.opval = o;
2076 o->op_private = OPpCONST_ENTERED;
2077 /* XXX see note in pp_entereval() for why we forgo typo
2078 warnings if the symbol must be introduced in an eval.
2080 gv_fetchpvn_flags(s, len,
2081 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2083 kind == '$' ? SVt_PV :
2084 kind == '@' ? SVt_PVAV :
2085 kind == '%' ? SVt_PVHV :
2093 Perl_str_to_version(pTHX_ SV *sv)
2098 const char *start = SvPV_const(sv,len);
2099 const char * const end = start + len;
2100 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2102 PERL_ARGS_ASSERT_STR_TO_VERSION;
2104 while (start < end) {
2108 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2113 retval += ((NV)n)/nshift;
2122 * Forces the next token to be a version number.
2123 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2124 * and if "guessing" is TRUE, then no new token is created (and the caller
2125 * must use an alternative parsing method).
2129 S_force_version(pTHX_ char *s, int guessing)
2135 I32 startoff = s - SvPVX(PL_linestr);
2138 PERL_ARGS_ASSERT_FORCE_VERSION;
2146 while (isDIGIT(*d) || *d == '_' || *d == '.')
2150 start_force(PL_curforce);
2151 curmad('X', newSVpvn(s,d-s));
2154 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2156 #ifdef USE_LOCALE_NUMERIC
2157 char *loc = setlocale(LC_NUMERIC, "C");
2159 s = scan_num(s, &pl_yylval);
2160 #ifdef USE_LOCALE_NUMERIC
2161 setlocale(LC_NUMERIC, loc);
2163 version = pl_yylval.opval;
2164 ver = cSVOPx(version)->op_sv;
2165 if (SvPOK(ver) && !SvNIOK(ver)) {
2166 SvUPGRADE(ver, SVt_PVNV);
2167 SvNV_set(ver, str_to_version(ver));
2168 SvNOK_on(ver); /* hint that it is a version */
2171 else if (guessing) {
2174 sv_free(PL_nextwhite); /* let next token collect whitespace */
2176 s = SvPVX(PL_linestr) + startoff;
2184 if (PL_madskills && !version) {
2185 sv_free(PL_nextwhite); /* let next token collect whitespace */
2187 s = SvPVX(PL_linestr) + startoff;
2190 /* NOTE: The parser sees the package name and the VERSION swapped */
2191 start_force(PL_curforce);
2192 NEXTVAL_NEXTTOKE.opval = version;
2199 * S_force_strict_version
2200 * Forces the next token to be a version number using strict syntax rules.
2204 S_force_strict_version(pTHX_ char *s)
2209 I32 startoff = s - SvPVX(PL_linestr);
2211 const char *errstr = NULL;
2213 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2215 while (isSPACE(*s)) /* leading whitespace */
2218 if (is_STRICT_VERSION(s,&errstr)) {
2220 s = (char *)scan_version(s, ver, 0);
2221 version = newSVOP(OP_CONST, 0, ver);
2223 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2224 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2228 yyerror(errstr); /* version required */
2233 if (PL_madskills && !version) {
2234 sv_free(PL_nextwhite); /* let next token collect whitespace */
2236 s = SvPVX(PL_linestr) + startoff;
2239 /* NOTE: The parser sees the package name and the VERSION swapped */
2240 start_force(PL_curforce);
2241 NEXTVAL_NEXTTOKE.opval = version;
2249 * Tokenize a quoted string passed in as an SV. It finds the next
2250 * chunk, up to end of string or a backslash. It may make a new
2251 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2256 S_tokeq(pTHX_ SV *sv)
2260 register char *send;
2265 PERL_ARGS_ASSERT_TOKEQ;
2270 s = SvPV_force(sv, len);
2271 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2274 /* This is relying on the SV being "well formed" with a trailing '\0' */
2275 while (s < send && !(*s == '\\' && s[1] == '\\'))
2280 if ( PL_hints & HINT_NEW_STRING ) {
2281 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2285 if (s + 1 < send && (s[1] == '\\'))
2286 s++; /* all that, just for this */
2291 SvCUR_set(sv, d - SvPVX_const(sv));
2293 if ( PL_hints & HINT_NEW_STRING )
2294 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2299 * Now come three functions related to double-quote context,
2300 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2301 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2302 * interact with PL_lex_state, and create fake ( ... ) argument lists
2303 * to handle functions and concatenation.
2304 * They assume that whoever calls them will be setting up a fake
2305 * join call, because each subthing puts a ',' after it. This lets
2308 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2310 * (I'm not sure whether the spurious commas at the end of lcfirst's
2311 * arguments and join's arguments are created or not).
2316 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2318 * Pattern matching will set PL_lex_op to the pattern-matching op to
2319 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2321 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2323 * Everything else becomes a FUNC.
2325 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2326 * had an OP_CONST or OP_READLINE). This just sets us up for a
2327 * call to S_sublex_push().
2331 S_sublex_start(pTHX)
2334 register const I32 op_type = pl_yylval.ival;
2336 if (op_type == OP_NULL) {
2337 pl_yylval.opval = PL_lex_op;
2341 if (op_type == OP_CONST || op_type == OP_READLINE) {
2342 SV *sv = tokeq(PL_lex_stuff);
2344 if (SvTYPE(sv) == SVt_PVIV) {
2345 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2347 const char * const p = SvPV_const(sv, len);
2348 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2352 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2353 PL_lex_stuff = NULL;
2354 /* Allow <FH> // "foo" */
2355 if (op_type == OP_READLINE)
2356 PL_expect = XTERMORDORDOR;
2359 else if (op_type == OP_BACKTICK && PL_lex_op) {
2360 /* readpipe() vas overriden */
2361 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2362 pl_yylval.opval = PL_lex_op;
2364 PL_lex_stuff = NULL;
2368 PL_sublex_info.super_state = PL_lex_state;
2369 PL_sublex_info.sub_inwhat = (U16)op_type;
2370 PL_sublex_info.sub_op = PL_lex_op;
2371 PL_lex_state = LEX_INTERPPUSH;
2375 pl_yylval.opval = PL_lex_op;
2385 * Create a new scope to save the lexing state. The scope will be
2386 * ended in S_sublex_done. Returns a '(', starting the function arguments
2387 * to the uc, lc, etc. found before.
2388 * Sets PL_lex_state to LEX_INTERPCONCAT.
2397 PL_lex_state = PL_sublex_info.super_state;
2398 SAVEBOOL(PL_lex_dojoin);
2399 SAVEI32(PL_lex_brackets);
2400 SAVEI32(PL_lex_allbrackets);
2401 SAVEI8(PL_lex_fakeeof);
2402 SAVEI32(PL_lex_casemods);
2403 SAVEI32(PL_lex_starts);
2404 SAVEI8(PL_lex_state);
2405 SAVEVPTR(PL_lex_inpat);
2406 SAVEI16(PL_lex_inwhat);
2407 SAVECOPLINE(PL_curcop);
2408 SAVEPPTR(PL_bufptr);
2409 SAVEPPTR(PL_bufend);
2410 SAVEPPTR(PL_oldbufptr);
2411 SAVEPPTR(PL_oldoldbufptr);
2412 SAVEPPTR(PL_last_lop);
2413 SAVEPPTR(PL_last_uni);
2414 SAVEPPTR(PL_linestart);
2415 SAVESPTR(PL_linestr);
2416 SAVEGENERICPV(PL_lex_brackstack);
2417 SAVEGENERICPV(PL_lex_casestack);
2419 PL_linestr = PL_lex_stuff;
2420 PL_lex_stuff = NULL;
2422 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2423 = SvPVX(PL_linestr);
2424 PL_bufend += SvCUR(PL_linestr);
2425 PL_last_lop = PL_last_uni = NULL;
2426 SAVEFREESV(PL_linestr);
2428 PL_lex_dojoin = FALSE;
2429 PL_lex_brackets = 0;
2430 PL_lex_allbrackets = 0;
2431 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2432 Newx(PL_lex_brackstack, 120, char);
2433 Newx(PL_lex_casestack, 12, char);
2434 PL_lex_casemods = 0;
2435 *PL_lex_casestack = '\0';
2437 PL_lex_state = LEX_INTERPCONCAT;
2438 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2440 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2441 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2442 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2443 PL_lex_inpat = PL_sublex_info.sub_op;
2445 PL_lex_inpat = NULL;
2452 * Restores lexer state after a S_sublex_push.
2459 if (!PL_lex_starts++) {
2460 SV * const sv = newSVpvs("");
2461 if (SvUTF8(PL_linestr))
2463 PL_expect = XOPERATOR;
2464 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2468 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2469 PL_lex_state = LEX_INTERPCASEMOD;
2473 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2474 assert(PL_lex_inwhat != OP_TRANSR);
2475 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2476 PL_linestr = PL_lex_repl;
2478 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2479 PL_bufend += SvCUR(PL_linestr);
2480 PL_last_lop = PL_last_uni = NULL;
2481 SAVEFREESV(PL_linestr);
2482 PL_lex_dojoin = FALSE;
2483 PL_lex_brackets = 0;
2484 PL_lex_allbrackets = 0;
2485 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2486 PL_lex_casemods = 0;
2487 *PL_lex_casestack = '\0';
2489 if (SvEVALED(PL_lex_repl)) {
2490 PL_lex_state = LEX_INTERPNORMAL;
2492 /* we don't clear PL_lex_repl here, so that we can check later
2493 whether this is an evalled subst; that means we rely on the
2494 logic to ensure sublex_done() is called again only via the
2495 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2498 PL_lex_state = LEX_INTERPCONCAT;
2508 PL_endwhite = newSVpvs("");
2509 sv_catsv(PL_endwhite, PL_thiswhite);
2513 sv_setpvs(PL_thistoken,"");
2515 PL_realtokenstart = -1;
2519 PL_bufend = SvPVX(PL_linestr);
2520 PL_bufend += SvCUR(PL_linestr);
2521 PL_expect = XOPERATOR;
2522 PL_sublex_info.sub_inwhat = 0;
2530 Extracts a pattern, double-quoted string, or transliteration. This
2533 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2534 processing a pattern (PL_lex_inpat is true), a transliteration
2535 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2537 Returns a pointer to the character scanned up to. If this is
2538 advanced from the start pointer supplied (i.e. if anything was
2539 successfully parsed), will leave an OP for the substring scanned
2540 in pl_yylval. Caller must intuit reason for not parsing further
2541 by looking at the next characters herself.
2545 constants: \N{NAME} only
2546 case and quoting: \U \Q \E
2547 stops on @ and $, but not for $ as tail anchor
2549 In transliterations:
2550 characters are VERY literal, except for - not at the start or end
2551 of the string, which indicates a range. If the range is in bytes,
2552 scan_const expands the range to the full set of intermediate
2553 characters. If the range is in utf8, the hyphen is replaced with
2554 a certain range mark which will be handled by pmtrans() in op.c.
2556 In double-quoted strings:
2558 double-quoted style: \r and \n
2559 constants: \x31, etc.
2560 deprecated backrefs: \1 (in substitution replacements)
2561 case and quoting: \U \Q \E
2564 scan_const does *not* construct ops to handle interpolated strings.
2565 It stops processing as soon as it finds an embedded $ or @ variable
2566 and leaves it to the caller to work out what's going on.
2568 embedded arrays (whether in pattern or not) could be:
2569 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2571 $ in double-quoted strings must be the symbol of an embedded scalar.
2573 $ in pattern could be $foo or could be tail anchor. Assumption:
2574 it's a tail anchor if $ is the last thing in the string, or if it's
2575 followed by one of "()| \r\n\t"
2577 \1 (backreferences) are turned into $1
2579 The structure of the code is
2580 while (there's a character to process) {
2581 handle transliteration ranges
2582 skip regexp comments /(?#comment)/ and codes /(?{code})/
2583 skip #-initiated comments in //x patterns
2584 check for embedded arrays
2585 check for embedded scalars
2587 deprecate \1 in substitution replacements
2588 handle string-changing backslashes \l \U \Q \E, etc.
2589 switch (what was escaped) {
2590 handle \- in a transliteration (becomes a literal -)
2591 if a pattern and not \N{, go treat as regular character
2592 handle \132 (octal characters)
2593 handle \x15 and \x{1234} (hex characters)
2594 handle \N{name} (named characters, also \N{3,5} in a pattern)
2595 handle \cV (control characters)
2596 handle printf-style backslashes (\f, \r, \n, etc)
2599 } (end if backslash)
2600 handle regular character
2601 } (end while character to read)
2606 S_scan_const(pTHX_ char *start)
2609 register char *send = PL_bufend; /* end of the constant */
2610 SV *sv = newSV(send - start); /* sv for the constant. See
2611 note below on sizing. */
2612 register char *s = start; /* start of the constant */
2613 register char *d = SvPVX(sv); /* destination for copies */
2614 bool dorange = FALSE; /* are we in a translit range? */
2615 bool didrange = FALSE; /* did we just finish a range? */
2616 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
2617 I32 this_utf8 = UTF; /* Is the source string assumed
2618 to be UTF8? But, this can
2619 show as true when the source
2620 isn't utf8, as for example
2621 when it is entirely composed
2624 /* Note on sizing: The scanned constant is placed into sv, which is
2625 * initialized by newSV() assuming one byte of output for every byte of
2626 * input. This routine expects newSV() to allocate an extra byte for a
2627 * trailing NUL, which this routine will append if it gets to the end of
2628 * the input. There may be more bytes of input than output (eg., \N{LATIN
2629 * CAPITAL LETTER A}), or more output than input if the constant ends up
2630 * recoded to utf8, but each time a construct is found that might increase
2631 * the needed size, SvGROW() is called. Its size parameter each time is
2632 * based on the best guess estimate at the time, namely the length used so
2633 * far, plus the length the current construct will occupy, plus room for
2634 * the trailing NUL, plus one byte for every input byte still unscanned */
2638 UV literal_endpoint = 0;
2639 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2642 PERL_ARGS_ASSERT_SCAN_CONST;
2644 assert(PL_lex_inwhat != OP_TRANSR);
2645 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2646 /* If we are doing a trans and we know we want UTF8 set expectation */
2647 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2648 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2652 while (s < send || dorange) {
2654 /* get transliterations out of the way (they're most literal) */
2655 if (PL_lex_inwhat == OP_TRANS) {
2656 /* expand a range A-Z to the full set of characters. AIE! */
2658 I32 i; /* current expanded character */
2659 I32 min; /* first character in range */
2660 I32 max; /* last character in range */
2671 char * const c = (char*)utf8_hop((U8*)d, -1);
2675 *c = (char)UTF_TO_NATIVE(0xff);
2676 /* mark the range as done, and continue */
2682 i = d - SvPVX_const(sv); /* remember current offset */
2685 SvLEN(sv) + (has_utf8 ?
2686 (512 - UTF_CONTINUATION_MARK +
2689 /* How many two-byte within 0..255: 128 in UTF-8,
2690 * 96 in UTF-8-mod. */
2692 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2694 d = SvPVX(sv) + i; /* refresh d after realloc */
2698 for (j = 0; j <= 1; j++) {
2699 char * const c = (char*)utf8_hop((U8*)d, -1);
2700 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2706 max = (U8)0xff; /* only to \xff */
2707 uvmax = uv; /* \x{100} to uvmax */
2709 d = c; /* eat endpoint chars */
2714 d -= 2; /* eat the first char and the - */
2715 min = (U8)*d; /* first char in range */
2716 max = (U8)d[1]; /* last char in range */
2723 "Invalid range \"%c-%c\" in transliteration operator",
2724 (char)min, (char)max);
2728 if (literal_endpoint == 2 &&
2729 ((isLOWER(min) && isLOWER(max)) ||
2730 (isUPPER(min) && isUPPER(max)))) {
2732 for (i = min; i <= max; i++)
2734 *d++ = NATIVE_TO_NEED(has_utf8,i);
2736 for (i = min; i <= max; i++)
2738 *d++ = NATIVE_TO_NEED(has_utf8,i);
2743 for (i = min; i <= max; i++)
2746 const U8 ch = (U8)NATIVE_TO_UTF(i);
2747 if (UNI_IS_INVARIANT(ch))
2750 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2751 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2760 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2762 *d++ = (char)UTF_TO_NATIVE(0xff);
2764 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2768 /* mark the range as done, and continue */
2772 literal_endpoint = 0;
2777 /* range begins (ignore - as first or last char) */
2778 else if (*s == '-' && s+1 < send && s != start) {
2780 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2787 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2797 literal_endpoint = 0;
2798 native_range = TRUE;
2803 /* if we get here, we're not doing a transliteration */
2805 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2806 except for the last char, which will be done separately. */
2807 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2809 while (s+1 < send && *s != ')')
2810 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2812 else if (s[2] == '{' /* This should match regcomp.c */
2813 || (s[2] == '?' && s[3] == '{'))
2816 char *regparse = s + (s[2] == '{' ? 3 : 4);
2819 while (count && (c = *regparse)) {
2820 if (c == '\\' && regparse[1])
2828 if (*regparse != ')')
2829 regparse--; /* Leave one char for continuation. */
2830 while (s < regparse)
2831 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2835 /* likewise skip #-initiated comments in //x patterns */
2836 else if (*s == '#' && PL_lex_inpat &&
2837 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2838 while (s+1 < send && *s != '\n')
2839 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2842 /* check for embedded arrays
2843 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2845 else if (*s == '@' && s[1]) {
2846 if (isALNUM_lazy_if(s+1,UTF))
2848 if (strchr(":'{$", s[1]))
2850 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2851 break; /* in regexp, neither @+ nor @- are interpolated */
2854 /* check for embedded scalars. only stop if we're sure it's a
2857 else if (*s == '$') {
2858 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2860 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2862 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2863 "Possible unintended interpolation of $\\ in regex");
2865 break; /* in regexp, $ might be tail anchor */
2869 /* End of else if chain - OP_TRANS rejoin rest */
2872 if (*s == '\\' && s+1 < send) {
2873 char* e; /* Can be used for ending '}', etc. */
2877 /* warn on \1 - \9 in substitution replacements, but note that \11
2878 * is an octal; and \19 is \1 followed by '9' */
2879 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2880 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2882 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2887 /* string-change backslash escapes */
2888 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2892 /* In a pattern, process \N, but skip any other backslash escapes.
2893 * This is because we don't want to translate an escape sequence
2894 * into a meta symbol and have the regex compiler use the meta
2895 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2896 * in spite of this, we do have to process \N here while the proper
2897 * charnames handler is in scope. See bugs #56444 and #62056.
2898 * There is a complication because \N in a pattern may also stand
2899 * for 'match a non-nl', and not mean a charname, in which case its
2900 * processing should be deferred to the regex compiler. To be a
2901 * charname it must be followed immediately by a '{', and not look
2902 * like \N followed by a curly quantifier, i.e., not something like
2903 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2905 else if (PL_lex_inpat
2908 || regcurly(s + 1)))
2910 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2911 goto default_action;
2916 /* quoted - in transliterations */
2918 if (PL_lex_inwhat == OP_TRANS) {
2925 if ((isALPHA(*s) || isDIGIT(*s)))
2926 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2927 "Unrecognized escape \\%c passed through",
2929 /* default action is to copy the quoted character */
2930 goto default_action;
2933 /* eg. \132 indicates the octal constant 0132 */
2934 case '0': case '1': case '2': case '3':
2935 case '4': case '5': case '6': case '7':
2939 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2942 goto NUM_ESCAPE_INSERT;
2944 /* eg. \o{24} indicates the octal constant \024 */
2950 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2956 goto NUM_ESCAPE_INSERT;
2959 /* eg. \x24 indicates the hex constant 0x24 */
2963 char* const e = strchr(s, '}');
2964 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2965 PERL_SCAN_DISALLOW_PREFIX;
2970 yyerror("Missing right brace on \\x{}");
2974 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2980 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
2981 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
2987 /* Insert oct or hex escaped character. There will always be
2988 * enough room in sv since such escapes will be longer than any
2989 * UTF-8 sequence they can end up as, except if they force us
2990 * to recode the rest of the string into utf8 */
2992 /* Here uv is the ordinal of the next character being added in
2993 * unicode (converted from native). */
2994 if (!UNI_IS_INVARIANT(uv)) {
2995 if (!has_utf8 && uv > 255) {
2996 /* Might need to recode whatever we have accumulated so
2997 * far if it contains any chars variant in utf8 or
3000 SvCUR_set(sv, d - SvPVX_const(sv));
3003 /* See Note on sizing above. */
3004 sv_utf8_upgrade_flags_grow(sv,
3005 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3006 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3007 d = SvPVX(sv) + SvCUR(sv);
3012 d = (char*)uvuni_to_utf8((U8*)d, uv);
3013 if (PL_lex_inwhat == OP_TRANS &&
3014 PL_sublex_info.sub_op) {
3015 PL_sublex_info.sub_op->op_private |=
3016 (PL_lex_repl ? OPpTRANS_FROM_UTF
3020 if (uv > 255 && !dorange)
3021 native_range = FALSE;
3034 /* In a non-pattern \N must be a named character, like \N{LATIN
3035 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3036 * mean to match a non-newline. For non-patterns, named
3037 * characters are converted to their string equivalents. In
3038 * patterns, named characters are not converted to their
3039 * ultimate forms for the same reasons that other escapes
3040 * aren't. Instead, they are converted to the \N{U+...} form
3041 * to get the value from the charnames that is in effect right
3042 * now, while preserving the fact that it was a named character
3043 * so that the regex compiler knows this */
3045 /* This section of code doesn't generally use the
3046 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3047 * a close examination of this macro and determined it is a
3048 * no-op except on utfebcdic variant characters. Every
3049 * character generated by this that would normally need to be
3050 * enclosed by this macro is invariant, so the macro is not
3051 * needed, and would complicate use of copy(). XXX There are
3052 * other parts of this file where the macro is used
3053 * inconsistently, but are saved by it being a no-op */
3055 /* The structure of this section of code (besides checking for
3056 * errors and upgrading to utf8) is:
3057 * Further disambiguate between the two meanings of \N, and if
3058 * not a charname, go process it elsewhere
3059 * If of form \N{U+...}, pass it through if a pattern;
3060 * otherwise convert to utf8
3061 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3062 * pattern; otherwise convert to utf8 */
3064 /* Here, s points to the 'N'; the test below is guaranteed to
3065 * succeed if we are being called on a pattern as we already
3066 * know from a test above that the next character is a '{'.
3067 * On a non-pattern \N must mean 'named sequence, which
3068 * requires braces */
3071 yyerror("Missing braces on \\N{}");
3076 /* If there is no matching '}', it is an error. */
3077 if (! (e = strchr(s, '}'))) {
3078 if (! PL_lex_inpat) {
3079 yyerror("Missing right brace on \\N{}");
3081 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3086 /* Here it looks like a named character */
3090 /* XXX This block is temporary code. \N{} implies that the
3091 * pattern is to have Unicode semantics, and therefore
3092 * currently has to be encoded in utf8. By putting it in
3093 * utf8 now, we save a whole pass in the regular expression
3094 * compiler. Once that code is changed so Unicode
3095 * semantics doesn't necessarily have to be in utf8, this
3096 * block should be removed */
3098 SvCUR_set(sv, d - SvPVX_const(sv));
3101 /* See Note on sizing above. */
3102 sv_utf8_upgrade_flags_grow(sv,
3103 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3104 /* 5 = '\N{' + cur char + NUL */
3105 (STRLEN)(send - s) + 5);
3106 d = SvPVX(sv) + SvCUR(sv);
3111 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3112 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3113 | PERL_SCAN_DISALLOW_PREFIX;
3116 /* For \N{U+...}, the '...' is a unicode value even on
3117 * EBCDIC machines */
3118 s += 2; /* Skip to next char after the 'U+' */
3120 uv = grok_hex(s, &len, &flags, NULL);
3121 if (len == 0 || len != (STRLEN)(e - s)) {
3122 yyerror("Invalid hexadecimal number in \\N{U+...}");
3129 /* Pass through to the regex compiler unchanged. The
3130 * reason we evaluated the number above is to make sure
3131 * there wasn't a syntax error. */
3132 s -= 5; /* Include the '\N{U+' */
3133 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3136 else { /* Not a pattern: convert the hex to string */
3138 /* If destination is not in utf8, unconditionally
3139 * recode it to be so. This is because \N{} implies
3140 * Unicode semantics, and scalars have to be in utf8
3141 * to guarantee those semantics */
3143 SvCUR_set(sv, d - SvPVX_const(sv));
3146 /* See Note on sizing above. */
3147 sv_utf8_upgrade_flags_grow(
3149 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3150 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3151 d = SvPVX(sv) + SvCUR(sv);
3155 /* Add the string to the output */
3156 if (UNI_IS_INVARIANT(uv)) {
3159 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3162 else { /* Here is \N{NAME} but not \N{U+...}. */
3164 SV *res; /* result from charnames */
3165 const char *str; /* the string in 'res' */
3166 STRLEN len; /* its length */
3168 /* Get the value for NAME */
3169 res = newSVpvn(s, e - s);
3170 res = new_constant( NULL, 0, "charnames",
3171 /* includes all of: \N{...} */
3172 res, NULL, s - 3, e - s + 4 );
3174 /* Most likely res will be in utf8 already since the
3175 * standard charnames uses pack U, but a custom translator
3176 * can leave it otherwise, so make sure. XXX This can be
3177 * revisited to not have charnames use utf8 for characters
3178 * that don't need it when regexes don't have to be in utf8
3179 * for Unicode semantics. If doing so, remember EBCDIC */
3180 sv_utf8_upgrade(res);
3181 str = SvPV_const(res, len);
3183 /* Don't accept malformed input */
3184 if (! is_utf8_string((U8 *) str, len)) {
3185 yyerror("Malformed UTF-8 returned by \\N");
3187 else if (PL_lex_inpat) {
3189 if (! len) { /* The name resolved to an empty string */
3190 Copy("\\N{}", d, 4, char);
3194 /* In order to not lose information for the regex
3195 * compiler, pass the result in the specially made
3196 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3197 * the code points in hex of each character
3198 * returned by charnames */
3200 const char *str_end = str + len;
3201 STRLEN char_length; /* cur char's byte length */
3202 STRLEN output_length; /* and the number of bytes
3203 after this is translated
3205 const STRLEN off = d - SvPVX_const(sv);
3207 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3208 * max('U+', '.'); and 1 for NUL */
3209 char hex_string[2 * UTF8_MAXBYTES + 5];
3211 /* Get the first character of the result. */
3212 U32 uv = utf8n_to_uvuni((U8 *) str,
3217 /* The call to is_utf8_string() above hopefully
3218 * guarantees that there won't be an error. But
3219 * it's easy here to make sure. The function just
3220 * above warns and returns 0 if invalid utf8, but
3221 * it can also return 0 if the input is validly a
3222 * NUL. Disambiguate */
3223 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3224 uv = UNICODE_REPLACEMENT;
3227 /* Convert first code point to hex, including the
3228 * boiler plate before it */
3230 my_snprintf(hex_string, sizeof(hex_string),
3231 "\\N{U+%X", (unsigned int) uv);
3233 /* Make sure there is enough space to hold it */
3234 d = off + SvGROW(sv, off
3236 + (STRLEN)(send - e)
3237 + 2); /* '}' + NUL */
3239 Copy(hex_string, d, output_length, char);
3242 /* For each subsequent character, append dot and
3243 * its ordinal in hex */
3244 while ((str += char_length) < str_end) {
3245 const STRLEN off = d - SvPVX_const(sv);
3246 U32 uv = utf8n_to_uvuni((U8 *) str,
3250 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3251 uv = UNICODE_REPLACEMENT;
3255 my_snprintf(hex_string, sizeof(hex_string),
3256 ".%X", (unsigned int) uv);
3258 d = off + SvGROW(sv, off
3260 + (STRLEN)(send - e)
3261 + 2); /* '}' + NUL */
3262 Copy(hex_string, d, output_length, char);
3266 *d++ = '}'; /* Done. Add the trailing brace */
3269 else { /* Here, not in a pattern. Convert the name to a
3272 /* If destination is not in utf8, unconditionally
3273 * recode it to be so. This is because \N{} implies
3274 * Unicode semantics, and scalars have to be in utf8
3275 * to guarantee those semantics */
3277 SvCUR_set(sv, d - SvPVX_const(sv));
3280 /* See Note on sizing above. */
3281 sv_utf8_upgrade_flags_grow(sv,
3282 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3283 len + (STRLEN)(send - s) + 1);
3284 d = SvPVX(sv) + SvCUR(sv);
3286 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3288 /* See Note on sizing above. (NOTE: SvCUR() is not
3289 * set correctly here). */
3290 const STRLEN off = d - SvPVX_const(sv);
3291 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3293 Copy(str, d, len, char);
3298 /* Deprecate non-approved name syntax */
3299 if (ckWARN_d(WARN_DEPRECATED)) {
3300 bool problematic = FALSE;
3303 /* For non-ut8 input, look to see that the first
3304 * character is an alpha, then loop through the rest
3305 * checking that each is a continuation */
3307 if (! isALPHAU(*i)) problematic = TRUE;
3308 else for (i = s + 1; i < e; i++) {
3309 if (isCHARNAME_CONT(*i)) continue;
3315 /* Similarly for utf8. For invariants can check
3316 * directly. We accept anything above the latin1
3317 * range because it is immaterial to Perl if it is
3318 * correct or not, and is expensive to check. But
3319 * it is fairly easy in the latin1 range to convert
3320 * the variants into a single character and check
3322 if (UTF8_IS_INVARIANT(*i)) {
3323 if (! isALPHAU(*i)) problematic = TRUE;
3324 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3325 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3331 if (! problematic) for (i = s + UTF8SKIP(s);
3335 if (UTF8_IS_INVARIANT(*i)) {
3336 if (isCHARNAME_CONT(*i)) continue;
3337 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3339 } else if (isCHARNAME_CONT(
3341 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3350 /* The e-i passed to the final %.*s makes sure that
3351 * should the trailing NUL be missing that this
3352 * print won't run off the end of the string */
3353 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3354 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3355 (int)(i - s + 1), s, (int)(e - i), i + 1);
3358 } /* End \N{NAME} */
3361 native_range = FALSE; /* \N{} is defined to be Unicode */
3363 s = e + 1; /* Point to just after the '}' */
3366 /* \c is a control character */
3370 *d++ = grok_bslash_c(*s++, 1);
3373 yyerror("Missing control char name in \\c");
3377 /* printf-style backslashes, formfeeds, newlines, etc */
3379 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3382 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3385 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3388 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3391 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3394 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3397 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3403 } /* end if (backslash) */
3410 /* If we started with encoded form, or already know we want it,
3411 then encode the next character */
3412 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3416 /* One might think that it is wasted effort in the case of the
3417 * source being utf8 (this_utf8 == TRUE) to take the next character
3418 * in the source, convert it to an unsigned value, and then convert
3419 * it back again. But the source has not been validated here. The
3420 * routine that does the conversion checks for errors like
3423 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3424 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3426 SvCUR_set(sv, d - SvPVX_const(sv));
3429 /* See Note on sizing above. */
3430 sv_utf8_upgrade_flags_grow(sv,
3431 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3432 need + (STRLEN)(send - s) + 1);
3433 d = SvPVX(sv) + SvCUR(sv);
3435 } else if (need > len) {
3436 /* encoded value larger than old, may need extra space (NOTE:
3437 * SvCUR() is not set correctly here). See Note on sizing
3439 const STRLEN off = d - SvPVX_const(sv);
3440 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3444 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3446 if (uv > 255 && !dorange)
3447 native_range = FALSE;
3451 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3453 } /* while loop to process each character */
3455 /* terminate the string and set up the sv */
3457 SvCUR_set(sv, d - SvPVX_const(sv));
3458 if (SvCUR(sv) >= SvLEN(sv))
3459 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3462 if (PL_encoding && !has_utf8) {
3463 sv_recode_to_utf8(sv, PL_encoding);
3469 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3470 PL_sublex_info.sub_op->op_private |=
3471 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3475 /* shrink the sv if we allocated more than we used */
3476 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3477 SvPV_shrink_to_cur(sv);
3480 /* return the substring (via pl_yylval) only if we parsed anything */
3481 if (s > PL_bufptr) {
3482 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3483 const char *const key = PL_lex_inpat ? "qr" : "q";
3484 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3488 if (PL_lex_inwhat == OP_TRANS) {
3491 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3499 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3502 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3509 * Returns TRUE if there's more to the expression (e.g., a subscript),
3512 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3514 * ->[ and ->{ return TRUE
3515 * { and [ outside a pattern are always subscripts, so return TRUE
3516 * if we're outside a pattern and it's not { or [, then return FALSE
3517 * if we're in a pattern and the first char is a {
3518 * {4,5} (any digits around the comma) returns FALSE
3519 * if we're in a pattern and the first char is a [
3521 * [SOMETHING] has a funky algorithm to decide whether it's a
3522 * character class or not. It has to deal with things like
3523 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3524 * anything else returns TRUE
3527 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3530 S_intuit_more(pTHX_ register char *s)
3534 PERL_ARGS_ASSERT_INTUIT_MORE;
3536 if (PL_lex_brackets)
3538 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3540 if (*s != '{' && *s != '[')
3545 /* In a pattern, so maybe we have {n,m}. */
3553 /* On the other hand, maybe we have a character class */
3556 if (*s == ']' || *s == '^')
3559 /* this is terrifying, and it works */
3560 int weight = 2; /* let's weigh the evidence */
3562 unsigned char un_char = 255, last_un_char;
3563 const char * const send = strchr(s,']');
3564 char tmpbuf[sizeof PL_tokenbuf * 4];
3566 if (!send) /* has to be an expression */
3569 Zero(seen,256,char);
3572 else if (isDIGIT(*s)) {
3574 if (isDIGIT(s[1]) && s[2] == ']')
3580 for (; s < send; s++) {
3581 last_un_char = un_char;
3582 un_char = (unsigned char)*s;
3587 weight -= seen[un_char] * 10;
3588 if (isALNUM_lazy_if(s+1,UTF)) {
3590 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3591 len = (int)strlen(tmpbuf);
3592 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
3597 else if (*s == '$' && s[1] &&
3598 strchr("[#!%*<>()-=",s[1])) {
3599 if (/*{*/ strchr("])} =",s[2]))
3608 if (strchr("wds]",s[1]))
3610 else if (seen[(U8)'\''] || seen[(U8)'"'])
3612 else if (strchr("rnftbxcav",s[1]))
3614 else if (isDIGIT(s[1])) {
3616 while (s[1] && isDIGIT(s[1]))
3626 if (strchr("aA01! ",last_un_char))
3628 if (strchr("zZ79~",s[1]))
3630 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3631 weight -= 5; /* cope with negative subscript */
3634 if (!isALNUM(last_un_char)
3635 && !(last_un_char == '$' || last_un_char == '@'
3636 || last_un_char == '&')
3637 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3642 if (keyword(tmpbuf, d - tmpbuf, 0))
3645 if (un_char == last_un_char + 1)
3647 weight -= seen[un_char];
3652 if (weight >= 0) /* probably a character class */
3662 * Does all the checking to disambiguate
3664 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3665 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3667 * First argument is the stuff after the first token, e.g. "bar".
3669 * Not a method if bar is a filehandle.
3670 * Not a method if foo is a subroutine prototyped to take a filehandle.
3671 * Not a method if it's really "Foo $bar"
3672 * Method if it's "foo $bar"
3673 * Not a method if it's really "print foo $bar"
3674 * Method if it's really "foo package::" (interpreted as package->foo)
3675 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3676 * Not a method if bar is a filehandle or package, but is quoted with
3681 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3684 char *s = start + (*start == '$');
3685 char tmpbuf[sizeof PL_tokenbuf];
3692 PERL_ARGS_ASSERT_INTUIT_METHOD;
3695 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3699 const char *proto = SvPVX_const(cv);
3710 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3711 /* start is the beginning of the possible filehandle/object,
3712 * and s is the end of it
3713 * tmpbuf is a copy of it
3716 if (*start == '$') {
3717 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3718 isUPPER(*PL_tokenbuf))
3721 len = start - SvPVX(PL_linestr);
3725 start = SvPVX(PL_linestr) + len;
3729 return *s == '(' ? FUNCMETH : METHOD;
3731 if (!keyword(tmpbuf, len, 0)) {
3732 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3736 soff = s - SvPVX(PL_linestr);
3740 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
3741 if (indirgv && GvCVu(indirgv))
3743 /* filehandle or package name makes it a method */
3744 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
3746 soff = s - SvPVX(PL_linestr);
3749 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3750 return 0; /* no assumptions -- "=>" quotes bareword */
3752 start_force(PL_curforce);
3753 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3754 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3755 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3757 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3762 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3764 return *s == '(' ? FUNCMETH : METHOD;
3770 /* Encoded script support. filter_add() effectively inserts a
3771 * 'pre-processing' function into the current source input stream.
3772 * Note that the filter function only applies to the current source file
3773 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3775 * The datasv parameter (which may be NULL) can be used to pass
3776 * private data to this instance of the filter. The filter function
3777 * can recover the SV using the FILTER_DATA macro and use it to
3778 * store private buffers and state information.
3780 * The supplied datasv parameter is upgraded to a PVIO type
3781 * and the IoDIRP/IoANY field is used to store the function pointer,
3782 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3783 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3784 * private use must be set using malloc'd pointers.
3788 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3797 if (!PL_rsfp_filters)
3798 PL_rsfp_filters = newAV();
3801 SvUPGRADE(datasv, SVt_PVIO);
3802 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3803 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3804 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3805 FPTR2DPTR(void *, IoANY(datasv)),
3806 SvPV_nolen(datasv)));
3807 av_unshift(PL_rsfp_filters, 1);
3808 av_store(PL_rsfp_filters, 0, datasv) ;
3813 /* Delete most recently added instance of this filter function. */
3815 Perl_filter_del(pTHX_ filter_t funcp)
3820 PERL_ARGS_ASSERT_FILTER_DEL;
3823 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3824 FPTR2DPTR(void*, funcp)));
3826 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3828 /* if filter is on top of stack (usual case) just pop it off */
3829 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3830 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3831 sv_free(av_pop(PL_rsfp_filters));
3835 /* we need to search for the correct entry and clear it */
3836 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3840 /* Invoke the idxth filter function for the current rsfp. */
3841 /* maxlen 0 = read one text line */
3843 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3848 /* This API is bad. It should have been using unsigned int for maxlen.
3849 Not sure if we want to change the API, but if not we should sanity
3850 check the value here. */
3851 const unsigned int correct_length
3860 PERL_ARGS_ASSERT_FILTER_READ;
3862 if (!PL_parser || !PL_rsfp_filters)
3864 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3865 /* Provide a default input filter to make life easy. */
3866 /* Note that we append to the line. This is handy. */
3867 DEBUG_P(PerlIO_printf(Perl_debug_log,
3868 "filter_read %d: from rsfp\n", idx));
3869 if (correct_length) {
3872 const int old_len = SvCUR(buf_sv);
3874 /* ensure buf_sv is large enough */
3875 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3876 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3877 correct_length)) <= 0) {
3878 if (PerlIO_error(PL_rsfp))
3879 return -1; /* error */
3881 return 0 ; /* end of file */
3883 SvCUR_set(buf_sv, old_len + len) ;
3884 SvPVX(buf_sv)[old_len + len] = '\0';
3887 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3888 if (PerlIO_error(PL_rsfp))
3889 return -1; /* error */
3891 return 0 ; /* end of file */
3894 return SvCUR(buf_sv);
3896 /* Skip this filter slot if filter has been deleted */
3897 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3898 DEBUG_P(PerlIO_printf(Perl_debug_log,
3899 "filter_read %d: skipped (filter deleted)\n",
3901 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
3903 /* Get function pointer hidden within datasv */
3904 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
3905 DEBUG_P(PerlIO_printf(Perl_debug_log,
3906 "filter_read %d: via function %p (%s)\n",
3907 idx, (void*)datasv, SvPV_nolen_const(datasv)));
3908 /* Call function. The function is expected to */
3909 /* call "FILTER_READ(idx+1, buf_sv)" first. */
3910 /* Return: <0:error, =0:eof, >0:not eof */
3911 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
3915 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
3919 PERL_ARGS_ASSERT_FILTER_GETS;
3921 #ifdef PERL_CR_FILTER
3922 if (!PL_rsfp_filters) {
3923 filter_add(S_cr_textfilter,NULL);
3926 if (PL_rsfp_filters) {
3928 SvCUR_set(sv, 0); /* start with empty line */
3929 if (FILTER_READ(0, sv, 0) > 0)
3930 return ( SvPVX(sv) ) ;
3935 return (sv_gets(sv, PL_rsfp, append));
3939 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
3944 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3946 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
3950 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
3951 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
3953 return GvHV(gv); /* Foo:: */
3956 /* use constant CLASS => 'MyClass' */
3957 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3958 if (gv && GvCV(gv)) {
3959 SV * const sv = cv_const_sv(GvCV(gv));
3961 pkgname = SvPV_const(sv, len);
3964 return gv_stashpvn(pkgname, len, 0);
3968 * S_readpipe_override
3969 * Check whether readpipe() is overridden, and generates the appropriate
3970 * optree, provided sublex_start() is called afterwards.
3973 S_readpipe_override(pTHX)
3976 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
3977 pl_yylval.ival = OP_BACKTICK;
3979 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3981 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
3982 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
3983 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3985 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3986 op_append_elem(OP_LIST,
3987 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3988 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3995 * The intent of this yylex wrapper is to minimize the changes to the
3996 * tokener when we aren't interested in collecting madprops. It remains
3997 * to be seen how successful this strategy will be...
4004 char *s = PL_bufptr;
4006 /* make sure PL_thiswhite is initialized */
4010 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4011 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4012 return S_pending_ident(aTHX);
4014 /* previous token ate up our whitespace? */
4015 if (!PL_lasttoke && PL_nextwhite) {
4016 PL_thiswhite = PL_nextwhite;
4020 /* isolate the token, and figure out where it is without whitespace */
4021 PL_realtokenstart = -1;
4025 assert(PL_curforce < 0);
4027 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4028 if (!PL_thistoken) {
4029 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4030 PL_thistoken = newSVpvs("");
4032 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4033 PL_thistoken = newSVpvn(tstart, s - tstart);
4036 if (PL_thismad) /* install head */
4037 CURMAD('X', PL_thistoken);
4040 /* last whitespace of a sublex? */
4041 if (optype == ')' && PL_endwhite) {
4042 CURMAD('X', PL_endwhite);
4047 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4048 if (!PL_thiswhite && !PL_endwhite && !optype) {
4049 sv_free(PL_thistoken);
4054 /* put off final whitespace till peg */
4055 if (optype == ';' && !PL_rsfp) {
4056 PL_nextwhite = PL_thiswhite;
4059 else if (PL_thisopen) {
4060 CURMAD('q', PL_thisopen);
4062 sv_free(PL_thistoken);
4066 /* Store actual token text as madprop X */
4067 CURMAD('X', PL_thistoken);
4071 /* add preceding whitespace as madprop _ */
4072 CURMAD('_', PL_thiswhite);
4076 /* add quoted material as madprop = */
4077 CURMAD('=', PL_thisstuff);
4081 /* add terminating quote as madprop Q */
4082 CURMAD('Q', PL_thisclose);
4086 /* special processing based on optype */
4090 /* opval doesn't need a TOKEN since it can already store mp */
4100 if (pl_yylval.opval)
4101 append_madprops(PL_thismad, pl_yylval.opval, 0);
4109 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4118 /* remember any fake bracket that lexer is about to discard */
4119 if (PL_lex_brackets == 1 &&
4120 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4123 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4126 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4127 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4130 break; /* don't bother looking for trailing comment */
4139 /* attach a trailing comment to its statement instead of next token */
4143 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4145 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4147 if (*s == '\n' || *s == '#') {
4148 while (s < PL_bufend && *s != '\n')
4152 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4153 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4170 /* Create new token struct. Note: opvals return early above. */
4171 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4178 S_tokenize_use(pTHX_ int is_use, char *s) {
4181 PERL_ARGS_ASSERT_TOKENIZE_USE;
4183 if (PL_expect != XSTATE)
4184 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4185 is_use ? "use" : "no"));
4187 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4188 s = force_version(s, TRUE);
4189 if (*s == ';' || *s == '}'
4190 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4191 start_force(PL_curforce);
4192 NEXTVAL_NEXTTOKE.opval = NULL;
4195 else if (*s == 'v') {
4196 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4197 s = force_version(s, FALSE);
4201 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4202 s = force_version(s, FALSE);
4204 pl_yylval.ival = is_use;
4208 static const char* const exp_name[] =
4209 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4210 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4214 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4216 S_word_takes_any_delimeter(char *p, STRLEN len)
4218 return (len == 1 && strchr("msyq", p[0])) ||
4220 (p[0] == 't' && p[1] == 'r') ||
4221 (p[0] == 'q' && strchr("qwxr", p[1]))));
4227 Works out what to call the token just pulled out of the input
4228 stream. The yacc parser takes care of taking the ops we return and
4229 stitching them into a tree.
4235 if read an identifier
4236 if we're in a my declaration
4237 croak if they tried to say my($foo::bar)
4238 build the ops for a my() declaration
4239 if it's an access to a my() variable
4240 are we in a sort block?
4241 croak if my($a); $a <=> $b
4242 build ops for access to a my() variable
4243 if in a dq string, and they've said @foo and we can't find @foo
4245 build ops for a bareword
4246 if we already built the token before, use it.
4251 #pragma segment Perl_yylex
4257 register char *s = PL_bufptr;
4263 /* orig_keyword, gvp, and gv are initialized here because
4264 * jump to the label just_a_word_zero can bypass their
4265 * initialization later. */
4266 I32 orig_keyword = 0;
4271 SV* tmp = newSVpvs("");
4272 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4273 (IV)CopLINE(PL_curcop),
4274 lex_state_names[PL_lex_state],
4275 exp_name[PL_expect],
4276 pv_display(tmp, s, strlen(s), 0, 60));
4279 /* check if there's an identifier for us to look at */
4280 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4281 return REPORT(S_pending_ident(aTHX));
4283 /* no identifier pending identification */
4285 switch (PL_lex_state) {
4287 case LEX_NORMAL: /* Some compilers will produce faster */
4288 case LEX_INTERPNORMAL: /* code if we comment these out. */
4292 /* when we've already built the next token, just pull it out of the queue */
4296 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4298 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4299 PL_nexttoke[PL_lasttoke].next_mad = 0;
4300 if (PL_thismad && PL_thismad->mad_key == '_') {
4301 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4302 PL_thismad->mad_val = 0;
4303 mad_free(PL_thismad);
4308 PL_lex_state = PL_lex_defer;
4309 PL_expect = PL_lex_expect;
4310 PL_lex_defer = LEX_NORMAL;
4311 if (!PL_nexttoke[PL_lasttoke].next_type)
4316 pl_yylval = PL_nextval[PL_nexttoke];
4318 PL_lex_state = PL_lex_defer;
4319 PL_expect = PL_lex_expect;
4320 PL_lex_defer = LEX_NORMAL;
4326 next_type = PL_nexttoke[PL_lasttoke].next_type;
4328 next_type = PL_nexttype[PL_nexttoke];
4330 if (next_type & (7<<24)) {
4331 if (next_type & (1<<24)) {
4332 if (PL_lex_brackets > 100)
4333 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4334 PL_lex_brackstack[PL_lex_brackets++] =
4335 (next_type >> 16) & 0xff;
4337 if (next_type & (2<<24))
4338 PL_lex_allbrackets++;
4339 if (next_type & (4<<24))
4340 PL_lex_allbrackets--;
4341 next_type &= 0xffff;
4344 /* FIXME - can these be merged? */
4347 return REPORT(next_type);
4351 /* interpolated case modifiers like \L \U, including \Q and \E.
4352 when we get here, PL_bufptr is at the \
4354 case LEX_INTERPCASEMOD:
4356 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4357 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4359 /* handle \E or end of string */
4360 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4362 if (PL_lex_casemods) {
4363 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4364 PL_lex_casestack[PL_lex_casemods] = '\0';
4366 if (PL_bufptr != PL_bufend
4367 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4369 PL_lex_state = LEX_INTERPCONCAT;
4372 PL_thistoken = newSVpvs("\\E");
4375 PL_lex_allbrackets--;
4379 while (PL_bufptr != PL_bufend &&
4380 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4382 PL_thiswhite = newSVpvs("");
4383 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4387 if (PL_bufptr != PL_bufend)
4390 PL_lex_state = LEX_INTERPCONCAT;
4394 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4395 "### Saw case modifier\n"); });
4397 if (s[1] == '\\' && s[2] == 'E') {
4400 PL_thiswhite = newSVpvs("");
4401 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4404 PL_lex_state = LEX_INTERPCONCAT;
4409 if (!PL_madskills) /* when just compiling don't need correct */
4410 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4411 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4412 if ((*s == 'L' || *s == 'U') &&
4413 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4414 PL_lex_casestack[--PL_lex_casemods] = '\0';
4415 PL_lex_allbrackets--;
4418 if (PL_lex_casemods > 10)
4419 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4420 PL_lex_casestack[PL_lex_casemods++] = *s;
4421 PL_lex_casestack[PL_lex_casemods] = '\0';
4422 PL_lex_state = LEX_INTERPCONCAT;
4423 start_force(PL_curforce);
4424 NEXTVAL_NEXTTOKE.ival = 0;
4425 force_next((2<<24)|'(');
4426 start_force(PL_curforce);
4428 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4430 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4432 NEXTVAL_NEXTTOKE.ival = OP_LC;
4434 NEXTVAL_NEXTTOKE.ival = OP_UC;
4436 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4438 Perl_croak(aTHX_ "panic: yylex");
4440 SV* const tmpsv = newSVpvs("\\ ");
4441 /* replace the space with the character we want to escape
4443 SvPVX(tmpsv)[1] = *s;
4449 if (PL_lex_starts) {
4455 sv_free(PL_thistoken);
4456 PL_thistoken = newSVpvs("");
4459 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4460 if (PL_lex_casemods == 1 && PL_lex_inpat)
4469 case LEX_INTERPPUSH:
4470 return REPORT(sublex_push());
4472 case LEX_INTERPSTART:
4473 if (PL_bufptr == PL_bufend)
4474 return REPORT(sublex_done());
4475 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4476 "### Interpolated variable\n"); });
4478 PL_lex_dojoin = (*PL_bufptr == '@');
4479 PL_lex_state = LEX_INTERPNORMAL;
4480 if (PL_lex_dojoin) {
4481 start_force(PL_curforce);
4482 NEXTVAL_NEXTTOKE.ival = 0;
4484 start_force(PL_curforce);
4485 force_ident("\"", '$');
4486 start_force(PL_curforce);
4487 NEXTVAL_NEXTTOKE.ival = 0;
4489 start_force(PL_curforce);
4490 NEXTVAL_NEXTTOKE.ival = 0;
4491 force_next((2<<24)|'(');
4492 start_force(PL_curforce);
4493 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4496 if (PL_lex_starts++) {
4501 sv_free(PL_thistoken);
4502 PL_thistoken = newSVpvs("");
4505 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4506 if (!PL_lex_casemods && PL_lex_inpat)
4513 case LEX_INTERPENDMAYBE:
4514 if (intuit_more(PL_bufptr)) {
4515 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4521 if (PL_lex_dojoin) {
4522 PL_lex_dojoin = FALSE;
4523 PL_lex_state = LEX_INTERPCONCAT;
4527 sv_free(PL_thistoken);
4528 PL_thistoken = newSVpvs("");
4531 PL_lex_allbrackets--;
4534 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4535 && SvEVALED(PL_lex_repl))
4537 if (PL_bufptr != PL_bufend)
4538 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4542 case LEX_INTERPCONCAT:
4544 if (PL_lex_brackets)
4545 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4547 if (PL_bufptr == PL_bufend)
4548 return REPORT(sublex_done());
4550 if (SvIVX(PL_linestr) == '\'') {
4551 SV *sv = newSVsv(PL_linestr);
4554 else if ( PL_hints & HINT_NEW_RE )
4555 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4556 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4560 s = scan_const(PL_bufptr);
4562 PL_lex_state = LEX_INTERPCASEMOD;
4564 PL_lex_state = LEX_INTERPSTART;
4567 if (s != PL_bufptr) {
4568 start_force(PL_curforce);
4570 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4572 NEXTVAL_NEXTTOKE = pl_yylval;
4575 if (PL_lex_starts++) {
4579 sv_free(PL_thistoken);
4580 PL_thistoken = newSVpvs("");
4583 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4584 if (!PL_lex_casemods && PL_lex_inpat)
4597 PL_lex_state = LEX_NORMAL;
4598 s = scan_formline(PL_bufptr);
4599 if (!PL_lex_formbrack)
4605 PL_oldoldbufptr = PL_oldbufptr;
4611 sv_free(PL_thistoken);
4614 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4618 if (isIDFIRST_lazy_if(s,UTF))
4621 unsigned char c = *s;
4622 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4623 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4624 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4629 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4633 goto fake_eof; /* emulate EOF on ^D or ^Z */
4642 if (PL_lex_brackets &&
4643 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4644 yyerror((const char *)
4646 ? "Format not terminated"
4647 : "Missing right curly or square bracket"));
4649 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4650 "### Tokener got EOF\n");
4654 if (s++ < PL_bufend)
4655 goto retry; /* ignore stray nulls */
4658 if (!PL_in_eval && !PL_preambled) {
4659 PL_preambled = TRUE;
4665 /* Generate a string of Perl code to load the debugger.
4666 * If PERL5DB is set, it will return the contents of that,
4667 * otherwise a compile-time require of perl5db.pl. */
4669 const char * const pdb = PerlEnv_getenv("PERL5DB");
4672 sv_setpv(PL_linestr, pdb);
4673 sv_catpvs(PL_linestr,";");
4675 SETERRNO(0,SS_NORMAL);
4676 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4679 sv_setpvs(PL_linestr,"");
4680 if (PL_preambleav) {
4681 SV **svp = AvARRAY(PL_preambleav);
4682 SV **const end = svp + AvFILLp(PL_preambleav);
4684 sv_catsv(PL_linestr, *svp);
4686 sv_catpvs(PL_linestr, ";");
4688 sv_free(MUTABLE_SV(PL_preambleav));
4689 PL_preambleav = NULL;
4692 sv_catpvs(PL_linestr,
4693 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4694 if (PL_minus_n || PL_minus_p) {
4695 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4697 sv_catpvs(PL_linestr,"chomp;");
4700 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4701 || *PL_splitstr == '"')
4702 && strchr(PL_splitstr + 1, *PL_splitstr))
4703 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4705 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4706 bytes can be used as quoting characters. :-) */
4707 const char *splits = PL_splitstr;
4708 sv_catpvs(PL_linestr, "our @F=split(q\0");
4711 if (*splits == '\\')
4712 sv_catpvn(PL_linestr, splits, 1);
4713 sv_catpvn(PL_linestr, splits, 1);
4714 } while (*splits++);
4715 /* This loop will embed the trailing NUL of
4716 PL_linestr as the last thing it does before
4718 sv_catpvs(PL_linestr, ");");
4722 sv_catpvs(PL_linestr,"our @F=split(' ');");
4725 sv_catpvs(PL_linestr, "\n");
4726 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4727 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4728 PL_last_lop = PL_last_uni = NULL;
4729 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4730 update_debugger_info(PL_linestr, NULL, 0);
4735 bof = PL_rsfp ? TRUE : FALSE;
4738 fake_eof = LEX_FAKE_EOF;
4740 PL_bufptr = PL_bufend;
4741 CopLINE_inc(PL_curcop);
4742 if (!lex_next_chunk(fake_eof)) {
4743 CopLINE_dec(PL_curcop);
4745 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4747 CopLINE_dec(PL_curcop);
4750 PL_realtokenstart = -1;
4753 /* If it looks like the start of a BOM or raw UTF-16,
4754 * check if it in fact is. */
4755 if (bof && PL_rsfp &&
4760 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
4762 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4763 s = swallow_bom((U8*)s);
4766 if (PL_parser->in_pod) {
4767 /* Incest with pod. */
4770 sv_catsv(PL_thiswhite, PL_linestr);
4772 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4773 sv_setpvs(PL_linestr, "");
4774 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4775 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4776 PL_last_lop = PL_last_uni = NULL;
4777 PL_parser->in_pod = 0;
4782 } while (PL_parser->in_pod);
4783 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4784 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4785 PL_last_lop = PL_last_uni = NULL;
4786 if (CopLINE(PL_curcop) == 1) {
4787 while (s < PL_bufend && isSPACE(*s))
4789 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4793 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4797 if (*s == '#' && *(s+1) == '!')
4799 #ifdef ALTERNATE_SHEBANG
4801 static char const as[] = ALTERNATE_SHEBANG;
4802 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4803 d = s + (sizeof(as) - 1);
4805 #endif /* ALTERNATE_SHEBANG */
4814 while (*d && !isSPACE(*d))
4818 #ifdef ARG_ZERO_IS_SCRIPT
4819 if (ipathend > ipath) {
4821 * HP-UX (at least) sets argv[0] to the script name,
4822 * which makes $^X incorrect. And Digital UNIX and Linux,
4823 * at least, set argv[0] to the basename of the Perl
4824 * interpreter. So, having found "#!", we'll set it right.
4826 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4828 assert(SvPOK(x) || SvGMAGICAL(x));
4829 if (sv_eq(x, CopFILESV(PL_curcop))) {
4830 sv_setpvn(x, ipath, ipathend - ipath);
4836 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4837 const char * const lstart = SvPV_const(x,llen);
4839 bstart += blen - llen;
4840 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4841 sv_setpvn(x, ipath, ipathend - ipath);
4846 TAINT_NOT; /* $^X is always tainted, but that's OK */
4848 #endif /* ARG_ZERO_IS_SCRIPT */
4853 d = instr(s,"perl -");
4855 d = instr(s,"perl");
4857 /* avoid getting into infinite loops when shebang
4858 * line contains "Perl" rather than "perl" */
4860 for (d = ipathend-4; d >= ipath; --d) {
4861 if ((*d == 'p' || *d == 'P')
4862 && !ibcmp(d, "perl", 4))
4872 #ifdef ALTERNATE_SHEBANG
4874 * If the ALTERNATE_SHEBANG on this system starts with a
4875 * character that can be part of a Perl expression, then if
4876 * we see it but not "perl", we're probably looking at the
4877 * start of Perl code, not a request to hand off to some
4878 * other interpreter. Similarly, if "perl" is there, but
4879 * not in the first 'word' of the line, we assume the line
4880 * contains the start of the Perl program.
4882 if (d && *s != '#') {
4883 const char *c = ipath;
4884 while (*c && !strchr("; \t\r\n\f\v#", *c))
4887 d = NULL; /* "perl" not in first word; ignore */
4889 *s = '#'; /* Don't try to parse shebang line */
4891 #endif /* ALTERNATE_SHEBANG */
4896 !instr(s,"indir") &&
4897 instr(PL_origargv[0],"perl"))
4904 while (s < PL_bufend && isSPACE(*s))
4906 if (s < PL_bufend) {
4907 Newx(newargv,PL_origargc+3,char*);
4909 while (s < PL_bufend && !isSPACE(*s))
4912 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
4915 newargv = PL_origargv;
4918 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
4920 Perl_croak(aTHX_ "Can't exec %s", ipath);
4923 while (*d && !isSPACE(*d))
4925 while (SPACE_OR_TAB(*d))
4929 const bool switches_done = PL_doswitches;
4930 const U32 oldpdb = PL_perldb;
4931 const bool oldn = PL_minus_n;
4932 const bool oldp = PL_minus_p;
4936 bool baduni = FALSE;
4938 const char *d2 = d1 + 1;
4939 if (parse_unicode_opts((const char **)&d2)
4943 if (baduni || *d1 == 'M' || *d1 == 'm') {
4944 const char * const m = d1;
4945 while (*d1 && !isSPACE(*d1))
4947 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
4950 d1 = moreswitches(d1);
4952 if (PL_doswitches && !switches_done) {
4953 int argc = PL_origargc;
4954 char **argv = PL_origargv;
4957 } while (argc && argv[0][0] == '-' && argv[0][1]);
4958 init_argv_symbols(argc,argv);
4960 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
4961 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
4962 /* if we have already added "LINE: while (<>) {",
4963 we must not do it again */
4965 sv_setpvs(PL_linestr, "");
4966 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4967 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4968 PL_last_lop = PL_last_uni = NULL;
4969 PL_preambled = FALSE;
4970 if (PERLDB_LINE || PERLDB_SAVESRC)
4971 (void)gv_fetchfile(PL_origfilename);
4978 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4980 PL_lex_state = LEX_FORMLINE;
4985 #ifdef PERL_STRICT_CR
4986 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4988 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
4990 case ' ': case '\t': case '\f': case 013:
4992 PL_realtokenstart = -1;
4994 PL_thiswhite = newSVpvs("");
4995 sv_catpvn(PL_thiswhite, s, 1);
5002 PL_realtokenstart = -1;
5006 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
5007 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5008 /* handle eval qq[#line 1 "foo"\n ...] */
5009 CopLINE_dec(PL_curcop);
5012 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5014 if (!PL_in_eval || PL_rsfp)
5019 while (d < PL_bufend && *d != '\n')
5023 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5024 Perl_croak(aTHX_ "panic: input overflow");
5027 PL_thiswhite = newSVpvn(s, d - s);
5032 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5034 PL_lex_state = LEX_FORMLINE;
5040 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5041 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5044 TOKEN(PEG); /* make sure any #! line is accessible */
5049 /* if (PL_madskills && PL_lex_formbrack) { */
5051 while (d < PL_bufend && *d != '\n')
5055 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5056 Perl_croak(aTHX_ "panic: input overflow");
5057 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5059 PL_thiswhite = newSVpvs("");
5060 if (CopLINE(PL_curcop) == 1) {
5061 sv_setpvs(PL_thiswhite, "");
5064 sv_catpvn(PL_thiswhite, s, d - s);
5078 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5086 while (s < PL_bufend && SPACE_OR_TAB(*s))
5089 if (strnEQ(s,"=>",2)) {
5090 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5091 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5092 OPERATOR('-'); /* unary minus */
5094 PL_last_uni = PL_oldbufptr;
5096 case 'r': ftst = OP_FTEREAD; break;
5097 case 'w': ftst = OP_FTEWRITE; break;
5098 case 'x': ftst = OP_FTEEXEC; break;
5099 case 'o': ftst = OP_FTEOWNED; break;
5100 case 'R': ftst = OP_FTRREAD; break;
5101 case 'W': ftst = OP_FTRWRITE; break;
5102 case 'X': ftst = OP_FTREXEC; break;
5103 case 'O': ftst = OP_FTROWNED; break;
5104 case 'e': ftst = OP_FTIS; break;
5105 case 'z': ftst = OP_FTZERO; break;
5106 case 's': ftst = OP_FTSIZE; break;
5107 case 'f': ftst = OP_FTFILE; break;
5108 case 'd': ftst = OP_FTDIR; break;
5109 case 'l': ftst = OP_FTLINK; break;
5110 case 'p': ftst = OP_FTPIPE; break;
5111 case 'S': ftst = OP_FTSOCK; break;
5112 case 'u': ftst = OP_FTSUID; break;
5113 case 'g': ftst = OP_FTSGID; break;
5114 case 'k': ftst = OP_FTSVTX; break;
5115 case 'b': ftst = OP_FTBLK; break;
5116 case 'c': ftst = OP_FTCHR; break;
5117 case 't': ftst = OP_FTTTY; break;
5118 case 'T': ftst = OP_FTTEXT; break;
5119 case 'B': ftst = OP_FTBINARY; break;
5120 case 'M': case 'A': case 'C':
5121 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5123 case 'M': ftst = OP_FTMTIME; break;
5124 case 'A': ftst = OP_FTATIME; break;
5125 case 'C': ftst = OP_FTCTIME; break;
5133 PL_last_lop_op = (OPCODE)ftst;
5134 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5135 "### Saw file test %c\n", (int)tmp);
5140 /* Assume it was a minus followed by a one-letter named
5141 * subroutine call (or a -bareword), then. */
5142 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5143 "### '-%c' looked like a file test but was not\n",
5150 const char tmp = *s++;
5153 if (PL_expect == XOPERATOR)
5158 else if (*s == '>') {
5161 if (isIDFIRST_lazy_if(s,UTF)) {
5162 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5170 if (PL_expect == XOPERATOR) {
5171 if (*s == '=' && !PL_lex_allbrackets &&
5172 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5179 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5181 OPERATOR('-'); /* unary minus */
5187 const char tmp = *s++;
5190 if (PL_expect == XOPERATOR)
5195 if (PL_expect == XOPERATOR) {
5196 if (*s == '=' && !PL_lex_allbrackets &&
5197 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5204 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5211 if (PL_expect != XOPERATOR) {
5212 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5213 PL_expect = XOPERATOR;
5214 force_ident(PL_tokenbuf, '*');
5222 if (*s == '=' && !PL_lex_allbrackets &&
5223 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5229 if (*s == '=' && !PL_lex_allbrackets &&
5230 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5237 if (PL_expect == XOPERATOR) {
5238 if (s[1] == '=' && !PL_lex_allbrackets &&
5239 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5244 PL_tokenbuf[0] = '%';
5245 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5246 sizeof PL_tokenbuf - 1, FALSE);
5247 if (!PL_tokenbuf[1]) {
5250 PL_pending_ident = '%';
5254 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5255 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5260 if (PL_lex_brackets > 100)
5261 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5262 PL_lex_brackstack[PL_lex_brackets++] = 0;
5263 PL_lex_allbrackets++;
5265 const char tmp = *s++;
5270 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5272 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5280 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5287 goto just_a_word_zero_gv;
5290 switch (PL_expect) {
5296 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5298 PL_bufptr = s; /* update in case we back off */
5301 "Use of := for an empty attribute list is not allowed");
5308 PL_expect = XTERMBLOCK;
5311 stuffstart = s - SvPVX(PL_linestr) - 1;
5315 while (isIDFIRST_lazy_if(s,UTF)) {
5318 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5319 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5320 if (tmp < 0) tmp = -tmp;
5335 sv = newSVpvn(s, len);
5337 d = scan_str(d,TRUE,TRUE);
5339 /* MUST advance bufptr here to avoid bogus
5340 "at end of line" context messages from yyerror().
5342 PL_bufptr = s + len;
5343 yyerror("Unterminated attribute parameter in attribute list");
5347 return REPORT(0); /* EOF indicator */
5351 sv_catsv(sv, PL_lex_stuff);
5352 attrs = op_append_elem(OP_LIST, attrs,
5353 newSVOP(OP_CONST, 0, sv));
5354 SvREFCNT_dec(PL_lex_stuff);
5355 PL_lex_stuff = NULL;
5358 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5360 if (PL_in_my == KEY_our) {
5361 deprecate(":unique");
5364 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5367 /* NOTE: any CV attrs applied here need to be part of
5368 the CVf_BUILTIN_ATTRS define in cv.h! */
5369 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5371 CvLVALUE_on(PL_compcv);
5373 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5375 deprecate(":locked");
5377 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5379 CvMETHOD_on(PL_compcv);
5381 /* After we've set the flags, it could be argued that
5382 we don't need to do the attributes.pm-based setting
5383 process, and shouldn't bother appending recognized
5384 flags. To experiment with that, uncomment the
5385 following "else". (Note that's already been
5386 uncommented. That keeps the above-applied built-in
5387 attributes from being intercepted (and possibly
5388 rejected) by a package's attribute routines, but is
5389 justified by the performance win for the common case
5390 of applying only built-in attributes.) */
5392 attrs = op_append_elem(OP_LIST, attrs,
5393 newSVOP(OP_CONST, 0,
5397 if (*s == ':' && s[1] != ':')
5400 break; /* require real whitespace or :'s */
5401 /* XXX losing whitespace on sequential attributes here */
5405 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5406 if (*s != ';' && *s != '}' && *s != tmp
5407 && (tmp != '=' || *s != ')')) {
5408 const char q = ((*s == '\'') ? '"' : '\'');
5409 /* If here for an expression, and parsed no attrs, back
5411 if (tmp == '=' && !attrs) {
5415 /* MUST advance bufptr here to avoid bogus "at end of line"
5416 context messages from yyerror().
5419 yyerror( (const char *)
5421 ? Perl_form(aTHX_ "Invalid separator character "
5422 "%c%c%c in attribute list", q, *s, q)
5423 : "Unterminated attribute list" ) );
5431 start_force(PL_curforce);
5432 NEXTVAL_NEXTTOKE.opval = attrs;
5433 CURMAD('_', PL_nextwhite);
5438 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5439 (s - SvPVX(PL_linestr)) - stuffstart);
5444 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5448 PL_lex_allbrackets--;
5452 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5453 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5457 PL_lex_allbrackets++;
5460 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5466 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5469 PL_lex_allbrackets--;
5475 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5478 if (PL_lex_brackets <= 0)
5479 yyerror("Unmatched right square bracket");
5482 PL_lex_allbrackets--;
5483 if (PL_lex_state == LEX_INTERPNORMAL) {
5484 if (PL_lex_brackets == 0) {
5485 if (*s == '-' && s[1] == '>')
5486 PL_lex_state = LEX_INTERPENDMAYBE;
5487 else if (*s != '[' && *s != '{')
5488 PL_lex_state = LEX_INTERPEND;
5495 if (PL_lex_brackets > 100) {
5496 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5498 switch (PL_expect) {
5500 if (PL_lex_formbrack) {
5504 if (PL_oldoldbufptr == PL_last_lop)
5505 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5507 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5508 PL_lex_allbrackets++;
5509 OPERATOR(HASHBRACK);
5511 while (s < PL_bufend && SPACE_OR_TAB(*s))
5514 PL_tokenbuf[0] = '\0';
5515 if (d < PL_bufend && *d == '-') {
5516 PL_tokenbuf[0] = '-';
5518 while (d < PL_bufend && SPACE_OR_TAB(*d))
5521 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5522 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5524 while (d < PL_bufend && SPACE_OR_TAB(*d))
5527 const char minus = (PL_tokenbuf[0] == '-');
5528 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5536 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5537 PL_lex_allbrackets++;
5542 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5543 PL_lex_allbrackets++;
5548 if (PL_oldoldbufptr == PL_last_lop)
5549 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5551 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5552 PL_lex_allbrackets++;
5555 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5557 /* This hack is to get the ${} in the message. */
5559 yyerror("syntax error");
5562 OPERATOR(HASHBRACK);
5564 /* This hack serves to disambiguate a pair of curlies
5565 * as being a block or an anon hash. Normally, expectation
5566 * determines that, but in cases where we're not in a
5567 * position to expect anything in particular (like inside
5568 * eval"") we have to resolve the ambiguity. This code
5569 * covers the case where the first term in the curlies is a
5570 * quoted string. Most other cases need to be explicitly
5571 * disambiguated by prepending a "+" before the opening
5572 * curly in order to force resolution as an anon hash.
5574 * XXX should probably propagate the outer expectation
5575 * into eval"" to rely less on this hack, but that could
5576 * potentially break current behavior of eval"".
5580 if (*s == '\'' || *s == '"' || *s == '`') {
5581 /* common case: get past first string, handling escapes */
5582 for (t++; t < PL_bufend && *t != *s;)
5583 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5587 else if (*s == 'q') {
5590 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5593 /* skip q//-like construct */
5595 char open, close, term;
5598 while (t < PL_bufend && isSPACE(*t))
5600 /* check for q => */
5601 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5602 OPERATOR(HASHBRACK);
5606 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5610 for (t++; t < PL_bufend; t++) {
5611 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5613 else if (*t == open)
5617 for (t++; t < PL_bufend; t++) {
5618 if (*t == '\\' && t+1 < PL_bufend)
5620 else if (*t == close && --brackets <= 0)
5622 else if (*t == open)
5629 /* skip plain q word */
5630 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5633 else if (isALNUM_lazy_if(t,UTF)) {
5635 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5638 while (t < PL_bufend && isSPACE(*t))
5640 /* if comma follows first term, call it an anon hash */
5641 /* XXX it could be a comma expression with loop modifiers */
5642 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5643 || (*t == '=' && t[1] == '>')))
5644 OPERATOR(HASHBRACK);
5645 if (PL_expect == XREF)
5648 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5654 pl_yylval.ival = CopLINE(PL_curcop);
5655 if (isSPACE(*s) || *s == '#')
5656 PL_copline = NOLINE; /* invalidate current command line number */
5659 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5663 if (PL_lex_brackets <= 0)
5664 yyerror("Unmatched right curly bracket");
5666 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5667 PL_lex_allbrackets--;
5668 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5669 PL_lex_formbrack = 0;
5670 if (PL_lex_state == LEX_INTERPNORMAL) {
5671 if (PL_lex_brackets == 0) {
5672 if (PL_expect & XFAKEBRACK) {
5673 PL_expect &= XENUMMASK;
5674 PL_lex_state = LEX_INTERPEND;
5679 PL_thiswhite = newSVpvs("");
5680 sv_catpvs(PL_thiswhite,"}");
5683 return yylex(); /* ignore fake brackets */
5685 if (*s == '-' && s[1] == '>')
5686 PL_lex_state = LEX_INTERPENDMAYBE;
5687 else if (*s != '[' && *s != '{')
5688 PL_lex_state = LEX_INTERPEND;
5691 if (PL_expect & XFAKEBRACK) {
5692 PL_expect &= XENUMMASK;
5694 return yylex(); /* ignore fake brackets */
5696 start_force(PL_curforce);
5698 curmad('X', newSVpvn(s-1,1));
5699 CURMAD('_', PL_thiswhite);
5704 PL_thistoken = newSVpvs("");
5710 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5711 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5718 if (PL_expect == XOPERATOR) {
5719 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5720 && isIDFIRST_lazy_if(s,UTF))
5722 CopLINE_dec(PL_curcop);
5723 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5724 CopLINE_inc(PL_curcop);
5726 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5727 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5734 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5736 PL_expect = XOPERATOR;
5737 force_ident(PL_tokenbuf, '&');
5741 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5747 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5748 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5755 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5756 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5764 const char tmp = *s++;
5766 if (!PL_lex_allbrackets &&
5767 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5774 if (!PL_lex_allbrackets &&
5775 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5783 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5784 && strchr("+-*/%.^&|<",tmp))
5785 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5786 "Reversed %c= operator",(int)tmp);
5788 if (PL_expect == XSTATE && isALPHA(tmp) &&
5789 (s == PL_linestart+1 || s[-2] == '\n') )
5791 if (PL_in_eval && !PL_rsfp) {
5796 if (strnEQ(s,"=cut",4)) {
5812 PL_thiswhite = newSVpvs("");
5813 sv_catpvn(PL_thiswhite, PL_linestart,
5814 PL_bufend - PL_linestart);
5818 PL_parser->in_pod = 1;
5822 if (PL_lex_brackets < PL_lex_formbrack) {
5824 #ifdef PERL_STRICT_CR
5825 while (SPACE_OR_TAB(*t))
5827 while (SPACE_OR_TAB(*t) || *t == '\r')
5830 if (*t == '\n' || *t == '#') {
5836 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5845 const char tmp = *s++;
5847 /* was this !=~ where !~ was meant?
5848 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5850 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5851 const char *t = s+1;
5853 while (t < PL_bufend && isSPACE(*t))
5856 if (*t == '/' || *t == '?' ||
5857 ((*t == 'm' || *t == 's' || *t == 'y')
5858 && !isALNUM(t[1])) ||
5859 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5860 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5861 "!=~ should be !~");
5863 if (!PL_lex_allbrackets &&
5864 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5876 if (PL_expect != XOPERATOR) {
5877 if (s[1] != '<' && !strchr(s,'>'))
5880 s = scan_heredoc(s);
5882 s = scan_inputsymbol(s);
5883 TERM(sublex_start());
5889 if (*s == '=' && !PL_lex_allbrackets &&
5890 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5894 SHop(OP_LEFT_SHIFT);
5899 if (!PL_lex_allbrackets &&
5900 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5907 if (!PL_lex_allbrackets &&
5908 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5916 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5924 const char tmp = *s++;
5926 if (*s == '=' && !PL_lex_allbrackets &&
5927 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5931 SHop(OP_RIGHT_SHIFT);
5933 else if (tmp == '=') {
5934 if (!PL_lex_allbrackets &&
5935 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5943 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5952 if (PL_expect == XOPERATOR) {
5953 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
5954 return deprecate_commaless_var_list();
5958 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
5959 PL_tokenbuf[0] = '@';
5960 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5961 sizeof PL_tokenbuf - 1, FALSE);
5962 if (PL_expect == XOPERATOR)
5963 no_op("Array length", s);
5964 if (!PL_tokenbuf[1])
5966 PL_expect = XOPERATOR;
5967 PL_pending_ident = '#';
5971 PL_tokenbuf[0] = '$';
5972 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5973 sizeof PL_tokenbuf - 1, FALSE);
5974 if (PL_expect == XOPERATOR)
5976 if (!PL_tokenbuf[1]) {
5978 yyerror("Final $ should be \\$ or $name");
5982 /* This kludge not intended to be bulletproof. */
5983 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
5984 pl_yylval.opval = newSVOP(OP_CONST, 0,
5985 newSViv(CopARYBASE_get(&PL_compiling)));
5986 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
5992 const char tmp = *s;
5993 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5996 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5997 && intuit_more(s)) {
5999 PL_tokenbuf[0] = '@';
6000 if (ckWARN(WARN_SYNTAX)) {
6003 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6006 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6007 while (t < PL_bufend && *t != ']')
6009 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6010 "Multidimensional syntax %.*s not supported",
6011 (int)((t - PL_bufptr) + 1), PL_bufptr);
6015 else if (*s == '{') {
6017 PL_tokenbuf[0] = '%';
6018 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6019 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6021 char tmpbuf[sizeof PL_tokenbuf];
6024 } while (isSPACE(*t));
6025 if (isIDFIRST_lazy_if(t,UTF)) {
6027 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6031 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6032 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6033 "You need to quote \"%s\"",
6040 PL_expect = XOPERATOR;
6041 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6042 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6043 if (!islop || PL_last_lop_op == OP_GREPSTART)
6044 PL_expect = XOPERATOR;
6045 else if (strchr("$@\"'`q", *s))
6046 PL_expect = XTERM; /* e.g. print $fh "foo" */
6047 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6048 PL_expect = XTERM; /* e.g. print $fh &sub */
6049 else if (isIDFIRST_lazy_if(s,UTF)) {
6050 char tmpbuf[sizeof PL_tokenbuf];
6052 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6053 if ((t2 = keyword(tmpbuf, len, 0))) {
6054 /* binary operators exclude handle interpretations */
6066 PL_expect = XTERM; /* e.g. print $fh length() */
6071 PL_expect = XTERM; /* e.g. print $fh subr() */
6074 else if (isDIGIT(*s))
6075 PL_expect = XTERM; /* e.g. print $fh 3 */
6076 else if (*s == '.' && isDIGIT(s[1]))
6077 PL_expect = XTERM; /* e.g. print $fh .3 */
6078 else if ((*s == '?' || *s == '-' || *s == '+')
6079 && !isSPACE(s[1]) && s[1] != '=')
6080 PL_expect = XTERM; /* e.g. print $fh -1 */
6081 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6083 PL_expect = XTERM; /* e.g. print $fh /.../
6084 XXX except DORDOR operator
6086 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6088 PL_expect = XTERM; /* print $fh <<"EOF" */
6091 PL_pending_ident = '$';
6095 if (PL_expect == XOPERATOR)
6097 PL_tokenbuf[0] = '@';
6098 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6099 if (!PL_tokenbuf[1]) {
6102 if (PL_lex_state == LEX_NORMAL)
6104 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6106 PL_tokenbuf[0] = '%';
6108 /* Warn about @ where they meant $. */
6109 if (*s == '[' || *s == '{') {
6110 if (ckWARN(WARN_SYNTAX)) {
6111 const char *t = s + 1;
6112 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6114 if (*t == '}' || *t == ']') {
6116 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6117 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6118 "Scalar value %.*s better written as $%.*s",
6119 (int)(t-PL_bufptr), PL_bufptr,
6120 (int)(t-PL_bufptr-1), PL_bufptr+1);
6125 PL_pending_ident = '@';
6128 case '/': /* may be division, defined-or, or pattern */
6129 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6130 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6131 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6136 case '?': /* may either be conditional or pattern */
6137 if (PL_expect == XOPERATOR) {
6140 if (!PL_lex_allbrackets &&
6141 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6145 PL_lex_allbrackets++;
6151 /* A // operator. */
6152 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6153 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6154 LEX_FAKEEOF_LOGIC)) {
6162 if (*s == '=' && !PL_lex_allbrackets &&
6163 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6172 /* Disable warning on "study /blah/" */
6173 if (PL_oldoldbufptr == PL_last_uni
6174 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6175 || memNE(PL_last_uni, "study", 5)
6176 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6180 deprecate("?PATTERN? without explicit operator");
6181 s = scan_pat(s,OP_MATCH);
6182 TERM(sublex_start());
6186 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6187 #ifdef PERL_STRICT_CR
6190 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6192 && (s == PL_linestart || s[-1] == '\n') )
6194 PL_lex_formbrack = 0;
6198 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6202 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6205 if (!PL_lex_allbrackets &&
6206 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6213 pl_yylval.ival = OPf_SPECIAL;
6219 if (*s == '=' && !PL_lex_allbrackets &&
6220 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6227 case '0': case '1': case '2': case '3': case '4':
6228 case '5': case '6': case '7': case '8': case '9':
6229 s = scan_num(s, &pl_yylval);
6230 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6231 if (PL_expect == XOPERATOR)
6236 s = scan_str(s,!!PL_madskills,FALSE);
6237 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6238 if (PL_expect == XOPERATOR) {
6239 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6240 return deprecate_commaless_var_list();
6247 pl_yylval.ival = OP_CONST;
6248 TERM(sublex_start());
6251 s = scan_str(s,!!PL_madskills,FALSE);
6252 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6253 if (PL_expect == XOPERATOR) {
6254 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6255 return deprecate_commaless_var_list();
6262 pl_yylval.ival = OP_CONST;
6263 /* FIXME. I think that this can be const if char *d is replaced by
6264 more localised variables. */
6265 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6266 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6267 pl_yylval.ival = OP_STRINGIFY;
6271 TERM(sublex_start());
6274 s = scan_str(s,!!PL_madskills,FALSE);
6275 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6276 if (PL_expect == XOPERATOR)
6277 no_op("Backticks",s);
6280 readpipe_override();
6281 TERM(sublex_start());
6285 if (PL_lex_inwhat && isDIGIT(*s))
6286 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6288 if (PL_expect == XOPERATOR)
6289 no_op("Backslash",s);
6293 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6294 char *start = s + 2;
6295 while (isDIGIT(*start) || *start == '_')
6297 if (*start == '.' && isDIGIT(start[1])) {
6298 s = scan_num(s, &pl_yylval);
6301 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6302 else if (!isALPHA(*start) && (PL_expect == XTERM
6303 || PL_expect == XREF || PL_expect == XSTATE
6304 || PL_expect == XTERMORDORDOR)) {
6305 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
6307 s = scan_num(s, &pl_yylval);
6314 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6357 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6359 /* Some keywords can be followed by any delimiter, including ':' */
6360 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6362 /* x::* is just a word, unless x is "CORE" */
6363 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6367 while (d < PL_bufend && isSPACE(*d))
6368 d++; /* no comments skipped here, or s### is misparsed */
6370 /* Is this a word before a => operator? */
6371 if (*d == '=' && d[1] == '>') {
6374 = (OP*)newSVOP(OP_CONST, 0,
6375 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6376 pl_yylval.opval->op_private = OPpCONST_BARE;
6380 /* Check for plugged-in keyword */
6384 char *saved_bufptr = PL_bufptr;
6386 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6388 if (result == KEYWORD_PLUGIN_DECLINE) {
6389 /* not a plugged-in keyword */
6390 PL_bufptr = saved_bufptr;
6391 } else if (result == KEYWORD_PLUGIN_STMT) {
6392 pl_yylval.opval = o;
6395 return REPORT(PLUGSTMT);
6396 } else if (result == KEYWORD_PLUGIN_EXPR) {
6397 pl_yylval.opval = o;
6399 PL_expect = XOPERATOR;
6400 return REPORT(PLUGEXPR);
6402 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6407 /* Check for built-in keyword */
6408 tmp = keyword(PL_tokenbuf, len, 0);
6410 /* Is this a label? */
6411 if (!anydelim && PL_expect == XSTATE
6412 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6414 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6419 if (tmp < 0) { /* second-class keyword? */
6420 GV *ogv = NULL; /* override (winner) */
6421 GV *hgv = NULL; /* hidden (loser) */
6422 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6424 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
6427 if (GvIMPORTED_CV(gv))
6429 else if (! CvMETHOD(cv))
6433 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
6434 (gv = *gvp) && isGV_with_GP(gv) &&
6435 GvCVu(gv) && GvIMPORTED_CV(gv))
6442 tmp = 0; /* overridden by import or by GLOBAL */
6445 && -tmp==KEY_lock /* XXX generalizable kludge */
6448 tmp = 0; /* any sub overrides "weak" keyword */
6450 else { /* no override */
6452 if (tmp == KEY_dump) {
6453 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6454 "dump() better written as CORE::dump()");
6458 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6459 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6460 "Ambiguous call resolved as CORE::%s(), "
6461 "qualify as such or use &",
6469 default: /* not a keyword */
6470 /* Trade off - by using this evil construction we can pull the
6471 variable gv into the block labelled keylookup. If not, then
6472 we have to give it function scope so that the goto from the
6473 earlier ':' case doesn't bypass the initialisation. */
6475 just_a_word_zero_gv:
6483 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6487 SV *nextPL_nextwhite = 0;
6491 /* Get the rest if it looks like a package qualifier */
6493 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6495 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6498 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6499 *s == '\'' ? "'" : "::");
6504 if (PL_expect == XOPERATOR) {
6505 if (PL_bufptr == PL_linestart) {
6506 CopLINE_dec(PL_curcop);
6507 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6508 CopLINE_inc(PL_curcop);
6511 no_op("Bareword",s);
6514 /* Look for a subroutine with this name in current package,
6515 unless name is "Foo::", in which case Foo is a bareword
6516 (and a package name). */
6518 if (len > 2 && !PL_madskills &&
6519 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6521 if (ckWARN(WARN_BAREWORD)
6522 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
6523 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6524 "Bareword \"%s\" refers to nonexistent package",
6527 PL_tokenbuf[len] = '\0';
6533 /* Mustn't actually add anything to a symbol table.
6534 But also don't want to "initialise" any placeholder
6535 constants that might already be there into full
6536 blown PVGVs with attached PVCV. */
6537 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6538 GV_NOADD_NOINIT, SVt_PVCV);
6543 /* if we saw a global override before, get the right name */
6545 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6546 len ? len : strlen(PL_tokenbuf));
6548 SV * const tmp_sv = sv;
6549 sv = newSVpvs("CORE::GLOBAL::");
6550 sv_catsv(sv, tmp_sv);
6551 SvREFCNT_dec(tmp_sv);
6555 if (PL_madskills && !PL_thistoken) {
6556 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6557 PL_thistoken = newSVpvn(start,s - start);
6558 PL_realtokenstart = s - SvPVX(PL_linestr);
6562 /* Presume this is going to be a bareword of some sort. */
6564 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6565 pl_yylval.opval->op_private = OPpCONST_BARE;
6567 /* And if "Foo::", then that's what it certainly is. */
6572 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6573 const_op->op_private = OPpCONST_BARE;
6574 rv2cv_op = newCVREF(0, const_op);
6576 cv = rv2cv_op_cv(rv2cv_op, 0);
6578 /* See if it's the indirect object for a list operator. */
6580 if (PL_oldoldbufptr &&
6581 PL_oldoldbufptr < PL_bufptr &&
6582 (PL_oldoldbufptr == PL_last_lop
6583 || PL_oldoldbufptr == PL_last_uni) &&
6584 /* NO SKIPSPACE BEFORE HERE! */
6585 (PL_expect == XREF ||
6586 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6588 bool immediate_paren = *s == '(';
6590 /* (Now we can afford to cross potential line boundary.) */
6591 s = SKIPSPACE2(s,nextPL_nextwhite);
6593 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6596 /* Two barewords in a row may indicate method call. */
6598 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6599 (tmp = intuit_method(s, gv, cv))) {
6601 if (tmp == METHOD && !PL_lex_allbrackets &&
6602 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6603 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6607 /* If not a declared subroutine, it's an indirect object. */
6608 /* (But it's an indir obj regardless for sort.) */
6609 /* Also, if "_" follows a filetest operator, it's a bareword */
6612 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6614 (PL_last_lop_op != OP_MAPSTART &&
6615 PL_last_lop_op != OP_GREPSTART))))
6616 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6617 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6620 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6625 PL_expect = XOPERATOR;
6628 s = SKIPSPACE2(s,nextPL_nextwhite);
6629 PL_nextwhite = nextPL_nextwhite;
6634 /* Is this a word before a => operator? */
6635 if (*s == '=' && s[1] == '>' && !pkgname) {
6638 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6639 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6640 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6644 /* If followed by a paren, it's certainly a subroutine. */
6649 while (SPACE_OR_TAB(*d))
6651 if (*d == ')' && (sv = cv_const_sv(cv))) {
6658 PL_nextwhite = PL_thiswhite;
6661 start_force(PL_curforce);
6663 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6664 PL_expect = XOPERATOR;
6667 PL_nextwhite = nextPL_nextwhite;
6668 curmad('X', PL_thistoken);
6669 PL_thistoken = newSVpvs("");
6678 /* If followed by var or block, call it a method (unless sub) */
6680 if ((*s == '$' || *s == '{') && !cv) {
6682 PL_last_lop = PL_oldbufptr;
6683 PL_last_lop_op = OP_METHOD;
6684 if (!PL_lex_allbrackets &&
6685 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6686 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6690 /* If followed by a bareword, see if it looks like indir obj. */
6693 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6694 && (tmp = intuit_method(s, gv, cv))) {
6696 if (tmp == METHOD && !PL_lex_allbrackets &&
6697 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6698 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6702 /* Not a method, so call it a subroutine (if defined) */
6705 if (lastchar == '-')
6706 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6707 "Ambiguous use of -%s resolved as -&%s()",
6708 PL_tokenbuf, PL_tokenbuf);
6709 /* Check for a constant sub */
6710 if ((sv = cv_const_sv(cv))) {
6713 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6714 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6715 pl_yylval.opval->op_private = 0;
6716 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6720 op_free(pl_yylval.opval);
6721 pl_yylval.opval = rv2cv_op;
6722 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6723 PL_last_lop = PL_oldbufptr;
6724 PL_last_lop_op = OP_ENTERSUB;
6725 /* Is there a prototype? */
6733 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
6736 while (*proto == ';')
6741 *proto == '$' || *proto == '_'
6742 || *proto == '*' || *proto == '+'
6747 *proto == '\\' && proto[1] && proto[2] == '\0'
6751 if (*proto == '\\' && proto[1] == '[') {
6752 const char *p = proto + 2;
6753 while(*p && *p != ']')
6755 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6757 if (*proto == '&' && *s == '{') {
6759 sv_setpvs(PL_subname, "__ANON__");
6761 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6762 if (!PL_lex_allbrackets &&
6763 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6764 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6771 PL_nextwhite = PL_thiswhite;
6774 start_force(PL_curforce);
6775 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6778 PL_nextwhite = nextPL_nextwhite;
6779 curmad('X', PL_thistoken);
6780 PL_thistoken = newSVpvs("");
6783 if (!PL_lex_allbrackets &&
6784 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6785 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6790 /* Guess harder when madskills require "best effort". */
6791 if (PL_madskills && (!gv || !GvCVu(gv))) {
6792 int probable_sub = 0;
6793 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6795 else if (isALPHA(*s)) {
6799 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6800 if (!keyword(tmpbuf, tmplen, 0))
6803 while (d < PL_bufend && isSPACE(*d))
6805 if (*d == '=' && d[1] == '>')
6810 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6811 op_free(pl_yylval.opval);
6812 pl_yylval.opval = rv2cv_op;
6813 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6814 PL_last_lop = PL_oldbufptr;
6815 PL_last_lop_op = OP_ENTERSUB;
6816 PL_nextwhite = PL_thiswhite;
6818 start_force(PL_curforce);
6819 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6821 PL_nextwhite = nextPL_nextwhite;
6822 curmad('X', PL_thistoken);
6823 PL_thistoken = newSVpvs("");
6825 if (!PL_lex_allbrackets &&
6826 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6827 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6831 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6834 if (!PL_lex_allbrackets &&
6835 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6836 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6841 /* Call it a bare word */
6843 if (PL_hints & HINT_STRICT_SUBS)
6844 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6847 /* after "print" and similar functions (corresponding to
6848 * "F? L" in opcode.pl), whatever wasn't already parsed as
6849 * a filehandle should be subject to "strict subs".
6850 * Likewise for the optional indirect-object argument to system
6851 * or exec, which can't be a bareword */
6852 if ((PL_last_lop_op == OP_PRINT
6853 || PL_last_lop_op == OP_PRTF
6854 || PL_last_lop_op == OP_SAY
6855 || PL_last_lop_op == OP_SYSTEM
6856 || PL_last_lop_op == OP_EXEC)
6857 && (PL_hints & HINT_STRICT_SUBS))
6858 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6859 if (lastchar != '-') {
6860 if (ckWARN(WARN_RESERVED)) {
6864 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
6865 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
6873 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6874 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6875 "Operator or semicolon missing before %c%s",
6876 lastchar, PL_tokenbuf);
6877 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6878 "Ambiguous use of %c resolved as operator %c",
6879 lastchar, lastchar);
6885 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6886 newSVpv(CopFILE(PL_curcop),0));
6890 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6891 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
6894 case KEY___PACKAGE__:
6895 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6897 ? newSVhek(HvNAME_HEK(PL_curstash))
6904 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
6905 const char *pname = "main";
6906 if (PL_tokenbuf[2] == 'D')
6907 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
6908 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6912 GvIOp(gv) = newIO();
6913 IoIFP(GvIOp(gv)) = PL_rsfp;
6914 #if defined(HAS_FCNTL) && defined(F_SETFD)
6916 const int fd = PerlIO_fileno(PL_rsfp);
6917 fcntl(fd,F_SETFD,fd >= 3);
6920 /* Mark this internal pseudo-handle as clean */
6921 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6922 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6923 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6925 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6926 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6927 /* if the script was opened in binmode, we need to revert
6928 * it to text mode for compatibility; but only iff it has CRs
6929 * XXX this is a questionable hack at best. */
6930 if (PL_bufend-PL_bufptr > 2
6931 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6934 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6935 loc = PerlIO_tell(PL_rsfp);
6936 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6939 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6941 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6942 #endif /* NETWARE */
6943 #ifdef PERLIO_IS_STDIO /* really? */
6944 # if defined(__BORLANDC__)
6945 /* XXX see note in do_binmode() */
6946 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
6950 PerlIO_seek(PL_rsfp, loc, 0);
6954 #ifdef PERLIO_LAYERS
6957 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6958 else if (PL_encoding) {
6965 XPUSHs(PL_encoding);
6967 call_method("name", G_SCALAR);
6971 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
6972 Perl_form(aTHX_ ":encoding(%"SVf")",
6981 if (PL_realtokenstart >= 0) {
6982 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6984 PL_endwhite = newSVpvs("");
6985 sv_catsv(PL_endwhite, PL_thiswhite);
6987 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6988 PL_realtokenstart = -1;
6990 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7006 if (PL_expect == XSTATE) {
7013 if (*s == ':' && s[1] == ':') {
7016 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7017 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
7018 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7021 else if (tmp == KEY_require || tmp == KEY_do)
7022 /* that's a way to remember we saw "CORE::" */
7035 LOP(OP_ACCEPT,XTERM);
7038 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7043 LOP(OP_ATAN2,XTERM);
7049 LOP(OP_BINMODE,XTERM);
7052 LOP(OP_BLESS,XTERM);
7061 /* When 'use switch' is in effect, continue has a dual
7062 life as a control operator. */
7064 if (!FEATURE_IS_ENABLED("switch"))
7067 /* We have to disambiguate the two senses of
7068 "continue". If the next token is a '{' then
7069 treat it as the start of a continue block;
7070 otherwise treat it as a control operator.
7082 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7092 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7101 if (!PL_cryptseen) {
7102 PL_cryptseen = TRUE;
7106 LOP(OP_CRYPT,XTERM);
7109 LOP(OP_CHMOD,XTERM);
7112 LOP(OP_CHOWN,XTERM);
7115 LOP(OP_CONNECT,XTERM);
7134 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7135 if (orig_keyword == KEY_do) {
7144 PL_hints |= HINT_BLOCK_SCOPE;
7154 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7155 STR_WITH_LEN("NDBM_File::"),
7156 STR_WITH_LEN("DB_File::"),
7157 STR_WITH_LEN("GDBM_File::"),
7158 STR_WITH_LEN("SDBM_File::"),
7159 STR_WITH_LEN("ODBM_File::"),
7161 LOP(OP_DBMOPEN,XTERM);
7167 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7174 pl_yylval.ival = CopLINE(PL_curcop);
7178 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7192 if (*s == '{') { /* block eval */
7193 PL_expect = XTERMBLOCK;
7194 UNIBRACK(OP_ENTERTRY);
7196 else { /* string eval */
7198 UNIBRACK(OP_ENTEREVAL);
7213 case KEY_endhostent:
7219 case KEY_endservent:
7222 case KEY_endprotoent:
7233 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7235 pl_yylval.ival = CopLINE(PL_curcop);
7237 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7240 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7243 if ((PL_bufend - p) >= 3 &&
7244 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7246 else if ((PL_bufend - p) >= 4 &&
7247 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7250 if (isIDFIRST_lazy_if(p,UTF)) {
7251 p = scan_ident(p, PL_bufend,
7252 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7256 Perl_croak(aTHX_ "Missing $ on loop variable");
7258 s = SvPVX(PL_linestr) + soff;
7264 LOP(OP_FORMLINE,XTERM);
7270 LOP(OP_FCNTL,XTERM);
7276 LOP(OP_FLOCK,XTERM);
7279 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7284 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7289 LOP(OP_GREPSTART, XREF);
7292 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7307 case KEY_getpriority:
7308 LOP(OP_GETPRIORITY,XTERM);
7310 case KEY_getprotobyname:
7313 case KEY_getprotobynumber:
7314 LOP(OP_GPBYNUMBER,XTERM);
7316 case KEY_getprotoent:
7328 case KEY_getpeername:
7329 UNI(OP_GETPEERNAME);
7331 case KEY_gethostbyname:
7334 case KEY_gethostbyaddr:
7335 LOP(OP_GHBYADDR,XTERM);
7337 case KEY_gethostent:
7340 case KEY_getnetbyname:
7343 case KEY_getnetbyaddr:
7344 LOP(OP_GNBYADDR,XTERM);
7349 case KEY_getservbyname:
7350 LOP(OP_GSBYNAME,XTERM);
7352 case KEY_getservbyport:
7353 LOP(OP_GSBYPORT,XTERM);
7355 case KEY_getservent:
7358 case KEY_getsockname:
7359 UNI(OP_GETSOCKNAME);
7361 case KEY_getsockopt:
7362 LOP(OP_GSOCKOPT,XTERM);
7377 pl_yylval.ival = CopLINE(PL_curcop);
7387 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7389 pl_yylval.ival = CopLINE(PL_curcop);
7393 LOP(OP_INDEX,XTERM);
7399 LOP(OP_IOCTL,XTERM);
7411 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7428 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7433 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7447 LOP(OP_LISTEN,XTERM);
7456 s = scan_pat(s,OP_MATCH);
7457 TERM(sublex_start());
7460 LOP(OP_MAPSTART, XREF);
7463 LOP(OP_MKDIR,XTERM);
7466 LOP(OP_MSGCTL,XTERM);
7469 LOP(OP_MSGGET,XTERM);
7472 LOP(OP_MSGRCV,XTERM);
7475 LOP(OP_MSGSND,XTERM);
7480 PL_in_my = (U16)tmp;
7482 if (isIDFIRST_lazy_if(s,UTF)) {
7486 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7487 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7489 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7490 if (!PL_in_my_stash) {
7493 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7497 if (PL_madskills) { /* just add type to declarator token */
7498 sv_catsv(PL_thistoken, PL_nextwhite);
7500 sv_catpvn(PL_thistoken, start, s - start);
7508 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7512 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7517 s = tokenize_use(0, s);
7521 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7524 if (!PL_lex_allbrackets &&
7525 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7526 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7532 if (isIDFIRST_lazy_if(s,UTF)) {
7534 for (d = s; isALNUM_lazy_if(d,UTF);)
7536 for (t=d; isSPACE(*t);)
7538 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7540 && !(t[0] == '=' && t[1] == '>')
7542 int parms_len = (int)(d-s);
7543 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7544 "Precedence problem: open %.*s should be open(%.*s)",
7545 parms_len, s, parms_len, s);
7551 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7553 pl_yylval.ival = OP_OR;
7563 LOP(OP_OPEN_DIR,XTERM);
7566 checkcomma(s,PL_tokenbuf,"filehandle");
7570 checkcomma(s,PL_tokenbuf,"filehandle");
7589 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7591 s = force_strict_version(s);
7592 PL_lex_expect = XBLOCK;
7596 LOP(OP_PIPE_OP,XTERM);
7599 s = scan_str(s,!!PL_madskills,FALSE);
7602 pl_yylval.ival = OP_CONST;
7603 TERM(sublex_start());
7610 s = scan_str(s,!!PL_madskills,FALSE);
7613 PL_expect = XOPERATOR;
7614 if (SvCUR(PL_lex_stuff)) {
7616 d = SvPV_force(PL_lex_stuff, len);
7618 for (; isSPACE(*d) && len; --len, ++d)
7623 if (!warned && ckWARN(WARN_QW)) {
7624 for (; !isSPACE(*d) && len; --len, ++d) {
7626 Perl_warner(aTHX_ packWARN(WARN_QW),
7627 "Possible attempt to separate words with commas");
7630 else if (*d == '#') {
7631 Perl_warner(aTHX_ packWARN(WARN_QW),
7632 "Possible attempt to put comments in qw() list");
7638 for (; !isSPACE(*d) && len; --len, ++d)
7641 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7642 words = op_append_elem(OP_LIST, words,
7643 newSVOP(OP_CONST, 0, tokeq(sv)));
7648 words = newNULLLIST();
7650 SvREFCNT_dec(PL_lex_stuff);
7651 PL_lex_stuff = NULL;
7653 PL_expect = XOPERATOR;
7654 pl_yylval.opval = sawparens(words);
7659 s = scan_str(s,!!PL_madskills,FALSE);
7662 pl_yylval.ival = OP_STRINGIFY;
7663 if (SvIVX(PL_lex_stuff) == '\'')
7664 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7665 TERM(sublex_start());
7668 s = scan_pat(s,OP_QR);
7669 TERM(sublex_start());
7672 s = scan_str(s,!!PL_madskills,FALSE);
7675 readpipe_override();
7676 TERM(sublex_start());
7684 s = force_version(s, FALSE);
7686 else if (*s != 'v' || !isDIGIT(s[1])
7687 || (s = force_version(s, TRUE), *s == 'v'))
7689 *PL_tokenbuf = '\0';
7690 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7691 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7692 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
7694 yyerror("<> should be quotes");
7696 if (orig_keyword == KEY_require) {
7704 PL_last_uni = PL_oldbufptr;
7705 PL_last_lop_op = OP_REQUIRE;
7707 return REPORT( (int)REQUIRE );
7713 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7717 LOP(OP_RENAME,XTERM);
7726 LOP(OP_RINDEX,XTERM);
7735 UNIDOR(OP_READLINE);
7738 UNIDOR(OP_BACKTICK);
7747 LOP(OP_REVERSE,XTERM);
7750 UNIDOR(OP_READLINK);
7757 if (pl_yylval.opval)
7758 TERM(sublex_start());
7760 TOKEN(1); /* force error */
7763 checkcomma(s,PL_tokenbuf,"filehandle");
7773 LOP(OP_SELECT,XTERM);
7779 LOP(OP_SEMCTL,XTERM);
7782 LOP(OP_SEMGET,XTERM);
7785 LOP(OP_SEMOP,XTERM);
7791 LOP(OP_SETPGRP,XTERM);
7793 case KEY_setpriority:
7794 LOP(OP_SETPRIORITY,XTERM);
7796 case KEY_sethostent:
7802 case KEY_setservent:
7805 case KEY_setprotoent:
7815 LOP(OP_SEEKDIR,XTERM);
7817 case KEY_setsockopt:
7818 LOP(OP_SSOCKOPT,XTERM);
7824 LOP(OP_SHMCTL,XTERM);
7827 LOP(OP_SHMGET,XTERM);
7830 LOP(OP_SHMREAD,XTERM);
7833 LOP(OP_SHMWRITE,XTERM);
7836 LOP(OP_SHUTDOWN,XTERM);
7845 LOP(OP_SOCKET,XTERM);
7847 case KEY_socketpair:
7848 LOP(OP_SOCKPAIR,XTERM);
7851 checkcomma(s,PL_tokenbuf,"subroutine name");
7853 if (*s == ';' || *s == ')') /* probably a close */
7854 Perl_croak(aTHX_ "sort is now a reserved word");
7856 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7860 LOP(OP_SPLIT,XTERM);
7863 LOP(OP_SPRINTF,XTERM);
7866 LOP(OP_SPLICE,XTERM);
7881 LOP(OP_SUBSTR,XTERM);
7887 char tmpbuf[sizeof PL_tokenbuf];
7888 SSize_t tboffset = 0;
7889 expectation attrful;
7890 bool have_name, have_proto;
7891 const int key = tmp;
7896 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7897 SV *subtoken = newSVpvn(tstart, s - tstart);
7901 s = SKIPSPACE2(s,tmpwhite);
7906 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
7907 (*s == ':' && s[1] == ':'))
7910 SV *nametoke = NULL;
7914 attrful = XATTRBLOCK;
7915 /* remember buffer pos'n for later force_word */
7916 tboffset = s - PL_oldbufptr;
7917 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
7920 nametoke = newSVpvn(s, d - s);
7922 if (memchr(tmpbuf, ':', len))
7923 sv_setpvn(PL_subname, tmpbuf, len);
7925 sv_setsv(PL_subname,PL_curstname);
7926 sv_catpvs(PL_subname,"::");
7927 sv_catpvn(PL_subname,tmpbuf,len);
7934 CURMAD('X', nametoke);
7935 CURMAD('_', tmpwhite);
7936 (void) force_word(PL_oldbufptr + tboffset, WORD,
7939 s = SKIPSPACE2(d,tmpwhite);
7946 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7947 PL_expect = XTERMBLOCK;
7948 attrful = XATTRTERM;
7949 sv_setpvs(PL_subname,"?");
7953 if (key == KEY_format) {
7955 PL_lex_formbrack = PL_lex_brackets + 1;
7957 PL_thistoken = subtoken;
7961 (void) force_word(PL_oldbufptr + tboffset, WORD,
7967 /* Look for a prototype */
7970 bool bad_proto = FALSE;
7971 bool in_brackets = FALSE;
7972 char greedy_proto = ' ';
7973 bool proto_after_greedy_proto = FALSE;
7974 bool must_be_last = FALSE;
7975 bool underscore = FALSE;
7976 bool seen_underscore = FALSE;
7977 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
7979 s = scan_str(s,!!PL_madskills,FALSE);
7981 Perl_croak(aTHX_ "Prototype not terminated");
7982 /* strip spaces and check for bad characters */
7983 d = SvPVX(PL_lex_stuff);
7985 for (p = d; *p; ++p) {
7989 if (warnillegalproto) {
7991 proto_after_greedy_proto = TRUE;
7992 if (!strchr("$@%*;[]&\\_+", *p)) {
8004 else if ( *p == ']' ) {
8005 in_brackets = FALSE;
8007 else if ( (*p == '@' || *p == '%') &&
8008 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8010 must_be_last = TRUE;
8013 else if ( *p == '_' ) {
8014 underscore = seen_underscore = TRUE;
8021 if (proto_after_greedy_proto)
8022 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8023 "Prototype after '%c' for %"SVf" : %s",
8024 greedy_proto, SVfARG(PL_subname), d);
8026 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8027 "Illegal character %sin prototype for %"SVf" : %s",
8028 seen_underscore ? "after '_' " : "",
8029 SVfARG(PL_subname), d);
8030 SvCUR_set(PL_lex_stuff, tmp);
8035 CURMAD('q', PL_thisopen);
8036 CURMAD('_', tmpwhite);
8037 CURMAD('=', PL_thisstuff);
8038 CURMAD('Q', PL_thisclose);
8039 NEXTVAL_NEXTTOKE.opval =
8040 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8041 PL_lex_stuff = NULL;
8044 s = SKIPSPACE2(s,tmpwhite);
8052 if (*s == ':' && s[1] != ':')
8053 PL_expect = attrful;
8054 else if (*s != '{' && key == KEY_sub) {
8056 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8057 else if (*s != ';' && *s != '}')
8058 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8065 curmad('^', newSVpvs(""));
8066 CURMAD('_', tmpwhite);
8070 PL_thistoken = subtoken;
8073 NEXTVAL_NEXTTOKE.opval =
8074 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8075 PL_lex_stuff = NULL;
8081 sv_setpvs(PL_subname, "__ANON__");
8083 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8087 (void) force_word(PL_oldbufptr + tboffset, WORD,
8096 LOP(OP_SYSTEM,XREF);
8099 LOP(OP_SYMLINK,XTERM);
8102 LOP(OP_SYSCALL,XTERM);
8105 LOP(OP_SYSOPEN,XTERM);
8108 LOP(OP_SYSSEEK,XTERM);
8111 LOP(OP_SYSREAD,XTERM);
8114 LOP(OP_SYSWRITE,XTERM);
8118 TERM(sublex_start());
8139 LOP(OP_TRUNCATE,XTERM);
8151 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8153 pl_yylval.ival = CopLINE(PL_curcop);
8157 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8159 pl_yylval.ival = CopLINE(PL_curcop);
8163 LOP(OP_UNLINK,XTERM);
8169 LOP(OP_UNPACK,XTERM);
8172 LOP(OP_UTIME,XTERM);
8178 LOP(OP_UNSHIFT,XTERM);
8181 s = tokenize_use(1, s);
8191 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8193 pl_yylval.ival = CopLINE(PL_curcop);
8197 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8199 pl_yylval.ival = CopLINE(PL_curcop);
8203 PL_hints |= HINT_BLOCK_SCOPE;
8210 LOP(OP_WAITPID,XTERM);
8219 ctl_l[0] = toCTRL('L');
8221 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8224 /* Make sure $^L is defined */
8225 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8230 if (PL_expect == XOPERATOR) {
8231 if (*s == '=' && !PL_lex_allbrackets &&
8232 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8240 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8242 pl_yylval.ival = OP_XOR;
8247 TERM(sublex_start());
8252 #pragma segment Main
8256 S_pending_ident(pTHX)
8261 /* pit holds the identifier we read and pending_ident is reset */
8262 char pit = PL_pending_ident;
8263 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8264 /* All routes through this function want to know if there is a colon. */
8265 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8266 PL_pending_ident = 0;
8268 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8269 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8270 "### Pending identifier '%s'\n", PL_tokenbuf); });
8272 /* if we're in a my(), we can't allow dynamics here.
8273 $foo'bar has already been turned into $foo::bar, so
8274 just check for colons.
8276 if it's a legal name, the OP is a PADANY.
8279 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8281 yyerror(Perl_form(aTHX_ "No package name allowed for "
8282 "variable %s in \"our\"",
8284 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8288 yyerror(Perl_form(aTHX_ PL_no_myglob,
8289 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8291 pl_yylval.opval = newOP(OP_PADANY, 0);
8292 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8298 build the ops for accesses to a my() variable.
8300 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8301 then used in a comparison. This catches most, but not
8302 all cases. For instance, it catches
8303 sort { my($a); $a <=> $b }
8305 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8306 (although why you'd do that is anyone's guess).
8311 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8312 if (tmp != NOT_IN_PAD) {
8313 /* might be an "our" variable" */
8314 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8315 /* build ops for a bareword */
8316 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8317 HEK * const stashname = HvNAME_HEK(stash);
8318 SV * const sym = newSVhek(stashname);
8319 sv_catpvs(sym, "::");
8320 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
8321 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8322 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8325 ? (GV_ADDMULTI | GV_ADDINEVAL)
8328 ((PL_tokenbuf[0] == '$') ? SVt_PV
8329 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8334 /* if it's a sort block and they're naming $a or $b */
8335 if (PL_last_lop_op == OP_SORT &&
8336 PL_tokenbuf[0] == '$' &&
8337 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8340 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8341 d < PL_bufend && *d != '\n';
8344 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8345 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8351 pl_yylval.opval = newOP(OP_PADANY, 0);
8352 pl_yylval.opval->op_targ = tmp;
8358 Whine if they've said @foo in a doublequoted string,
8359 and @foo isn't a variable we can find in the symbol
8362 if (ckWARN(WARN_AMBIGUOUS) &&
8363 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8364 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8366 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8367 /* DO NOT warn for @- and @+ */
8368 && !( PL_tokenbuf[2] == '\0' &&
8369 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8372 /* Downgraded from fatal to warning 20000522 mjd */
8373 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8374 "Possible unintended interpolation of %s in string",
8379 /* build ops for a bareword */
8380 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
8382 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8383 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8384 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8385 ((PL_tokenbuf[0] == '$') ? SVt_PV
8386 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8392 * The following code was generated by perl_keyword.pl.
8396 Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
8400 PERL_ARGS_ASSERT_KEYWORD;
8404 case 1: /* 5 tokens of length 1 */
8436 case 2: /* 18 tokens of length 2 */
8582 case 3: /* 29 tokens of length 3 */
8586 if (name[1] == 'N' &&
8649 if (name[1] == 'i' &&
8681 if (name[1] == 'o' &&
8690 if (name[1] == 'e' &&
8699 if (name[1] == 'n' &&
8708 if (name[1] == 'o' &&
8717 if (name[1] == 'a' &&
8726 if (name[1] == 'o' &&
8788 if (name[1] == 'e' &&
8802 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
8828 if (name[1] == 'i' &&
8837 if (name[1] == 's' &&
8846 if (name[1] == 'e' &&
8855 if (name[1] == 'o' &&
8867 case 4: /* 41 tokens of length 4 */
8871 if (name[1] == 'O' &&
8881 if (name[1] == 'N' &&
8891 if (name[1] == 'i' &&
8901 if (name[1] == 'h' &&
8911 if (name[1] == 'u' &&
8924 if (name[2] == 'c' &&
8933 if (name[2] == 's' &&
8942 if (name[2] == 'a' &&
8978 if (name[1] == 'o' &&
8991 if (name[2] == 't' &&
9000 if (name[2] == 'o' &&
9009 if (name[2] == 't' &&
9018 if (name[2] == 'e' &&
9031 if (name[1] == 'o' &&
9044 if (name[2] == 'y' &&
9053 if (name[2] == 'l' &&
9069 if (name[2] == 's' &&
9078 if (name[2] == 'n' &&
9087 if (name[2] == 'c' &&
9100 if (name[1] == 'e' &&
9110 if (name[1] == 'p' &&
9123 if (name[2] == 'c' &&
9132 if (name[2] == 'p' &&
9141 if (name[2] == 's' &&
9157 if (name[2] == 'n' &&
9227 if (name[2] == 'r' &&
9236 if (name[2] == 'r' &&
9245 if (name[2] == 'a' &&
9261 if (name[2] == 'l' &&
9323 if (name[2] == 'e' &&
9326 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
9339 case 5: /* 39 tokens of length 5 */
9343 if (name[1] == 'E' &&
9354 if (name[1] == 'H' &&
9368 if (name[2] == 'a' &&
9378 if (name[2] == 'a' &&
9395 if (name[2] == 'e' &&
9405 if (name[2] == 'e' &&
9409 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
9425 if (name[3] == 'i' &&
9434 if (name[3] == 'o' &&
9470 if (name[2] == 'o' &&
9480 if (name[2] == 'y' &&
9494 if (name[1] == 'l' &&
9508 if (name[2] == 'n' &&
9518 if (name[2] == 'o' &&
9532 if (name[1] == 'i' &&
9537 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
9546 if (name[2] == 'd' &&
9556 if (name[2] == 'c' &&
9573 if (name[2] == 'c' &&
9583 if (name[2] == 't' &&
9597 if (name[1] == 'k' &&
9608 if (name[1] == 'r' &&
9622 if (name[2] == 's' &&
9632 if (name[2] == 'd' &&
9649 if (name[2] == 'm' &&
9659 if (name[2] == 'i' &&
9669 if (name[2] == 'e' &&
9679 if (name[2] == 'l' &&
9689 if (name[2] == 'a' &&
9702 if (name[3] == 't' &&
9705 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
9711 if (name[3] == 'd' &&
9728 if (name[1] == 'i' &&
9742 if (name[2] == 'a' &&
9755 if (name[3] == 'e' &&
9790 if (name[2] == 'i' &&
9807 if (name[2] == 'i' &&
9817 if (name[2] == 'i' &&
9834 case 6: /* 33 tokens of length 6 */
9838 if (name[1] == 'c' &&
9853 if (name[2] == 'l' &&
9864 if (name[2] == 'r' &&
9879 if (name[1] == 'e' &&
9894 if (name[2] == 's' &&
9899 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
9905 if (name[2] == 'i' &&
9923 if (name[2] == 'l' &&
9934 if (name[2] == 'r' &&
9949 if (name[1] == 'm' &&
9964 if (name[2] == 'n' &&
9975 if (name[2] == 's' &&
9990 if (name[1] == 's' &&
9996 if (name[4] == 't' &&
10005 if (name[4] == 'e' &&
10008 return -KEY_msgget;
10014 if (name[4] == 'c' &&
10017 return -KEY_msgrcv;
10023 if (name[4] == 'n' &&
10026 return -KEY_msgsnd;
10039 if (name[1] == 'r' &&
10057 if (name[3] == 'a' &&
10061 return -KEY_rename;
10067 if (name[3] == 'u' &&
10081 if (name[2] == 'n' &&
10086 return -KEY_rindex;
10099 if (name[2] == 'a' &&
10113 if (name[3] == 'e' &&
10117 return -KEY_select;
10126 if (name[4] == 't' &&
10129 return -KEY_semctl;
10135 if (name[4] == 'e' &&
10138 return -KEY_semget;
10152 if (name[2] == 'm')
10157 if (name[4] == 't' &&
10160 return -KEY_shmctl;
10166 if (name[4] == 'e' &&
10169 return -KEY_shmget;
10182 if (name[2] == 'c' &&
10187 return -KEY_socket;
10193 if (name[2] == 'l' &&
10198 return -KEY_splice;
10204 if (name[2] == 'b' &&
10209 return -KEY_substr;
10215 if (name[2] == 's' &&
10220 return -KEY_system;
10230 if (name[1] == 'n')
10238 if (name[4] == 's' &&
10247 if (name[4] == 'n' &&
10250 return -KEY_unlink;
10260 if (name[3] == 'a' &&
10264 return -KEY_unpack;
10277 if (name[1] == 'a' &&
10283 return -KEY_values;
10292 case 7: /* 29 tokens of length 7 */
10296 if (name[1] == 'E' &&
10303 return KEY_DESTROY;
10309 if (name[1] == '_' &&
10316 return KEY___END__;
10322 if (name[1] == 'i' &&
10329 return -KEY_binmode;
10335 if (name[1] == 'o' &&
10342 return -KEY_connect;
10351 if (name[2] == 'm' &&
10357 return -KEY_dbmopen;
10363 if (name[2] == 'f')
10368 if (name[4] == 'u' &&
10372 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
10378 if (name[4] == 'n' &&
10382 return KEY_defined;
10399 if (name[1] == 'o' &&
10406 return KEY_foreach;
10412 if (name[1] == 'e' &&
10419 if (name[5] == 'r' &&
10422 return -KEY_getpgrp;
10428 if (name[5] == 'i' &&
10431 return -KEY_getppid;
10444 if (name[1] == 'c' &&
10451 return -KEY_lcfirst;
10457 if (name[1] == 'p' &&
10464 return -KEY_opendir;
10470 if (name[1] == 'a' &&
10477 return KEY_package;
10483 if (name[1] == 'e')
10488 if (name[3] == 'd' &&
10493 return -KEY_readdir;
10499 if (name[3] == 'u' &&
10504 return KEY_require;
10510 if (name[3] == 'e' &&
10515 return -KEY_reverse;
10534 if (name[3] == 'k' &&
10539 return -KEY_seekdir;
10545 if (name[3] == 'p' &&
10550 return -KEY_setpgrp;
10560 if (name[2] == 'm' &&
10566 return -KEY_shmread;
10572 if (name[2] == 'r' &&
10578 return -KEY_sprintf;
10587 if (name[3] == 'l' &&
10592 return -KEY_symlink;
10601 if (name[4] == 'a' &&
10605 return -KEY_syscall;
10611 if (name[4] == 'p' &&
10615 return -KEY_sysopen;
10621 if (name[4] == 'e' &&
10625 return -KEY_sysread;
10631 if (name[4] == 'e' &&
10635 return -KEY_sysseek;
10653 if (name[1] == 'e' &&
10660 return -KEY_telldir;
10669 if (name[2] == 'f' &&
10675 return -KEY_ucfirst;
10681 if (name[2] == 's' &&
10687 return -KEY_unshift;
10697 if (name[1] == 'a' &&
10704 return -KEY_waitpid;
10713 case 8: /* 26 tokens of length 8 */
10717 if (name[1] == 'U' &&
10725 return KEY_AUTOLOAD;
10731 if (name[1] == '_')
10736 if (name[3] == 'A' &&
10742 return KEY___DATA__;
10748 if (name[3] == 'I' &&
10754 return -KEY___FILE__;
10760 if (name[3] == 'I' &&
10766 return -KEY___LINE__;
10782 if (name[2] == 'o' &&
10789 return -KEY_closedir;
10795 if (name[2] == 'n' &&
10802 return -KEY_continue;
10812 if (name[1] == 'b' &&
10820 return -KEY_dbmclose;
10826 if (name[1] == 'n' &&
10832 if (name[4] == 'r' &&
10837 return -KEY_endgrent;
10843 if (name[4] == 'w' &&
10848 return -KEY_endpwent;
10861 if (name[1] == 'o' &&
10869 return -KEY_formline;
10875 if (name[1] == 'e' &&
10881 if (name[4] == 'r')
10886 if (name[6] == 'n' &&
10889 return -KEY_getgrent;
10895 if (name[6] == 'i' &&
10898 return -KEY_getgrgid;
10904 if (name[6] == 'a' &&
10907 return -KEY_getgrnam;
10920 if (name[4] == 'o' &&
10925 return -KEY_getlogin;
10931 if (name[4] == 'w')
10936 if (name[6] == 'n' &&
10939 return -KEY_getpwent;
10945 if (name[6] == 'a' &&
10948 return -KEY_getpwnam;
10954 if (name[6] == 'i' &&
10957 return -KEY_getpwuid;
10977 if (name[1] == 'e' &&
10984 if (name[5] == 'i' &&
10991 return -KEY_readline;
10996 return -KEY_readlink;
11007 if (name[5] == 'i' &&
11011 return -KEY_readpipe;
11027 if (name[2] == 't')
11032 if (name[4] == 'r' &&
11037 return -KEY_setgrent;
11043 if (name[4] == 'w' &&
11048 return -KEY_setpwent;
11064 if (name[3] == 'w' &&
11070 return -KEY_shmwrite;
11076 if (name[3] == 't' &&
11082 return -KEY_shutdown;
11092 if (name[2] == 's' &&
11099 return -KEY_syswrite;
11109 if (name[1] == 'r' &&
11117 return -KEY_truncate;
11126 case 9: /* 9 tokens of length 9 */
11130 if (name[1] == 'N' &&
11139 return KEY_UNITCHECK;
11145 if (name[1] == 'n' &&
11154 return -KEY_endnetent;
11160 if (name[1] == 'e' &&
11169 return -KEY_getnetent;
11175 if (name[1] == 'o' &&
11184 return -KEY_localtime;
11190 if (name[1] == 'r' &&
11199 return KEY_prototype;
11205 if (name[1] == 'u' &&
11214 return -KEY_quotemeta;
11220 if (name[1] == 'e' &&
11229 return -KEY_rewinddir;
11235 if (name[1] == 'e' &&
11244 return -KEY_setnetent;
11250 if (name[1] == 'a' &&
11259 return -KEY_wantarray;
11268 case 10: /* 9 tokens of length 10 */
11272 if (name[1] == 'n' &&
11278 if (name[4] == 'o' &&
11285 return -KEY_endhostent;
11291 if (name[4] == 'e' &&
11298 return -KEY_endservent;
11311 if (name[1] == 'e' &&
11317 if (name[4] == 'o' &&
11324 return -KEY_gethostent;
11333 if (name[5] == 'r' &&
11339 return -KEY_getservent;
11345 if (name[5] == 'c' &&
11351 return -KEY_getsockopt;
11371 if (name[2] == 't')
11376 if (name[4] == 'o' &&
11383 return -KEY_sethostent;
11392 if (name[5] == 'r' &&
11398 return -KEY_setservent;
11404 if (name[5] == 'c' &&
11410 return -KEY_setsockopt;
11427 if (name[2] == 'c' &&
11436 return -KEY_socketpair;
11449 case 11: /* 8 tokens of length 11 */
11453 if (name[1] == '_' &&
11463 { /* __PACKAGE__ */
11464 return -KEY___PACKAGE__;
11470 if (name[1] == 'n' &&
11480 { /* endprotoent */
11481 return -KEY_endprotoent;
11487 if (name[1] == 'e' &&
11496 if (name[5] == 'e' &&
11502 { /* getpeername */
11503 return -KEY_getpeername;
11512 if (name[6] == 'o' &&
11517 { /* getpriority */
11518 return -KEY_getpriority;
11524 if (name[6] == 't' &&
11529 { /* getprotoent */
11530 return -KEY_getprotoent;
11544 if (name[4] == 'o' &&
11551 { /* getsockname */
11552 return -KEY_getsockname;
11565 if (name[1] == 'e' &&
11573 if (name[6] == 'o' &&
11578 { /* setpriority */
11579 return -KEY_setpriority;
11585 if (name[6] == 't' &&
11590 { /* setprotoent */
11591 return -KEY_setprotoent;
11607 case 12: /* 2 tokens of length 12 */
11608 if (name[0] == 'g' &&
11620 if (name[9] == 'd' &&
11623 { /* getnetbyaddr */
11624 return -KEY_getnetbyaddr;
11630 if (name[9] == 'a' &&
11633 { /* getnetbyname */
11634 return -KEY_getnetbyname;
11646 case 13: /* 4 tokens of length 13 */
11647 if (name[0] == 'g' &&
11654 if (name[4] == 'o' &&
11663 if (name[10] == 'd' &&
11666 { /* gethostbyaddr */
11667 return -KEY_gethostbyaddr;
11673 if (name[10] == 'a' &&
11676 { /* gethostbyname */
11677 return -KEY_gethostbyname;
11690 if (name[4] == 'e' &&
11699 if (name[10] == 'a' &&
11702 { /* getservbyname */
11703 return -KEY_getservbyname;
11709 if (name[10] == 'o' &&
11712 { /* getservbyport */
11713 return -KEY_getservbyport;
11732 case 14: /* 1 tokens of length 14 */
11733 if (name[0] == 'g' &&
11747 { /* getprotobyname */
11748 return -KEY_getprotobyname;
11753 case 16: /* 1 tokens of length 16 */
11754 if (name[0] == 'g' &&
11770 { /* getprotobynumber */
11771 return -KEY_getprotobynumber;
11785 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
11789 PERL_ARGS_ASSERT_CHECKCOMMA;
11791 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
11792 if (ckWARN(WARN_SYNTAX)) {
11795 for (w = s+2; *w && level; w++) {
11798 else if (*w == ')')
11801 while (isSPACE(*w))
11803 /* the list of chars below is for end of statements or
11804 * block / parens, boolean operators (&&, ||, //) and branch
11805 * constructs (or, and, if, until, unless, while, err, for).
11806 * Not a very solid hack... */
11807 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
11808 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
11809 "%s (...) interpreted as function",name);
11812 while (s < PL_bufend && isSPACE(*s))
11816 while (s < PL_bufend && isSPACE(*s))
11818 if (isIDFIRST_lazy_if(s,UTF)) {
11819 const char * const w = s++;
11820 while (isALNUM_lazy_if(s,UTF))
11822 while (s < PL_bufend && isSPACE(*s))
11826 if (keyword(w, s - w, 0))
11829 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11830 if (gv && GvCVu(gv))
11832 Perl_croak(aTHX_ "No comma allowed after %s", what);
11837 /* Either returns sv, or mortalizes sv and returns a new SV*.
11838 Best used as sv=new_constant(..., sv, ...).
11839 If s, pv are NULL, calls subroutine with one argument,
11840 and type is used with error messages only. */
11843 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11844 SV *sv, SV *pv, const char *type, STRLEN typelen)
11847 HV * const table = GvHV(PL_hintgv); /* ^H */
11851 const char *why1 = "", *why2 = "", *why3 = "";
11853 PERL_ARGS_ASSERT_NEW_CONSTANT;
11855 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
11858 why2 = (const char *)
11859 (strEQ(key,"charnames")
11860 ? "(possibly a missing \"use charnames ...\")"
11862 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
11863 (type ? type: "undef"), why2);
11865 /* This is convoluted and evil ("goto considered harmful")
11866 * but I do not understand the intricacies of all the different
11867 * failure modes of %^H in here. The goal here is to make
11868 * the most probable error message user-friendly. --jhi */
11873 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
11874 (type ? type: "undef"), why1, why2, why3);
11876 yyerror(SvPVX_const(msg));
11881 /* charnames doesn't work well if there have been errors found */
11882 if (PL_error_count > 0 && strEQ(key,"charnames"))
11883 return &PL_sv_undef;
11885 cvp = hv_fetch(table, key, keylen, FALSE);
11886 if (!cvp || !SvOK(*cvp)) {
11889 why3 = "} is not defined";
11892 sv_2mortal(sv); /* Parent created it permanently */
11895 pv = newSVpvn_flags(s, len, SVs_TEMP);
11897 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
11899 typesv = &PL_sv_undef;
11901 PUSHSTACKi(PERLSI_OVERLOAD);
11913 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
11917 /* Check the eval first */
11918 if (!PL_in_eval && SvTRUE(ERRSV)) {
11919 sv_catpvs(ERRSV, "Propagated");
11920 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
11922 res = SvREFCNT_inc_simple(sv);
11926 SvREFCNT_inc_simple_void(res);
11935 why1 = "Call to &{$^H{";
11937 why3 = "}} did not return a defined value";
11945 /* Returns a NUL terminated string, with the length of the string written to
11949 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
11952 register char *d = dest;
11953 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
11955 PERL_ARGS_ASSERT_SCAN_WORD;
11959 Perl_croak(aTHX_ ident_too_long);
11960 if (isALNUM(*s)) /* UTF handled below */
11962 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
11967 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
11971 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
11972 char *t = s + UTF8SKIP(s);
11974 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
11978 Perl_croak(aTHX_ ident_too_long);
11979 Copy(s, d, len, char);
11992 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
11995 char *bracket = NULL;
11997 register char *d = dest;
11998 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
12000 PERL_ARGS_ASSERT_SCAN_IDENT;
12005 while (isDIGIT(*s)) {
12007 Perl_croak(aTHX_ ident_too_long);
12014 Perl_croak(aTHX_ ident_too_long);
12015 if (isALNUM(*s)) /* UTF handled below */
12017 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
12022 else if (*s == ':' && s[1] == ':') {
12026 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
12027 char *t = s + UTF8SKIP(s);
12028 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
12030 if (d + (t - s) > e)
12031 Perl_croak(aTHX_ ident_too_long);
12032 Copy(s, d, t - s, char);
12043 if (PL_lex_state != LEX_NORMAL)
12044 PL_lex_state = LEX_INTERPENDMAYBE;
12047 if (*s == '$' && s[1] &&
12048 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
12061 if (*d == '^' && *s && isCONTROLVAR(*s)) {
12066 if (isSPACE(s[-1])) {
12068 const char ch = *s++;
12069 if (!SPACE_OR_TAB(ch)) {
12075 if (isIDFIRST_lazy_if(d,UTF)) {
12079 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
12080 end += UTF8SKIP(end);
12081 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
12082 end += UTF8SKIP(end);
12084 Copy(s, d, end - s, char);
12089 while ((isALNUM(*s) || *s == ':') && d < e)
12092 Perl_croak(aTHX_ ident_too_long);
12095 while (s < send && SPACE_OR_TAB(*s))
12097 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
12098 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
12099 const char * const brack =
12101 ((*s == '[') ? "[...]" : "{...}");
12102 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
12103 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
12104 funny, dest, brack, funny, dest, brack);
12107 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
12108 PL_lex_allbrackets++;
12112 /* Handle extended ${^Foo} variables
12113 * 1999-02-27 mjd-perl-patch@plover.com */
12114 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
12118 while (isALNUM(*s) && d < e) {
12122 Perl_croak(aTHX_ ident_too_long);
12127 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
12128 PL_lex_state = LEX_INTERPEND;
12131 if (PL_lex_state == LEX_NORMAL) {
12132 if (ckWARN(WARN_AMBIGUOUS) &&
12133 (keyword(dest, d - dest, 0)
12134 || get_cvn_flags(dest, d - dest, 0)))
12138 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
12139 "Ambiguous use of %c{%s} resolved to %c%s",
12140 funny, dest, funny, dest);
12145 s = bracket; /* let the parser handle it */
12149 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
12150 PL_lex_state = LEX_INTERPEND;
12155 S_pmflag(U32 pmfl, const char ch) {
12157 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
12158 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
12159 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
12160 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
12161 case KEEPCOPY_PAT_MOD: pmfl |= RXf_PMf_KEEPCOPY; break;
12162 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
12168 S_scan_pat(pTHX_ char *start, I32 type)
12172 char *s = scan_str(start,!!PL_madskills,FALSE);
12173 const char * const valid_flags =
12174 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
12179 PERL_ARGS_ASSERT_SCAN_PAT;
12182 const char * const delimiter = skipspace(start);
12186 ? "Search pattern not terminated or ternary operator parsed as search pattern"
12187 : "Search pattern not terminated" ));
12190 pm = (PMOP*)newPMOP(type, 0);
12191 if (PL_multi_open == '?') {
12192 /* This is the only point in the code that sets PMf_ONCE: */
12193 pm->op_pmflags |= PMf_ONCE;
12195 /* Hence it's safe to do this bit of PMOP book-keeping here, which
12196 allows us to restrict the list needed by reset to just the ??
12198 assert(type != OP_TRANS);
12200 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
12203 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
12206 elements = mg->mg_len / sizeof(PMOP**);
12207 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
12208 ((PMOP**)mg->mg_ptr) [elements++] = pm;
12209 mg->mg_len = elements * sizeof(PMOP**);
12210 PmopSTASH_set(pm,PL_curstash);
12216 while (*s && strchr(valid_flags, *s))
12217 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12220 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12221 "Having no space between pattern and following word is deprecated");
12225 if (PL_madskills && modstart != s) {
12226 SV* tmptoken = newSVpvn(modstart, s - modstart);
12227 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
12230 /* issue a warning if /c is specified,but /g is not */
12231 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
12233 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
12234 "Use of /c modifier is meaningless without /g" );
12237 PL_lex_op = (OP*)pm;
12238 pl_yylval.ival = OP_MATCH;
12243 S_scan_subst(pTHX_ char *start)
12254 PERL_ARGS_ASSERT_SCAN_SUBST;
12256 pl_yylval.ival = OP_NULL;
12258 s = scan_str(start,!!PL_madskills,FALSE);
12261 Perl_croak(aTHX_ "Substitution pattern not terminated");
12263 if (s[-1] == PL_multi_open)
12266 if (PL_madskills) {
12267 CURMAD('q', PL_thisopen);
12268 CURMAD('_', PL_thiswhite);
12269 CURMAD('E', PL_thisstuff);
12270 CURMAD('Q', PL_thisclose);
12271 PL_realtokenstart = s - SvPVX(PL_linestr);
12275 first_start = PL_multi_start;
12276 s = scan_str(s,!!PL_madskills,FALSE);
12278 if (PL_lex_stuff) {
12279 SvREFCNT_dec(PL_lex_stuff);
12280 PL_lex_stuff = NULL;
12282 Perl_croak(aTHX_ "Substitution replacement not terminated");
12284 PL_multi_start = first_start; /* so whole substitution is taken together */
12286 pm = (PMOP*)newPMOP(OP_SUBST, 0);
12289 if (PL_madskills) {
12290 CURMAD('z', PL_thisopen);
12291 CURMAD('R', PL_thisstuff);
12292 CURMAD('Z', PL_thisclose);
12298 if (*s == EXEC_PAT_MOD) {
12302 else if (strchr(S_PAT_MODS, *s))
12303 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
12306 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12307 "Having no space between pattern and following word is deprecated");
12315 if (PL_madskills) {
12317 curmad('m', newSVpvn(modstart, s - modstart));
12318 append_madprops(PL_thismad, (OP*)pm, 0);
12322 if ((pm->op_pmflags & PMf_CONTINUE)) {
12323 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
12327 SV * const repl = newSVpvs("");
12329 PL_sublex_info.super_bufptr = s;
12330 PL_sublex_info.super_bufend = PL_bufend;
12332 pm->op_pmflags |= PMf_EVAL;
12335 sv_catpvs(repl, "eval ");
12337 sv_catpvs(repl, "do ");
12339 sv_catpvs(repl, "{");
12340 sv_catsv(repl, PL_lex_repl);
12341 if (strchr(SvPVX(PL_lex_repl), '#'))
12342 sv_catpvs(repl, "\n");
12343 sv_catpvs(repl, "}");
12345 SvREFCNT_dec(PL_lex_repl);
12346 PL_lex_repl = repl;
12349 PL_lex_op = (OP*)pm;
12350 pl_yylval.ival = OP_SUBST;
12355 S_scan_trans(pTHX_ char *start)
12364 bool nondestruct = 0;
12369 PERL_ARGS_ASSERT_SCAN_TRANS;
12371 pl_yylval.ival = OP_NULL;
12373 s = scan_str(start,!!PL_madskills,FALSE);
12375 Perl_croak(aTHX_ "Transliteration pattern not terminated");
12377 if (s[-1] == PL_multi_open)
12380 if (PL_madskills) {
12381 CURMAD('q', PL_thisopen);
12382 CURMAD('_', PL_thiswhite);
12383 CURMAD('E', PL_thisstuff);
12384 CURMAD('Q', PL_thisclose);
12385 PL_realtokenstart = s - SvPVX(PL_linestr);
12389 s = scan_str(s,!!PL_madskills,FALSE);
12391 if (PL_lex_stuff) {
12392 SvREFCNT_dec(PL_lex_stuff);
12393 PL_lex_stuff = NULL;
12395 Perl_croak(aTHX_ "Transliteration replacement not terminated");
12397 if (PL_madskills) {
12398 CURMAD('z', PL_thisopen);
12399 CURMAD('R', PL_thisstuff);
12400 CURMAD('Z', PL_thisclose);
12403 complement = del = squash = 0;
12410 complement = OPpTRANS_COMPLEMENT;
12413 del = OPpTRANS_DELETE;
12416 squash = OPpTRANS_SQUASH;
12428 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
12429 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
12430 o->op_private &= ~OPpTRANS_ALL;
12431 o->op_private |= del|squash|complement|
12432 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12433 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
12436 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
12439 if (PL_madskills) {
12441 curmad('m', newSVpvn(modstart, s - modstart));
12442 append_madprops(PL_thismad, o, 0);
12451 S_scan_heredoc(pTHX_ register char *s)
12455 I32 op_type = OP_SCALAR;
12459 const char *found_newline;
12463 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
12465 I32 stuffstart = s - SvPVX(PL_linestr);
12468 PL_realtokenstart = -1;
12471 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12475 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
12479 while (SPACE_OR_TAB(*peek))
12481 if (*peek == '`' || *peek == '\'' || *peek =='"') {
12484 s = delimcpy(d, e, s, PL_bufend, term, &len);
12494 if (!isALNUM_lazy_if(s,UTF))
12495 deprecate("bare << to mean <<\"\"");
12496 for (; isALNUM_lazy_if(s,UTF); s++) {
12501 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
12502 Perl_croak(aTHX_ "Delimiter for here document is too long");
12505 len = d - PL_tokenbuf;
12508 if (PL_madskills) {
12509 tstart = PL_tokenbuf + !outer;
12510 PL_thisclose = newSVpvn(tstart, len - !outer);
12511 tstart = SvPVX(PL_linestr) + stuffstart;
12512 PL_thisopen = newSVpvn(tstart, s - tstart);
12513 stuffstart = s - SvPVX(PL_linestr);
12516 #ifndef PERL_STRICT_CR
12517 d = strchr(s, '\r');
12519 char * const olds = s;
12521 while (s < PL_bufend) {
12527 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12536 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12543 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
12544 herewas = newSVpvn(s,PL_bufend-s);
12548 herewas = newSVpvn(s-1,found_newline-s+1);
12551 herewas = newSVpvn(s,found_newline-s);
12555 if (PL_madskills) {
12556 tstart = SvPVX(PL_linestr) + stuffstart;
12558 sv_catpvn(PL_thisstuff, tstart, s - tstart);
12560 PL_thisstuff = newSVpvn(tstart, s - tstart);
12563 s += SvCUR(herewas);
12566 stuffstart = s - SvPVX(PL_linestr);
12572 tmpstr = newSV_type(SVt_PVIV);
12573 SvGROW(tmpstr, 80);
12574 if (term == '\'') {
12575 op_type = OP_CONST;
12576 SvIV_set(tmpstr, -1);
12578 else if (term == '`') {
12579 op_type = OP_BACKTICK;
12580 SvIV_set(tmpstr, '\\');
12584 PL_multi_start = CopLINE(PL_curcop);
12585 PL_multi_open = PL_multi_close = '<';
12586 term = *PL_tokenbuf;
12587 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
12588 char * const bufptr = PL_sublex_info.super_bufptr;
12589 char * const bufend = PL_sublex_info.super_bufend;
12590 char * const olds = s - SvCUR(herewas);
12591 s = strchr(bufptr, '\n');
12595 while (s < bufend &&
12596 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12598 CopLINE_inc(PL_curcop);
12601 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12602 missingterm(PL_tokenbuf);
12604 sv_setpvn(herewas,bufptr,d-bufptr+1);
12605 sv_setpvn(tmpstr,d+1,s-d);
12607 sv_catpvn(herewas,s,bufend-s);
12608 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
12615 while (s < PL_bufend &&
12616 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12618 CopLINE_inc(PL_curcop);
12620 if (s >= PL_bufend) {
12621 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12622 missingterm(PL_tokenbuf);
12624 sv_setpvn(tmpstr,d+1,s-d);
12626 if (PL_madskills) {
12628 sv_catpvn(PL_thisstuff, d + 1, s - d);
12630 PL_thisstuff = newSVpvn(d + 1, s - d);
12631 stuffstart = s - SvPVX(PL_linestr);
12635 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
12637 sv_catpvn(herewas,s,PL_bufend-s);
12638 sv_setsv(PL_linestr,herewas);
12639 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12640 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12641 PL_last_lop = PL_last_uni = NULL;
12644 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
12645 while (s >= PL_bufend) { /* multiple line string? */
12647 if (PL_madskills) {
12648 tstart = SvPVX(PL_linestr) + stuffstart;
12650 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
12652 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
12656 CopLINE_inc(PL_curcop);
12657 if (!outer || !lex_next_chunk(0)) {
12658 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
12659 missingterm(PL_tokenbuf);
12661 CopLINE_dec(PL_curcop);
12664 stuffstart = s - SvPVX(PL_linestr);
12666 CopLINE_inc(PL_curcop);
12667 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12668 PL_last_lop = PL_last_uni = NULL;
12669 #ifndef PERL_STRICT_CR
12670 if (PL_bufend - PL_linestart >= 2) {
12671 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12672 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
12674 PL_bufend[-2] = '\n';
12676 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
12678 else if (PL_bufend[-1] == '\r')
12679 PL_bufend[-1] = '\n';
12681 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12682 PL_bufend[-1] = '\n';
12684 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
12685 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
12686 *(SvPVX(PL_linestr) + off ) = ' ';
12687 sv_catsv(PL_linestr,herewas);
12688 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
12689 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
12693 sv_catsv(tmpstr,PL_linestr);
12698 PL_multi_end = CopLINE(PL_curcop);
12699 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
12700 SvPV_shrink_to_cur(tmpstr);
12702 SvREFCNT_dec(herewas);
12704 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
12706 else if (PL_encoding)
12707 sv_recode_to_utf8(tmpstr, PL_encoding);
12709 PL_lex_stuff = tmpstr;
12710 pl_yylval.ival = op_type;
12714 /* scan_inputsymbol
12715 takes: current position in input buffer
12716 returns: new position in input buffer
12717 side-effects: pl_yylval and lex_op are set.
12722 <FH> read from filehandle
12723 <pkg::FH> read from package qualified filehandle
12724 <pkg'FH> read from package qualified filehandle
12725 <$fh> read from filehandle in $fh
12726 <*.h> filename glob
12731 S_scan_inputsymbol(pTHX_ char *start)
12734 register char *s = start; /* current position in buffer */
12737 char *d = PL_tokenbuf; /* start of temp holding space */
12738 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12740 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12742 end = strchr(s, '\n');
12745 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
12747 /* die if we didn't have space for the contents of the <>,
12748 or if it didn't end, or if we see a newline
12751 if (len >= (I32)sizeof PL_tokenbuf)
12752 Perl_croak(aTHX_ "Excessively long <> operator");
12754 Perl_croak(aTHX_ "Unterminated <> operator");
12759 Remember, only scalar variables are interpreted as filehandles by
12760 this code. Anything more complex (e.g., <$fh{$num}>) will be
12761 treated as a glob() call.
12762 This code makes use of the fact that except for the $ at the front,
12763 a scalar variable and a filehandle look the same.
12765 if (*d == '$' && d[1]) d++;
12767 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
12768 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
12771 /* If we've tried to read what we allow filehandles to look like, and
12772 there's still text left, then it must be a glob() and not a getline.
12773 Use scan_str to pull out the stuff between the <> and treat it
12774 as nothing more than a string.
12777 if (d - PL_tokenbuf != len) {
12778 pl_yylval.ival = OP_GLOB;
12779 s = scan_str(start,!!PL_madskills,FALSE);
12781 Perl_croak(aTHX_ "Glob not terminated");
12785 bool readline_overriden = FALSE;
12788 /* we're in a filehandle read situation */
12791 /* turn <> into <ARGV> */
12793 Copy("ARGV",d,5,char);
12795 /* Check whether readline() is overriden */
12796 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
12798 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
12800 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
12801 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
12802 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
12803 readline_overriden = TRUE;
12805 /* if <$fh>, create the ops to turn the variable into a
12809 /* try to find it in the pad for this block, otherwise find
12810 add symbol table ops
12812 const PADOFFSET tmp = pad_findmy(d, len, 0);
12813 if (tmp != NOT_IN_PAD) {
12814 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
12815 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12816 HEK * const stashname = HvNAME_HEK(stash);
12817 SV * const sym = sv_2mortal(newSVhek(stashname));
12818 sv_catpvs(sym, "::");
12819 sv_catpv(sym, d+1);
12824 OP * const o = newOP(OP_PADSV, 0);
12826 PL_lex_op = readline_overriden
12827 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12828 op_append_elem(OP_LIST, o,
12829 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12830 : (OP*)newUNOP(OP_READLINE, 0, o);
12839 ? (GV_ADDMULTI | GV_ADDINEVAL)
12842 PL_lex_op = readline_overriden
12843 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12844 op_append_elem(OP_LIST,
12845 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12846 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12847 : (OP*)newUNOP(OP_READLINE, 0,
12848 newUNOP(OP_RV2SV, 0,
12849 newGVOP(OP_GV, 0, gv)));
12851 if (!readline_overriden)
12852 PL_lex_op->op_flags |= OPf_SPECIAL;
12853 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12854 pl_yylval.ival = OP_NULL;
12857 /* If it's none of the above, it must be a literal filehandle
12858 (<Foo::BAR> or <FOO>) so build a simple readline OP */
12860 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
12861 PL_lex_op = readline_overriden
12862 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12863 op_append_elem(OP_LIST,
12864 newGVOP(OP_GV, 0, gv),
12865 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12866 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
12867 pl_yylval.ival = OP_NULL;
12876 takes: start position in buffer
12877 keep_quoted preserve \ on the embedded delimiter(s)
12878 keep_delims preserve the delimiters around the string
12879 returns: position to continue reading from buffer
12880 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12881 updates the read buffer.
12883 This subroutine pulls a string out of the input. It is called for:
12884 q single quotes q(literal text)
12885 ' single quotes 'literal text'
12886 qq double quotes qq(interpolate $here please)
12887 " double quotes "interpolate $here please"
12888 qx backticks qx(/bin/ls -l)
12889 ` backticks `/bin/ls -l`
12890 qw quote words @EXPORT_OK = qw( func() $spam )
12891 m// regexp match m/this/
12892 s/// regexp substitute s/this/that/
12893 tr/// string transliterate tr/this/that/
12894 y/// string transliterate y/this/that/
12895 ($*@) sub prototypes sub foo ($)
12896 (stuff) sub attr parameters sub foo : attr(stuff)
12897 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12899 In most of these cases (all but <>, patterns and transliterate)
12900 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12901 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12902 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12905 It skips whitespace before the string starts, and treats the first
12906 character as the delimiter. If the delimiter is one of ([{< then
12907 the corresponding "close" character )]}> is used as the closing
12908 delimiter. It allows quoting of delimiters, and if the string has
12909 balanced delimiters ([{<>}]) it allows nesting.
12911 On success, the SV with the resulting string is put into lex_stuff or,
12912 if that is already non-NULL, into lex_repl. The second case occurs only
12913 when parsing the RHS of the special constructs s/// and tr/// (y///).
12914 For convenience, the terminating delimiter character is stuffed into
12919 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
12922 SV *sv; /* scalar value: string */
12923 const char *tmps; /* temp string, used for delimiter matching */
12924 register char *s = start; /* current position in the buffer */
12925 register char term; /* terminating character */
12926 register char *to; /* current position in the sv's data */
12927 I32 brackets = 1; /* bracket nesting level */
12928 bool has_utf8 = FALSE; /* is there any utf8 content? */
12929 I32 termcode; /* terminating char. code */
12930 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
12931 STRLEN termlen; /* length of terminating string */
12932 int last_off = 0; /* last position for nesting bracket */
12938 PERL_ARGS_ASSERT_SCAN_STR;
12940 /* skip space before the delimiter */
12946 if (PL_realtokenstart >= 0) {
12947 stuffstart = PL_realtokenstart;
12948 PL_realtokenstart = -1;
12951 stuffstart = start - SvPVX(PL_linestr);
12953 /* mark where we are, in case we need to report errors */
12956 /* after skipping whitespace, the next character is the terminator */
12959 termcode = termstr[0] = term;
12963 termcode = utf8_to_uvchr((U8*)s, &termlen);
12964 Copy(s, termstr, termlen, U8);
12965 if (!UTF8_IS_INVARIANT(term))
12969 /* mark where we are */
12970 PL_multi_start = CopLINE(PL_curcop);
12971 PL_multi_open = term;
12973 /* find corresponding closing delimiter */
12974 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
12975 termcode = termstr[0] = term = tmps[5];
12977 PL_multi_close = term;
12979 /* create a new SV to hold the contents. 79 is the SV's initial length.
12980 What a random number. */
12981 sv = newSV_type(SVt_PVIV);
12983 SvIV_set(sv, termcode);
12984 (void)SvPOK_only(sv); /* validate pointer */
12986 /* move past delimiter and try to read a complete string */
12988 sv_catpvn(sv, s, termlen);
12991 tstart = SvPVX(PL_linestr) + stuffstart;
12992 if (!PL_thisopen && !keep_delims) {
12993 PL_thisopen = newSVpvn(tstart, s - tstart);
12994 stuffstart = s - SvPVX(PL_linestr);
12998 if (PL_encoding && !UTF) {
13002 int offset = s - SvPVX_const(PL_linestr);
13003 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
13004 &offset, (char*)termstr, termlen);
13005 const char * const ns = SvPVX_const(PL_linestr) + offset;
13006 char * const svlast = SvEND(sv) - 1;
13008 for (; s < ns; s++) {
13009 if (*s == '\n' && !PL_rsfp)
13010 CopLINE_inc(PL_curcop);
13013 goto read_more_line;
13015 /* handle quoted delimiters */
13016 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
13018 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
13020 if ((svlast-1 - t) % 2) {
13021 if (!keep_quoted) {
13022 *(svlast-1) = term;
13024 SvCUR_set(sv, SvCUR(sv) - 1);
13029 if (PL_multi_open == PL_multi_close) {
13035 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
13036 /* At here, all closes are "was quoted" one,
13037 so we don't check PL_multi_close. */
13039 if (!keep_quoted && *(t+1) == PL_multi_open)
13044 else if (*t == PL_multi_open)
13052 SvCUR_set(sv, w - SvPVX_const(sv));
13054 last_off = w - SvPVX(sv);
13055 if (--brackets <= 0)
13060 if (!keep_delims) {
13061 SvCUR_set(sv, SvCUR(sv) - 1);
13067 /* extend sv if need be */
13068 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
13069 /* set 'to' to the next character in the sv's string */
13070 to = SvPVX(sv)+SvCUR(sv);
13072 /* if open delimiter is the close delimiter read unbridle */
13073 if (PL_multi_open == PL_multi_close) {
13074 for (; s < PL_bufend; s++,to++) {
13075 /* embedded newlines increment the current line number */
13076 if (*s == '\n' && !PL_rsfp)
13077 CopLINE_inc(PL_curcop);
13078 /* handle quoted delimiters */
13079 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
13080 if (!keep_quoted && s[1] == term)
13082 /* any other quotes are simply copied straight through */
13086 /* terminate when run out of buffer (the for() condition), or
13087 have found the terminator */
13088 else if (*s == term) {
13091 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
13094 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
13100 /* if the terminator isn't the same as the start character (e.g.,
13101 matched brackets), we have to allow more in the quoting, and
13102 be prepared for nested brackets.
13105 /* read until we run out of string, or we find the terminator */
13106 for (; s < PL_bufend; s++,to++) {
13107 /* embedded newlines increment the line count */
13108 if (*s == '\n' && !PL_rsfp)
13109 CopLINE_inc(PL_curcop);
13110 /* backslashes can escape the open or closing characters */
13111 if (*s == '\\' && s+1 < PL_bufend) {
13112 if (!keep_quoted &&
13113 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
13118 /* allow nested opens and closes */
13119 else if (*s == PL_multi_close && --brackets <= 0)
13121 else if (*s == PL_multi_open)
13123 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
13128 /* terminate the copied string and update the sv's end-of-string */
13130 SvCUR_set(sv, to - SvPVX_const(sv));
13133 * this next chunk reads more into the buffer if we're not done yet
13137 break; /* handle case where we are done yet :-) */
13139 #ifndef PERL_STRICT_CR
13140 if (to - SvPVX_const(sv) >= 2) {
13141 if ((to[-2] == '\r' && to[-1] == '\n') ||
13142 (to[-2] == '\n' && to[-1] == '\r'))
13146 SvCUR_set(sv, to - SvPVX_const(sv));
13148 else if (to[-1] == '\r')
13151 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
13156 /* if we're out of file, or a read fails, bail and reset the current
13157 line marker so we can report where the unterminated string began
13160 if (PL_madskills) {
13161 char * const tstart = SvPVX(PL_linestr) + stuffstart;
13163 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
13165 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
13168 CopLINE_inc(PL_curcop);
13169 PL_bufptr = PL_bufend;
13170 if (!lex_next_chunk(0)) {
13172 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
13181 /* at this point, we have successfully read the delimited string */
13183 if (!PL_encoding || UTF) {
13185 if (PL_madskills) {
13186 char * const tstart = SvPVX(PL_linestr) + stuffstart;
13187 const int len = s - tstart;
13189 sv_catpvn(PL_thisstuff, tstart, len);
13191 PL_thisstuff = newSVpvn(tstart, len);
13192 if (!PL_thisclose && !keep_delims)
13193 PL_thisclose = newSVpvn(s,termlen);
13198 sv_catpvn(sv, s, termlen);
13203 if (PL_madskills) {
13204 char * const tstart = SvPVX(PL_linestr) + stuffstart;
13205 const int len = s - tstart - termlen;
13207 sv_catpvn(PL_thisstuff, tstart, len);
13209 PL_thisstuff = newSVpvn(tstart, len);
13210 if (!PL_thisclose && !keep_delims)
13211 PL_thisclose = newSVpvn(s - termlen,termlen);
13215 if (has_utf8 || PL_encoding)
13218 PL_multi_end = CopLINE(PL_curcop);
13220 /* if we allocated too much space, give some back */
13221 if (SvCUR(sv) + 5 < SvLEN(sv)) {
13222 SvLEN_set(sv, SvCUR(sv) + 1);
13223 SvPV_renew(sv, SvLEN(sv));
13226 /* decide whether this is the first or second quoted string we've read
13239 takes: pointer to position in buffer
13240 returns: pointer to new position in buffer
13241 side-effects: builds ops for the constant in pl_yylval.op
13243 Read a number in any of the formats that Perl accepts:
13245 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
13246 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
13249 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
13251 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
13254 If it reads a number without a decimal point or an exponent, it will
13255 try converting the number to an integer and see if it can do so
13256 without loss of precision.
13260 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
13263 register const char *s = start; /* current position in buffer */
13264 register char *d; /* destination in temp buffer */
13265 register char *e; /* end of temp buffer */
13266 NV nv; /* number read, as a double */
13267 SV *sv = NULL; /* place to put the converted number */
13268 bool floatit; /* boolean: int or float? */
13269 const char *lastub = NULL; /* position of last underbar */
13270 static char const number_too_long[] = "Number too long";
13272 PERL_ARGS_ASSERT_SCAN_NUM;
13274 /* We use the first character to decide what type of number this is */
13278 Perl_croak(aTHX_ "panic: scan_num");
13280 /* if it starts with a 0, it could be an octal number, a decimal in
13281 0.13 disguise, or a hexadecimal number, or a binary number. */
13285 u holds the "number so far"
13286 shift the power of 2 of the base
13287 (hex == 4, octal == 3, binary == 1)
13288 overflowed was the number more than we can hold?
13290 Shift is used when we add a digit. It also serves as an "are
13291 we in octal/hex/binary?" indicator to disallow hex characters
13292 when in octal mode.
13297 bool overflowed = FALSE;
13298 bool just_zero = TRUE; /* just plain 0 or binary number? */
13299 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13300 static const char* const bases[5] =
13301 { "", "binary", "", "octal", "hexadecimal" };
13302 static const char* const Bases[5] =
13303 { "", "Binary", "", "Octal", "Hexadecimal" };
13304 static const char* const maxima[5] =
13306 "0b11111111111111111111111111111111",
13310 const char *base, *Base, *max;
13312 /* check for hex */
13313 if (s[1] == 'x' || s[1] == 'X') {
13317 } else if (s[1] == 'b' || s[1] == 'B') {
13322 /* check for a decimal in disguise */
13323 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
13325 /* so it must be octal */
13332 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13333 "Misplaced _ in number");
13337 base = bases[shift];
13338 Base = Bases[shift];
13339 max = maxima[shift];
13341 /* read the rest of the number */
13343 /* x is used in the overflow test,
13344 b is the digit we're adding on. */
13349 /* if we don't mention it, we're done */
13353 /* _ are ignored -- but warned about if consecutive */
13355 if (lastub && s == lastub + 1)
13356 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13357 "Misplaced _ in number");
13361 /* 8 and 9 are not octal */
13362 case '8': case '9':
13364 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
13368 case '2': case '3': case '4':
13369 case '5': case '6': case '7':
13371 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
13374 case '0': case '1':
13375 b = *s++ & 15; /* ASCII digit -> value of digit */
13379 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13380 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
13381 /* make sure they said 0x */
13384 b = (*s++ & 7) + 9;
13386 /* Prepare to put the digit we have onto the end
13387 of the number so far. We check for overflows.
13393 x = u << shift; /* make room for the digit */
13395 if ((x >> shift) != u
13396 && !(PL_hints & HINT_NEW_BINARY)) {
13399 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13400 "Integer overflow in %s number",
13403 u = x | b; /* add the digit to the end */
13406 n *= nvshift[shift];
13407 /* If an NV has not enough bits in its
13408 * mantissa to represent an UV this summing of
13409 * small low-order numbers is a waste of time
13410 * (because the NV cannot preserve the
13411 * low-order bits anyway): we could just
13412 * remember when did we overflow and in the
13413 * end just multiply n by the right
13421 /* if we get here, we had success: make a scalar value from
13426 /* final misplaced underbar check */
13427 if (s[-1] == '_') {
13428 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13432 if (n > 4294967295.0)
13433 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13434 "%s number > %s non-portable",
13440 if (u > 0xffffffff)
13441 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13442 "%s number > %s non-portable",
13447 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
13448 sv = new_constant(start, s - start, "integer",
13449 sv, NULL, NULL, 0);
13450 else if (PL_hints & HINT_NEW_BINARY)
13451 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
13456 handle decimal numbers.
13457 we're also sent here when we read a 0 as the first digit
13459 case '1': case '2': case '3': case '4': case '5':
13460 case '6': case '7': case '8': case '9': case '.':
13463 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
13466 /* read next group of digits and _ and copy into d */
13467 while (isDIGIT(*s) || *s == '_') {
13468 /* skip underscores, checking for misplaced ones
13472 if (lastub && s == lastub + 1)
13473 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13474 "Misplaced _ in number");
13478 /* check for end of fixed-length buffer */
13480 Perl_croak(aTHX_ number_too_long);
13481 /* if we're ok, copy the character */
13486 /* final misplaced underbar check */
13487 if (lastub && s == lastub + 1) {
13488 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
13491 /* read a decimal portion if there is one. avoid
13492 3..5 being interpreted as the number 3. followed
13495 if (*s == '.' && s[1] != '.') {
13500 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13501 "Misplaced _ in number");
13505 /* copy, ignoring underbars, until we run out of digits.
13507 for (; isDIGIT(*s) || *s == '_'; s++) {
13508 /* fixed length buffer check */
13510 Perl_croak(aTHX_ number_too_long);
13512 if (lastub && s == lastub + 1)
13513 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13514 "Misplaced _ in number");
13520 /* fractional part ending in underbar? */
13521 if (s[-1] == '_') {
13522 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13523 "Misplaced _ in number");
13525 if (*s == '.' && isDIGIT(s[1])) {
13526 /* oops, it's really a v-string, but without the "v" */
13532 /* read exponent part, if present */
13533 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
13537 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
13538 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
13540 /* stray preinitial _ */
13542 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13543 "Misplaced _ in number");
13547 /* allow positive or negative exponent */
13548 if (*s == '+' || *s == '-')
13551 /* stray initial _ */
13553 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13554 "Misplaced _ in number");
13558 /* read digits of exponent */
13559 while (isDIGIT(*s) || *s == '_') {
13562 Perl_croak(aTHX_ number_too_long);
13566 if (((lastub && s == lastub + 1) ||
13567 (!isDIGIT(s[1]) && s[1] != '_')))
13568 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13569 "Misplaced _ in number");
13577 We try to do an integer conversion first if no characters
13578 indicating "float" have been found.
13583 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
13585 if (flags == IS_NUMBER_IN_UV) {
13587 sv = newSViv(uv); /* Prefer IVs over UVs. */
13590 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13591 if (uv <= (UV) IV_MIN)
13592 sv = newSViv(-(IV)uv);
13599 /* terminate the string */
13601 nv = Atof(PL_tokenbuf);
13606 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13607 const char *const key = floatit ? "float" : "integer";
13608 const STRLEN keylen = floatit ? 5 : 7;
13609 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13610 key, keylen, sv, NULL, NULL, 0);
13614 /* if it starts with a v, it could be a v-string */
13617 sv = newSV(5); /* preallocate storage space */
13618 s = scan_vstring(s, PL_bufend, sv);
13622 /* make the op for the constant and return */
13625 lvalp->opval = newSVOP(OP_CONST, 0, sv);
13627 lvalp->opval = NULL;
13633 S_scan_formline(pTHX_ register char *s)
13636 register char *eol;
13638 SV * const stuff = newSVpvs("");
13639 bool needargs = FALSE;
13640 bool eofmt = FALSE;
13642 char *tokenstart = s;
13643 SV* savewhite = NULL;
13645 if (PL_madskills) {
13646 savewhite = PL_thiswhite;
13651 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13653 while (!needargs) {
13656 #ifdef PERL_STRICT_CR
13657 while (SPACE_OR_TAB(*t))
13660 while (SPACE_OR_TAB(*t) || *t == '\r')
13663 if (*t == '\n' || t == PL_bufend) {
13668 if (PL_in_eval && !PL_rsfp) {
13669 eol = (char *) memchr(s,'\n',PL_bufend-s);
13674 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
13676 for (t = s; t < eol; t++) {
13677 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13679 goto enough; /* ~~ must be first line in formline */
13681 if (*t == '@' || *t == '^')
13685 sv_catpvn(stuff, s, eol-s);
13686 #ifndef PERL_STRICT_CR
13687 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13688 char *end = SvPVX(stuff) + SvCUR(stuff);
13691 SvCUR_set(stuff, SvCUR(stuff) - 1);
13702 if (PL_madskills) {
13704 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
13706 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
13709 PL_bufptr = PL_bufend;
13710 CopLINE_inc(PL_curcop);
13711 got_some = lex_next_chunk(0);
13712 CopLINE_dec(PL_curcop);
13715 tokenstart = PL_bufptr;
13723 if (SvCUR(stuff)) {
13726 PL_lex_state = LEX_NORMAL;
13727 start_force(PL_curforce);
13728 NEXTVAL_NEXTTOKE.ival = 0;
13732 PL_lex_state = LEX_FORMLINE;
13734 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
13736 else if (PL_encoding)
13737 sv_recode_to_utf8(stuff, PL_encoding);
13739 start_force(PL_curforce);
13740 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
13742 start_force(PL_curforce);
13743 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
13747 SvREFCNT_dec(stuff);
13749 PL_lex_formbrack = 0;
13753 if (PL_madskills) {
13755 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
13757 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13758 PL_thiswhite = savewhite;
13765 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
13768 const I32 oldsavestack_ix = PL_savestack_ix;
13769 CV* const outsidecv = PL_compcv;
13772 assert(SvTYPE(PL_compcv) == SVt_PVCV);
13774 SAVEI32(PL_subline);
13775 save_item(PL_subname);
13776 SAVESPTR(PL_compcv);
13778 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
13779 CvFLAGS(PL_compcv) |= flags;
13781 PL_subline = CopLINE(PL_curcop);
13782 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
13783 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
13784 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
13786 return oldsavestack_ix;
13790 #pragma segment Perl_yylex
13793 S_yywarn(pTHX_ const char *const s)
13797 PERL_ARGS_ASSERT_YYWARN;
13799 PL_in_eval |= EVAL_WARNONLY;
13801 PL_in_eval &= ~EVAL_WARNONLY;
13806 Perl_yyerror(pTHX_ const char *const s)
13809 const char *where = NULL;
13810 const char *context = NULL;
13813 int yychar = PL_parser->yychar;
13815 PERL_ARGS_ASSERT_YYERROR;
13817 if (!yychar || (yychar == ';' && !PL_rsfp))
13819 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13820 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13821 PL_oldbufptr != PL_bufptr) {
13824 The code below is removed for NetWare because it abends/crashes on NetWare
13825 when the script has error such as not having the closing quotes like:
13826 if ($var eq "value)
13827 Checking of white spaces is anyway done in NetWare code.
13830 while (isSPACE(*PL_oldoldbufptr))
13833 context = PL_oldoldbufptr;
13834 contlen = PL_bufptr - PL_oldoldbufptr;
13836 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13837 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
13840 The code below is removed for NetWare because it abends/crashes on NetWare
13841 when the script has error such as not having the closing quotes like:
13842 if ($var eq "value)
13843 Checking of white spaces is anyway done in NetWare code.
13846 while (isSPACE(*PL_oldbufptr))
13849 context = PL_oldbufptr;
13850 contlen = PL_bufptr - PL_oldbufptr;
13852 else if (yychar > 255)
13853 where = "next token ???";
13854 else if (yychar == -2) { /* YYEMPTY */
13855 if (PL_lex_state == LEX_NORMAL ||
13856 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
13857 where = "at end of line";
13858 else if (PL_lex_inpat)
13859 where = "within pattern";
13861 where = "within string";
13864 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
13866 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
13867 else if (isPRINT_LC(yychar)) {
13868 const char string = yychar;
13869 sv_catpvn(where_sv, &string, 1);
13872 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
13873 where = SvPVX_const(where_sv);
13875 msg = sv_2mortal(newSVpv(s, 0));
13876 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
13877 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
13879 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
13881 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
13882 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
13883 Perl_sv_catpvf(aTHX_ msg,
13884 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
13885 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
13888 if (PL_in_eval & EVAL_WARNONLY) {
13889 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
13893 if (PL_error_count >= 10) {
13894 if (PL_in_eval && SvCUR(ERRSV))
13895 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
13896 SVfARG(ERRSV), OutCopFILE(PL_curcop));
13898 Perl_croak(aTHX_ "%s has too many errors.\n",
13899 OutCopFILE(PL_curcop));
13902 PL_in_my_stash = NULL;
13906 #pragma segment Main
13910 S_swallow_bom(pTHX_ U8 *s)
13913 const STRLEN slen = SvCUR(PL_linestr);
13915 PERL_ARGS_ASSERT_SWALLOW_BOM;
13919 if (s[1] == 0xFE) {
13920 /* UTF-16 little-endian? (or UTF-32LE?) */
13921 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
13922 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
13923 #ifndef PERL_NO_UTF16_FILTER
13924 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
13926 if (PL_bufend > (char*)s) {
13927 s = add_utf16_textfilter(s, TRUE);
13930 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13935 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
13936 #ifndef PERL_NO_UTF16_FILTER
13937 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
13939 if (PL_bufend > (char *)s) {
13940 s = add_utf16_textfilter(s, FALSE);
13943 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13948 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
13949 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13950 s += 3; /* UTF-8 */
13956 if (s[2] == 0xFE && s[3] == 0xFF) {
13957 /* UTF-32 big-endian */
13958 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
13961 else if (s[2] == 0 && s[3] != 0) {
13964 * are a good indicator of UTF-16BE. */
13965 #ifndef PERL_NO_UTF16_FILTER
13966 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
13967 s = add_utf16_textfilter(s, FALSE);
13969 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13975 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13976 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13977 s += 4; /* UTF-8 */
13983 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13986 * are a good indicator of UTF-16LE. */
13987 #ifndef PERL_NO_UTF16_FILTER
13988 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
13989 s = add_utf16_textfilter(s, TRUE);
13991 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13999 #ifndef PERL_NO_UTF16_FILTER
14001 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
14004 SV *const filter = FILTER_DATA(idx);
14005 /* We re-use this each time round, throwing the contents away before we
14007 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
14008 SV *const utf8_buffer = filter;
14009 IV status = IoPAGE(filter);
14010 const bool reverse = cBOOL(IoLINES(filter));
14013 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
14015 /* As we're automatically added, at the lowest level, and hence only called
14016 from this file, we can be sure that we're not called in block mode. Hence
14017 don't bother writing code to deal with block mode. */
14019 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
14022 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
14024 DEBUG_P(PerlIO_printf(Perl_debug_log,
14025 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
14026 FPTR2DPTR(void *, S_utf16_textfilter),
14027 reverse ? 'l' : 'b', idx, maxlen, status,
14028 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
14035 /* First, look in our buffer of existing UTF-8 data: */
14036 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
14040 } else if (status == 0) {
14042 IoPAGE(filter) = 0;
14043 nl = SvEND(utf8_buffer);
14046 STRLEN got = nl - SvPVX(utf8_buffer);
14047 /* Did we have anything to append? */
14049 sv_catpvn(sv, SvPVX(utf8_buffer), got);
14050 /* Everything else in this code works just fine if SVp_POK isn't
14051 set. This, however, needs it, and we need it to work, else
14052 we loop infinitely because the buffer is never consumed. */
14053 sv_chop(utf8_buffer, nl);
14057 /* OK, not a complete line there, so need to read some more UTF-16.
14058 Read an extra octect if the buffer currently has an odd number. */
14062 if (SvCUR(utf16_buffer) >= 2) {
14063 /* Location of the high octet of the last complete code point.
14064 Gosh, UTF-16 is a pain. All the benefits of variable length,
14065 *coupled* with all the benefits of partial reads and
14067 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
14068 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
14070 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
14074 /* We have the first half of a surrogate. Read more. */
14075 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
14078 status = FILTER_READ(idx + 1, utf16_buffer,
14079 160 + (SvCUR(utf16_buffer) & 1));
14080 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
14081 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
14084 IoPAGE(filter) = status;
14089 chars = SvCUR(utf16_buffer) >> 1;
14090 have = SvCUR(utf8_buffer);
14091 SvGROW(utf8_buffer, have + chars * 3 + 1);
14094 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
14095 (U8*)SvPVX_const(utf8_buffer) + have,
14096 chars * 2, &newlen);
14098 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
14099 (U8*)SvPVX_const(utf8_buffer) + have,
14100 chars * 2, &newlen);
14102 SvCUR_set(utf8_buffer, have + newlen);
14105 /* No need to keep this SV "well-formed" with a '\0' after the end, as
14106 it's private to us, and utf16_to_utf8{,reversed} take a
14107 (pointer,length) pair, rather than a NUL-terminated string. */
14108 if(SvCUR(utf16_buffer) & 1) {
14109 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
14110 SvCUR_set(utf16_buffer, 1);
14112 SvCUR_set(utf16_buffer, 0);
14115 DEBUG_P(PerlIO_printf(Perl_debug_log,
14116 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
14118 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
14119 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
14124 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
14126 SV *filter = filter_add(S_utf16_textfilter, NULL);
14128 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
14130 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
14131 sv_setpvs(filter, "");
14132 IoLINES(filter) = reversed;
14133 IoPAGE(filter) = 1; /* Not EOF */
14135 /* Sadly, we have to return a valid pointer, come what may, so we have to
14136 ignore any error return from this. */
14137 SvCUR_set(PL_linestr, 0);
14138 if (FILTER_READ(0, PL_linestr, 0)) {
14139 SvUTF8_on(PL_linestr);
14141 SvUTF8_on(PL_linestr);
14143 PL_bufend = SvEND(PL_linestr);
14144 return (U8*)SvPVX(PL_linestr);
14149 Returns a pointer to the next character after the parsed
14150 vstring, as well as updating the passed in sv.
14152 Function must be called like
14155 s = scan_vstring(s,e,sv);
14157 where s and e are the start and end of the string.
14158 The sv should already be large enough to store the vstring
14159 passed in, for performance reasons.
14164 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
14167 const char *pos = s;
14168 const char *start = s;
14170 PERL_ARGS_ASSERT_SCAN_VSTRING;
14172 if (*pos == 'v') pos++; /* get past 'v' */
14173 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
14175 if ( *pos != '.') {
14176 /* this may not be a v-string if followed by => */
14177 const char *next = pos;
14178 while (next < e && isSPACE(*next))
14180 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
14181 /* return string not v-string */
14182 sv_setpvn(sv,(char *)s,pos-s);
14183 return (char *)pos;
14187 if (!isALPHA(*pos)) {
14188 U8 tmpbuf[UTF8_MAXBYTES+1];
14191 s++; /* get past 'v' */
14196 /* this is atoi() that tolerates underscores */
14199 const char *end = pos;
14201 while (--end >= s) {
14203 const UV orev = rev;
14204 rev += (*end - '0') * mult;
14207 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
14208 "Integer overflow in decimal number");
14212 if (rev > 0x7FFFFFFF)
14213 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
14215 /* Append native character for the rev point */
14216 tmpend = uvchr_to_utf8(tmpbuf, rev);
14217 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
14218 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
14220 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
14226 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
14230 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
14237 Perl_keyword_plugin_standard(pTHX_
14238 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
14240 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
14241 PERL_UNUSED_CONTEXT;
14242 PERL_UNUSED_ARG(keyword_ptr);
14243 PERL_UNUSED_ARG(keyword_len);
14244 PERL_UNUSED_ARG(op_ptr);
14245 return KEYWORD_PLUGIN_DECLINE;
14248 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
14250 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
14252 SAVEI32(PL_lex_brackets);
14253 if (PL_lex_brackets > 100)
14254 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
14255 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
14256 SAVEI32(PL_lex_allbrackets);
14257 PL_lex_allbrackets = 0;
14258 SAVEI8(PL_lex_fakeeof);
14259 PL_lex_fakeeof = (U8)fakeeof;
14260 if(yyparse(gramtype) && !PL_parser->error_count)
14261 qerror(Perl_mess(aTHX_ "Parse error"));
14264 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
14266 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
14270 SAVEVPTR(PL_eval_root);
14271 PL_eval_root = NULL;
14272 parse_recdescent(gramtype, fakeeof);
14278 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
14280 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
14283 if (flags & ~PARSE_OPTIONAL)
14284 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
14285 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
14286 if (!exprop && !(flags & PARSE_OPTIONAL)) {
14287 if (!PL_parser->error_count)
14288 qerror(Perl_mess(aTHX_ "Parse error"));
14289 exprop = newOP(OP_NULL, 0);
14295 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
14297 Parse a Perl arithmetic expression. This may contain operators of precedence
14298 down to the bit shift operators. The expression must be followed (and thus
14299 terminated) either by a comparison or lower-precedence operator or by
14300 something that would normally terminate an expression such as semicolon.
14301 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
14302 otherwise it is mandatory. It is up to the caller to ensure that the
14303 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
14304 the source of the code to be parsed and the lexical context for the
14307 The op tree representing the expression is returned. If an optional
14308 expression is absent, a null pointer is returned, otherwise the pointer
14311 If an error occurs in parsing or compilation, in most cases a valid op
14312 tree is returned anyway. The error is reflected in the parser state,
14313 normally resulting in a single exception at the top level of parsing
14314 which covers all the compilation errors that occurred. Some compilation
14315 errors, however, will throw an exception immediately.
14321 Perl_parse_arithexpr(pTHX_ U32 flags)
14323 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
14327 =for apidoc Amx|OP *|parse_termexpr|U32 flags
14329 Parse a Perl term expression. This may contain operators of precedence
14330 down to the assignment operators. The expression must be followed (and thus
14331 terminated) either by a comma or lower-precedence operator or by
14332 something that would normally terminate an expression such as semicolon.
14333 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
14334 otherwise it is mandatory. It is up to the caller to ensure that the
14335 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
14336 the source of the code to be parsed and the lexical context for the
14339 The op tree representing the expression is returned. If an optional
14340 expression is absent, a null pointer is returned, otherwise the pointer
14343 If an error occurs in parsing or compilation, in most cases a valid op
14344 tree is returned anyway. The error is reflected in the parser state,
14345 normally resulting in a single exception at the top level of parsing
14346 which covers all the compilation errors that occurred. Some compilation
14347 errors, however, will throw an exception immediately.
14353 Perl_parse_termexpr(pTHX_ U32 flags)
14355 return parse_expr(LEX_FAKEEOF_COMMA, flags);
14359 =for apidoc Amx|OP *|parse_listexpr|U32 flags
14361 Parse a Perl list expression. This may contain operators of precedence
14362 down to the comma operator. The expression must be followed (and thus
14363 terminated) either by a low-precedence logic operator such as C<or> or by
14364 something that would normally terminate an expression such as semicolon.
14365 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
14366 otherwise it is mandatory. It is up to the caller to ensure that the
14367 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
14368 the source of the code to be parsed and the lexical context for the
14371 The op tree representing the expression is returned. If an optional
14372 expression is absent, a null pointer is returned, otherwise the pointer
14375 If an error occurs in parsing or compilation, in most cases a valid op
14376 tree is returned anyway. The error is reflected in the parser state,
14377 normally resulting in a single exception at the top level of parsing
14378 which covers all the compilation errors that occurred. Some compilation
14379 errors, however, will throw an exception immediately.
14385 Perl_parse_listexpr(pTHX_ U32 flags)
14387 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
14391 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
14393 Parse a single complete Perl expression. This allows the full
14394 expression grammar, including the lowest-precedence operators such
14395 as C<or>. The expression must be followed (and thus terminated) by a
14396 token that an expression would normally be terminated by: end-of-file,
14397 closing bracketing punctuation, semicolon, or one of the keywords that
14398 signals a postfix expression-statement modifier. If I<flags> includes
14399 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
14400 mandatory. It is up to the caller to ensure that the dynamic parser
14401 state (L</PL_parser> et al) is correctly set to reflect the source of
14402 the code to be parsed and the lexical context for the expression.
14404 The op tree representing the expression is returned. If an optional
14405 expression is absent, a null pointer is returned, otherwise the pointer
14408 If an error occurs in parsing or compilation, in most cases a valid op
14409 tree is returned anyway. The error is reflected in the parser state,
14410 normally resulting in a single exception at the top level of parsing
14411 which covers all the compilation errors that occurred. Some compilation
14412 errors, however, will throw an exception immediately.
14418 Perl_parse_fullexpr(pTHX_ U32 flags)
14420 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
14424 =for apidoc Amx|OP *|parse_block|U32 flags
14426 Parse a single complete Perl code block. This consists of an opening
14427 brace, a sequence of statements, and a closing brace. The block
14428 constitutes a lexical scope, so C<my> variables and various compile-time
14429 effects can be contained within it. It is up to the caller to ensure
14430 that the dynamic parser state (L</PL_parser> et al) is correctly set to
14431 reflect the source of the code to be parsed and the lexical context for
14434 The op tree representing the code block is returned. This is always a
14435 real op, never a null pointer. It will normally be a C<lineseq> list,
14436 including C<nextstate> or equivalent ops. No ops to construct any kind
14437 of runtime scope are included by virtue of it being a block.
14439 If an error occurs in parsing or compilation, in most cases a valid op
14440 tree (most likely null) is returned anyway. The error is reflected in
14441 the parser state, normally resulting in a single exception at the top
14442 level of parsing which covers all the compilation errors that occurred.
14443 Some compilation errors, however, will throw an exception immediately.
14445 The I<flags> parameter is reserved for future use, and must always
14452 Perl_parse_block(pTHX_ U32 flags)
14455 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
14456 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
14460 =for apidoc Amx|OP *|parse_barestmt|U32 flags
14462 Parse a single unadorned Perl statement. This may be a normal imperative
14463 statement or a declaration that has compile-time effect. It does not
14464 include any label or other affixture. It is up to the caller to ensure
14465 that the dynamic parser state (L</PL_parser> et al) is correctly set to
14466 reflect the source of the code to be parsed and the lexical context for
14469 The op tree representing the statement is returned. This may be a
14470 null pointer if the statement is null, for example if it was actually
14471 a subroutine definition (which has compile-time side effects). If not
14472 null, it will be ops directly implementing the statement, suitable to
14473 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
14474 equivalent op (except for those embedded in a scope contained entirely
14475 within the statement).
14477 If an error occurs in parsing or compilation, in most cases a valid op
14478 tree (most likely null) is returned anyway. The error is reflected in
14479 the parser state, normally resulting in a single exception at the top
14480 level of parsing which covers all the compilation errors that occurred.
14481 Some compilation errors, however, will throw an exception immediately.
14483 The I<flags> parameter is reserved for future use, and must always
14490 Perl_parse_barestmt(pTHX_ U32 flags)
14493 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
14494 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
14498 =for apidoc Amx|SV *|parse_label|U32 flags
14500 Parse a single label, possibly optional, of the type that may prefix a
14501 Perl statement. It is up to the caller to ensure that the dynamic parser
14502 state (L</PL_parser> et al) is correctly set to reflect the source of
14503 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
14504 label is optional, otherwise it is mandatory.
14506 The name of the label is returned in the form of a fresh scalar. If an
14507 optional label is absent, a null pointer is returned.
14509 If an error occurs in parsing, which can only occur if the label is
14510 mandatory, a valid label is returned anyway. The error is reflected in
14511 the parser state, normally resulting in a single exception at the top
14512 level of parsing which covers all the compilation errors that occurred.
14518 Perl_parse_label(pTHX_ U32 flags)
14520 if (flags & ~PARSE_OPTIONAL)
14521 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
14522 if (PL_lex_state == LEX_KNOWNEXT) {
14523 PL_parser->yychar = yylex();
14524 if (PL_parser->yychar == LABEL) {
14525 char *lpv = pl_yylval.pval;
14526 STRLEN llen = strlen(lpv);
14528 PL_parser->yychar = YYEMPTY;
14529 lsv = newSV_type(SVt_PV);
14530 SvPV_set(lsv, lpv);
14531 SvCUR_set(lsv, llen);
14532 SvLEN_set(lsv, llen+1);
14542 STRLEN wlen, bufptr_pos;
14546 if (!isIDFIRST_A(c))
14550 } while(isWORDCHAR_A(c));
14552 if (word_takes_any_delimeter(s, wlen))
14554 bufptr_pos = s - SvPVX(PL_linestr);
14556 lex_read_space(LEX_KEEP_PREVIOUS);
14558 s = SvPVX(PL_linestr) + bufptr_pos;
14559 if (t[0] == ':' && t[1] != ':') {
14560 PL_oldoldbufptr = PL_oldbufptr;
14563 return newSVpvn(s, wlen);
14567 if (flags & PARSE_OPTIONAL) {
14570 qerror(Perl_mess(aTHX_ "Parse error"));
14571 return newSVpvs("x");
14578 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
14580 Parse a single complete Perl statement. This may be a normal imperative
14581 statement or a declaration that has compile-time effect, and may include
14582 optional labels. It is up to the caller to ensure that the dynamic
14583 parser state (L</PL_parser> et al) is correctly set to reflect the source
14584 of the code to be parsed and the lexical context for the statement.
14586 The op tree representing the statement is returned. This may be a
14587 null pointer if the statement is null, for example if it was actually
14588 a subroutine definition (which has compile-time side effects). If not
14589 null, it will be the result of a L</newSTATEOP> call, normally including
14590 a C<nextstate> or equivalent op.
14592 If an error occurs in parsing or compilation, in most cases a valid op
14593 tree (most likely null) is returned anyway. The error is reflected in
14594 the parser state, normally resulting in a single exception at the top
14595 level of parsing which covers all the compilation errors that occurred.
14596 Some compilation errors, however, will throw an exception immediately.
14598 The I<flags> parameter is reserved for future use, and must always
14605 Perl_parse_fullstmt(pTHX_ U32 flags)
14608 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
14609 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
14613 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
14615 Parse a sequence of zero or more Perl statements. These may be normal
14616 imperative statements, including optional labels, or declarations
14617 that have compile-time effect, or any mixture thereof. The statement
14618 sequence ends when a closing brace or end-of-file is encountered in a
14619 place where a new statement could have validly started. It is up to
14620 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14621 is correctly set to reflect the source of the code to be parsed and the
14622 lexical context for the statements.
14624 The op tree representing the statement sequence is returned. This may
14625 be a null pointer if the statements were all null, for example if there
14626 were no statements or if there were only subroutine definitions (which
14627 have compile-time side effects). If not null, it will be a C<lineseq>
14628 list, normally including C<nextstate> or equivalent ops.
14630 If an error occurs in parsing or compilation, in most cases a valid op
14631 tree is returned anyway. The error is reflected in the parser state,
14632 normally resulting in a single exception at the top level of parsing
14633 which covers all the compilation errors that occurred. Some compilation
14634 errors, however, will throw an exception immediately.
14636 The I<flags> parameter is reserved for future use, and must always
14643 Perl_parse_stmtseq(pTHX_ U32 flags)
14648 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
14649 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
14650 c = lex_peek_unichar(0);
14651 if (c != -1 && c != /*{*/'}')
14652 qerror(Perl_mess(aTHX_ "Parse error"));
14657 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
14659 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
14660 deprecate("qw(...) as parentheses");
14661 force_next((4<<24)|')');
14662 if (qwlist->op_type == OP_STUB) {
14666 start_force(PL_curforce);
14667 NEXTVAL_NEXTTOKE.opval = qwlist;
14670 force_next((2<<24)|'(');
14675 * c-indentation-style: bsd
14676 * c-basic-offset: 4
14677 * indent-tabs-mode: t
14680 * ex: set ts=8 sts=4 sw=4 noet: