3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_pending_ident (PL_parser->pending_ident)
70 #define PL_preambled (PL_parser->preambled)
71 #define PL_sublex_info (PL_parser->sublex_info)
72 #define PL_linestr (PL_parser->linestr)
73 #define PL_expect (PL_parser->expect)
74 #define PL_copline (PL_parser->copline)
75 #define PL_bufptr (PL_parser->bufptr)
76 #define PL_oldbufptr (PL_parser->oldbufptr)
77 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78 #define PL_linestart (PL_parser->linestart)
79 #define PL_bufend (PL_parser->bufend)
80 #define PL_last_uni (PL_parser->last_uni)
81 #define PL_last_lop (PL_parser->last_lop)
82 #define PL_last_lop_op (PL_parser->last_lop_op)
83 #define PL_lex_state (PL_parser->lex_state)
84 #define PL_rsfp (PL_parser->rsfp)
85 #define PL_rsfp_filters (PL_parser->rsfp_filters)
86 #define PL_in_my (PL_parser->in_my)
87 #define PL_in_my_stash (PL_parser->in_my_stash)
88 #define PL_tokenbuf (PL_parser->tokenbuf)
89 #define PL_multi_end (PL_parser->multi_end)
90 #define PL_error_count (PL_parser->error_count)
93 # define PL_endwhite (PL_parser->endwhite)
94 # define PL_faketokens (PL_parser->faketokens)
95 # define PL_lasttoke (PL_parser->lasttoke)
96 # define PL_nextwhite (PL_parser->nextwhite)
97 # define PL_realtokenstart (PL_parser->realtokenstart)
98 # define PL_skipwhite (PL_parser->skipwhite)
99 # define PL_thisclose (PL_parser->thisclose)
100 # define PL_thismad (PL_parser->thismad)
101 # define PL_thisopen (PL_parser->thisopen)
102 # define PL_thisstuff (PL_parser->thisstuff)
103 # define PL_thistoken (PL_parser->thistoken)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_thiswhite (PL_parser->thiswhite)
106 # define PL_nexttoke (PL_parser->nexttoke)
107 # define PL_curforce (PL_parser->curforce)
109 # define PL_nexttoke (PL_parser->nexttoke)
110 # define PL_nexttype (PL_parser->nexttype)
111 # define PL_nextval (PL_parser->nextval)
114 /* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
117 S_pending_ident(pTHX);
119 static const char ident_too_long[] = "Identifier too long";
122 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
123 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
125 # define CURMAD(slot,sv)
126 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
129 #define XENUMMASK 0x3f
130 #define XFAKEEOF 0x40
131 #define XFAKEBRACK 0x80
133 #ifdef USE_UTF8_SCRIPTS
134 # define UTF (!IN_BYTES)
136 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
139 /* The maximum number of characters preceding the unrecognized one to display */
140 #define UNRECOGNIZED_PRECEDE_COUNT 10
142 /* In variables named $^X, these are the legal values for X.
143 * 1999-02-27 mjd-perl-patch@plover.com */
144 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
146 #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
148 /* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
150 * can get by with a single comparison (if the compiler is smart enough).
153 /* #define LEX_NOTPARSING 11 is done in perl.h. */
155 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
156 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
161 /* at end of code, eg "$x" followed by: */
162 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
165 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167 #define LEX_INTERPCONST 2 /* NOT USED */
168 #define LEX_FORMLINE 1 /* expecting a format line */
169 #define LEX_KNOWNEXT 0 /* next token known; just return it */
173 static const char* const lex_state_names[] = {
192 #include "keywords.h"
194 /* CLINE is a macro that ensures PL_copline has a sane value */
199 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
202 # define SKIPSPACE0(s) skipspace0(s)
203 # define SKIPSPACE1(s) skipspace1(s)
204 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205 # define PEEKSPACE(s) skipspace2(s,0)
207 # define SKIPSPACE0(s) skipspace(s)
208 # define SKIPSPACE1(s) skipspace(s)
209 # define SKIPSPACE2(s,tsv) skipspace(s)
210 # define PEEKSPACE(s) skipspace(s)
214 * Convenience functions to return different tokens and prime the
215 * lexer for the next token. They all take an argument.
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
227 * FUN0OP : zero-argument function, with its op created in this file
228 * FUN1 : not used, except for not, which isn't a UNIOP
229 * BOop : bitwise or or xor
231 * SHop : shift operator
232 * PWop : power operator
233 * PMop : pattern-matching operator
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
237 * Rop : relational operator <= != gt
239 * Also see LOP and lop() below.
242 #ifdef DEBUGGING /* Serve -DT. */
243 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
245 # define REPORT(retval) (retval)
248 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
255 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
258 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
259 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
270 /* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
275 #define UNI2(f,x) { \
276 pl_yylval.ival = f; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
282 return REPORT( (int)FUNC1 ); \
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
286 #define UNI(f) UNI2(f,XTERM)
287 #define UNIDOR(f) UNI2(f,XTERMORDORDOR)
288 #define UNIPROTO(f,optional) { \
289 if (optional) PL_last_uni = PL_oldbufptr; \
293 #define UNIBRACK(f) { \
294 pl_yylval.ival = f; \
296 PL_last_uni = PL_oldbufptr; \
298 return REPORT( (int)FUNC1 ); \
300 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
303 /* grandfather return to old style */
306 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
307 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
308 pl_yylval.ival = (f); \
316 /* how to interpret the pl_yylval associated with the token */
320 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 static struct debug_tokens {
328 enum token_type type;
330 } const debug_tokens[] =
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
354 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
355 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
356 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
357 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
358 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
359 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
360 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
361 { IF, TOKENTYPE_IVAL, "IF" },
362 { LABEL, TOKENTYPE_PVAL, "LABEL" },
363 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
364 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
365 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
366 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
367 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
368 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
369 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
370 { MY, TOKENTYPE_IVAL, "MY" },
371 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
372 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
373 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
374 { OROP, TOKENTYPE_IVAL, "OROP" },
375 { OROR, TOKENTYPE_NONE, "OROR" },
376 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
377 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
378 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
379 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
380 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
381 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
382 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
383 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
384 { PREINC, TOKENTYPE_NONE, "PREINC" },
385 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
389 { SUB, TOKENTYPE_NONE, "SUB" },
390 { THING, TOKENTYPE_OPVAL, "THING" },
391 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
392 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
393 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
394 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
395 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
396 { USE, TOKENTYPE_IVAL, "USE" },
397 { WHEN, TOKENTYPE_IVAL, "WHEN" },
398 { WHILE, TOKENTYPE_IVAL, "WHILE" },
399 { WORD, TOKENTYPE_OPVAL, "WORD" },
400 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
401 { 0, TOKENTYPE_NONE, NULL }
404 /* dump the returned token in rv, plus any optional arg in pl_yylval */
407 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
411 PERL_ARGS_ASSERT_TOKEREPORT;
414 const char *name = NULL;
415 enum token_type type = TOKENTYPE_NONE;
416 const struct debug_tokens *p;
417 SV* const report = newSVpvs("<== ");
419 for (p = debug_tokens; p->token; p++) {
420 if (p->token == (int)rv) {
427 Perl_sv_catpv(aTHX_ report, name);
428 else if ((char)rv > ' ' && (char)rv < '~')
429 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
431 sv_catpvs(report, "EOF");
433 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
436 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
443 PL_op_name[lvalp->ival]);
446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
448 case TOKENTYPE_OPVAL:
450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
453 Perl_sv_catpvf(aTHX_ report, " %s",
454 SvPEEK(cSVOPx_sv(lvalp->opval)));
459 sv_catpvs(report, "(opval=null)");
462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
468 /* print the buffer with suitable escapes */
471 S_printbuf(pTHX_ const char *const fmt, const char *const s)
473 SV* const tmp = newSVpvs("");
475 PERL_ARGS_ASSERT_PRINTBUF;
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
484 S_deprecate_commaless_var_list(pTHX) {
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
498 S_ao(pTHX_ int toketype)
501 if (*PL_bufptr == '=') {
503 if (toketype == ANDAND)
504 pl_yylval.ival = OP_ANDASSIGN;
505 else if (toketype == OROR)
506 pl_yylval.ival = OP_ORASSIGN;
507 else if (toketype == DORDOR)
508 pl_yylval.ival = OP_DORASSIGN;
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
528 S_no_op(pTHX_ const char *const what, char *s)
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
534 PERL_ARGS_ASSERT_NO_OP;
540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
541 if (ckWARN_d(WARN_SYNTAX)) {
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
547 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
549 if (t < PL_bufptr && isSPACE(*t))
550 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551 "\t(Do you need to predeclare %.*s?)\n",
552 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
556 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
557 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
565 * Complain about missing quote/regexp/heredoc terminator.
566 * If it's called with NULL then it cauterizes the line buffer.
567 * If we're in a delimited string and the delimiter is a control
568 * character, it's reformatted into a two-char sequence like ^C.
573 S_missingterm(pTHX_ char *s)
579 char * const nl = strrchr(s,'\n');
583 else if (isCNTRL(PL_multi_close)) {
585 tmpbuf[1] = (char)toCTRL(PL_multi_close);
590 *tmpbuf = (char)PL_multi_close;
594 q = strchr(s,'"') ? '\'' : '"';
595 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
599 * Check whether the named feature is enabled.
602 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen,
606 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
608 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
610 if (namelen > MAX_FEATURE_LEN)
612 if (negate) he_name[8] = 'n', he_name[9] = 'o';
613 memcpy(&he_name[8 + 2*negate], name, namelen);
618 PL_curcop, he_name, 8 + 2*negate + namelen, 0, 0
620 != &PL_sv_placeholder
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
630 #ifdef PERL_CR_FILTER
634 register const char *s = SvPVX_const(sv);
635 register const char * const e = s + SvCUR(sv);
637 PERL_ARGS_ASSERT_STRIP_RETURN;
639 /* outer loop optimized to do nothing if there are no CR-LFs */
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
643 register char *d = s - 1;
646 if (*s == '\r' && s[1] == '\n')
657 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
660 if (count > 0 && !maxlen)
667 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
669 Creates and initialises a new lexer/parser state object, supplying
670 a context in which to lex and parse from a new source of Perl code.
671 A pointer to the new state object is placed in L</PL_parser>. An entry
672 is made on the save stack so that upon unwinding the new state object
673 will be destroyed and the former value of L</PL_parser> will be restored.
674 Nothing else need be done to clean up the parsing context.
676 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677 non-null, provides a string (in SV form) containing code to be parsed.
678 A copy of the string is made, so subsequent modification of I<line>
679 does not affect parsing. I<rsfp>, if non-null, provides an input stream
680 from which code will be read to be parsed. If both are non-null, the
681 code in I<line> comes first and must consist of complete lines of input,
682 and I<rsfp> supplies the remainder of the source.
684 The I<flags> parameter is reserved for future use. Currently it is only
685 used by perl internally, so extensions should always pass zero.
690 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
691 can share filters with the current parser. */
694 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
697 const char *s = NULL;
698 yy_parser *parser, *oparser;
699 if (flags && flags & ~LEX_START_FLAGS)
700 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
702 /* create and initialise a parser */
704 Newxz(parser, 1, yy_parser);
705 parser->old_parser = oparser = PL_parser;
708 parser->stack = NULL;
710 parser->stack_size = 0;
712 /* on scope exit, free this parser and restore any outer one */
714 parser->saved_curcop = PL_curcop;
716 /* initialise lexer state */
719 parser->curforce = -1;
721 parser->nexttoke = 0;
723 parser->error_count = oparser ? oparser->error_count : 0;
724 parser->copline = NOLINE;
725 parser->lex_state = LEX_NORMAL;
726 parser->expect = XSTATE;
728 parser->rsfp_filters =
729 !(flags & LEX_START_SAME_FILTER) || !oparser
731 : MUTABLE_AV(SvREFCNT_inc(
732 oparser->rsfp_filters
733 ? oparser->rsfp_filters
734 : (oparser->rsfp_filters = newAV())
737 Newx(parser->lex_brackstack, 120, char);
738 Newx(parser->lex_casestack, 12, char);
739 *parser->lex_casestack = '\0';
743 s = SvPV_const(line, len);
744 parser->linestr = flags & LEX_START_COPIED
745 ? SvREFCNT_inc_simple_NN(line)
746 : newSVpvn_flags(s, len, SvUTF8(line));
747 if (!len || s[len-1] != ';')
748 sv_catpvs(parser->linestr, "\n;");
750 parser->linestr = newSVpvs("\n;");
752 parser->oldoldbufptr =
755 parser->linestart = SvPVX(parser->linestr);
756 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
757 parser->last_lop = parser->last_uni = NULL;
758 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES);
760 parser->in_pod = parser->filtered = 0;
764 /* delete a parser object */
767 Perl_parser_free(pTHX_ const yy_parser *parser)
769 PERL_ARGS_ASSERT_PARSER_FREE;
771 PL_curcop = parser->saved_curcop;
772 SvREFCNT_dec(parser->linestr);
774 if (parser->rsfp == PerlIO_stdin())
775 PerlIO_clearerr(parser->rsfp);
776 else if (parser->rsfp && (!parser->old_parser ||
777 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
778 PerlIO_close(parser->rsfp);
779 SvREFCNT_dec(parser->rsfp_filters);
781 Safefree(parser->lex_brackstack);
782 Safefree(parser->lex_casestack);
783 PL_parser = parser->old_parser;
789 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
791 Buffer scalar containing the chunk currently under consideration of the
792 text currently being lexed. This is always a plain string scalar (for
793 which C<SvPOK> is true). It is not intended to be used as a scalar by
794 normal scalar means; instead refer to the buffer directly by the pointer
795 variables described below.
797 The lexer maintains various C<char*> pointers to things in the
798 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
799 reallocated, all of these pointers must be updated. Don't attempt to
800 do this manually, but rather use L</lex_grow_linestr> if you need to
801 reallocate the buffer.
803 The content of the text chunk in the buffer is commonly exactly one
804 complete line of input, up to and including a newline terminator,
805 but there are situations where it is otherwise. The octets of the
806 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
807 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
808 flag on this scalar, which may disagree with it.
810 For direct examination of the buffer, the variable
811 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
812 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
813 of these pointers is usually preferable to examination of the scalar
814 through normal scalar means.
816 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
818 Direct pointer to the end of the chunk of text currently being lexed, the
819 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
820 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
821 always located at the end of the buffer, and does not count as part of
822 the buffer's contents.
824 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
826 Points to the current position of lexing inside the lexer buffer.
827 Characters around this point may be freely examined, within
828 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
829 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
830 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
832 Lexing code (whether in the Perl core or not) moves this pointer past
833 the characters that it consumes. It is also expected to perform some
834 bookkeeping whenever a newline character is consumed. This movement
835 can be more conveniently performed by the function L</lex_read_to>,
836 which handles newlines appropriately.
838 Interpretation of the buffer's octets can be abstracted out by
839 using the slightly higher-level functions L</lex_peek_unichar> and
840 L</lex_read_unichar>.
842 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
844 Points to the start of the current line inside the lexer buffer.
845 This is useful for indicating at which column an error occurred, and
846 not much else. This must be updated by any lexing code that consumes
847 a newline; the function L</lex_read_to> handles this detail.
853 =for apidoc Amx|bool|lex_bufutf8
855 Indicates whether the octets in the lexer buffer
856 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
857 of Unicode characters. If not, they should be interpreted as Latin-1
858 characters. This is analogous to the C<SvUTF8> flag for scalars.
860 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
861 contains valid UTF-8. Lexing code must be robust in the face of invalid
864 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
865 is significant, but not the whole story regarding the input character
866 encoding. Normally, when a file is being read, the scalar contains octets
867 and its C<SvUTF8> flag is off, but the octets should be interpreted as
868 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
869 however, the scalar may have the C<SvUTF8> flag on, and in this case its
870 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
871 is in effect. This logic may change in the future; use this function
872 instead of implementing the logic yourself.
878 Perl_lex_bufutf8(pTHX)
884 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
886 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
887 at least I<len> octets (including terminating NUL). Returns a
888 pointer to the reallocated buffer. This is necessary before making
889 any direct modification of the buffer that would increase its length.
890 L</lex_stuff_pvn> provides a more convenient way to insert text into
893 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
894 this function updates all of the lexer's variables that point directly
901 Perl_lex_grow_linestr(pTHX_ STRLEN len)
905 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
906 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
907 linestr = PL_parser->linestr;
908 buf = SvPVX(linestr);
909 if (len <= SvLEN(linestr))
911 bufend_pos = PL_parser->bufend - buf;
912 bufptr_pos = PL_parser->bufptr - buf;
913 oldbufptr_pos = PL_parser->oldbufptr - buf;
914 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
915 linestart_pos = PL_parser->linestart - buf;
916 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
917 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
918 buf = sv_grow(linestr, len);
919 PL_parser->bufend = buf + bufend_pos;
920 PL_parser->bufptr = buf + bufptr_pos;
921 PL_parser->oldbufptr = buf + oldbufptr_pos;
922 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
923 PL_parser->linestart = buf + linestart_pos;
924 if (PL_parser->last_uni)
925 PL_parser->last_uni = buf + last_uni_pos;
926 if (PL_parser->last_lop)
927 PL_parser->last_lop = buf + last_lop_pos;
932 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
934 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
935 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
936 reallocating the buffer if necessary. This means that lexing code that
937 runs later will see the characters as if they had appeared in the input.
938 It is not recommended to do this as part of normal parsing, and most
939 uses of this facility run the risk of the inserted characters being
940 interpreted in an unintended manner.
942 The string to be inserted is represented by I<len> octets starting
943 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
944 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
945 The characters are recoded for the lexer buffer, according to how the
946 buffer is currently being interpreted (L</lex_bufutf8>). If a string
947 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
948 function is more convenient.
954 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
958 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
959 if (flags & ~(LEX_STUFF_UTF8))
960 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
962 if (flags & LEX_STUFF_UTF8) {
966 const char *p, *e = pv+len;
967 for (p = pv; p != e; p++)
968 highhalf += !!(((U8)*p) & 0x80);
971 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
972 bufptr = PL_parser->bufptr;
973 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
974 SvCUR_set(PL_parser->linestr,
975 SvCUR(PL_parser->linestr) + len+highhalf);
976 PL_parser->bufend += len+highhalf;
977 for (p = pv; p != e; p++) {
980 *bufptr++ = (char)(0xc0 | (c >> 6));
981 *bufptr++ = (char)(0x80 | (c & 0x3f));
988 if (flags & LEX_STUFF_UTF8) {
990 const char *p, *e = pv+len;
991 for (p = pv; p != e; p++) {
994 Perl_croak(aTHX_ "Lexing code attempted to stuff "
995 "non-Latin-1 character into Latin-1 input");
996 } else if (c >= 0xc2 && p+1 != e &&
997 (((U8)p[1]) & 0xc0) == 0x80) {
1000 } else if (c >= 0x80) {
1001 /* malformed UTF-8 */
1003 SAVESPTR(PL_warnhook);
1004 PL_warnhook = PERL_WARNHOOK_FATAL;
1005 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1011 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1012 bufptr = PL_parser->bufptr;
1013 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1014 SvCUR_set(PL_parser->linestr,
1015 SvCUR(PL_parser->linestr) + len-highhalf);
1016 PL_parser->bufend += len-highhalf;
1017 for (p = pv; p != e; p++) {
1020 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1023 *bufptr++ = (char)c;
1028 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1029 bufptr = PL_parser->bufptr;
1030 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1031 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1032 PL_parser->bufend += len;
1033 Copy(pv, bufptr, len, char);
1039 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1041 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1042 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1043 reallocating the buffer if necessary. This means that lexing code that
1044 runs later will see the characters as if they had appeared in the input.
1045 It is not recommended to do this as part of normal parsing, and most
1046 uses of this facility run the risk of the inserted characters being
1047 interpreted in an unintended manner.
1049 The string to be inserted is represented by octets starting at I<pv>
1050 and continuing to the first nul. These octets are interpreted as either
1051 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1052 in I<flags>. The characters are recoded for the lexer buffer, according
1053 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1054 If it is not convenient to nul-terminate a string to be inserted, the
1055 L</lex_stuff_pvn> function is more appropriate.
1061 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1063 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1064 lex_stuff_pvn(pv, strlen(pv), flags);
1068 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1070 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1071 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1072 reallocating the buffer if necessary. This means that lexing code that
1073 runs later will see the characters as if they had appeared in the input.
1074 It is not recommended to do this as part of normal parsing, and most
1075 uses of this facility run the risk of the inserted characters being
1076 interpreted in an unintended manner.
1078 The string to be inserted is the string value of I<sv>. The characters
1079 are recoded for the lexer buffer, according to how the buffer is currently
1080 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1081 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1082 need to construct a scalar.
1088 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1092 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1094 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1096 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1100 =for apidoc Amx|void|lex_unstuff|char *ptr
1102 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1103 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1104 This hides the discarded text from any lexing code that runs later,
1105 as if the text had never appeared.
1107 This is not the normal way to consume lexed text. For that, use
1114 Perl_lex_unstuff(pTHX_ char *ptr)
1118 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1119 buf = PL_parser->bufptr;
1121 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1124 bufend = PL_parser->bufend;
1126 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1127 unstuff_len = ptr - buf;
1128 Move(ptr, buf, bufend+1-ptr, char);
1129 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1130 PL_parser->bufend = bufend - unstuff_len;
1134 =for apidoc Amx|void|lex_read_to|char *ptr
1136 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1137 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1138 performing the correct bookkeeping whenever a newline character is passed.
1139 This is the normal way to consume lexed text.
1141 Interpretation of the buffer's octets can be abstracted out by
1142 using the slightly higher-level functions L</lex_peek_unichar> and
1143 L</lex_read_unichar>.
1149 Perl_lex_read_to(pTHX_ char *ptr)
1152 PERL_ARGS_ASSERT_LEX_READ_TO;
1153 s = PL_parser->bufptr;
1154 if (ptr < s || ptr > PL_parser->bufend)
1155 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1156 for (; s != ptr; s++)
1158 CopLINE_inc(PL_curcop);
1159 PL_parser->linestart = s+1;
1161 PL_parser->bufptr = ptr;
1165 =for apidoc Amx|void|lex_discard_to|char *ptr
1167 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1168 up to I<ptr>. The remaining content of the buffer will be moved, and
1169 all pointers into the buffer updated appropriately. I<ptr> must not
1170 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1171 it is not permitted to discard text that has yet to be lexed.
1173 Normally it is not necessarily to do this directly, because it suffices to
1174 use the implicit discarding behaviour of L</lex_next_chunk> and things
1175 based on it. However, if a token stretches across multiple lines,
1176 and the lexing code has kept multiple lines of text in the buffer for
1177 that purpose, then after completion of the token it would be wise to
1178 explicitly discard the now-unneeded earlier lines, to avoid future
1179 multi-line tokens growing the buffer without bound.
1185 Perl_lex_discard_to(pTHX_ char *ptr)
1189 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1190 buf = SvPVX(PL_parser->linestr);
1192 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1195 if (ptr > PL_parser->bufptr)
1196 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1197 discard_len = ptr - buf;
1198 if (PL_parser->oldbufptr < ptr)
1199 PL_parser->oldbufptr = ptr;
1200 if (PL_parser->oldoldbufptr < ptr)
1201 PL_parser->oldoldbufptr = ptr;
1202 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1203 PL_parser->last_uni = NULL;
1204 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1205 PL_parser->last_lop = NULL;
1206 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1207 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1208 PL_parser->bufend -= discard_len;
1209 PL_parser->bufptr -= discard_len;
1210 PL_parser->oldbufptr -= discard_len;
1211 PL_parser->oldoldbufptr -= discard_len;
1212 if (PL_parser->last_uni)
1213 PL_parser->last_uni -= discard_len;
1214 if (PL_parser->last_lop)
1215 PL_parser->last_lop -= discard_len;
1219 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1221 Reads in the next chunk of text to be lexed, appending it to
1222 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1223 looked to the end of the current chunk and wants to know more. It is
1224 usual, but not necessary, for lexing to have consumed the entirety of
1225 the current chunk at this time.
1227 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1228 chunk (i.e., the current chunk has been entirely consumed), normally the
1229 current chunk will be discarded at the same time that the new chunk is
1230 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1231 will not be discarded. If the current chunk has not been entirely
1232 consumed, then it will not be discarded regardless of the flag.
1234 Returns true if some new text was added to the buffer, or false if the
1235 buffer has reached the end of the input text.
1240 #define LEX_FAKE_EOF 0x80000000
1243 Perl_lex_next_chunk(pTHX_ U32 flags)
1247 STRLEN old_bufend_pos, new_bufend_pos;
1248 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1249 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1250 bool got_some_for_debugger = 0;
1252 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1253 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1254 linestr = PL_parser->linestr;
1255 buf = SvPVX(linestr);
1256 if (!(flags & LEX_KEEP_PREVIOUS) &&
1257 PL_parser->bufptr == PL_parser->bufend) {
1258 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1260 if (PL_parser->last_uni != PL_parser->bufend)
1261 PL_parser->last_uni = NULL;
1262 if (PL_parser->last_lop != PL_parser->bufend)
1263 PL_parser->last_lop = NULL;
1264 last_uni_pos = last_lop_pos = 0;
1268 old_bufend_pos = PL_parser->bufend - buf;
1269 bufptr_pos = PL_parser->bufptr - buf;
1270 oldbufptr_pos = PL_parser->oldbufptr - buf;
1271 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1272 linestart_pos = PL_parser->linestart - buf;
1273 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1274 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1276 if (flags & LEX_FAKE_EOF) {
1278 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1280 } else if (filter_gets(linestr, old_bufend_pos)) {
1282 got_some_for_debugger = 1;
1284 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1285 sv_setpvs(linestr, "");
1287 /* End of real input. Close filehandle (unless it was STDIN),
1288 * then add implicit termination.
1290 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1291 PerlIO_clearerr(PL_parser->rsfp);
1292 else if (PL_parser->rsfp)
1293 (void)PerlIO_close(PL_parser->rsfp);
1294 PL_parser->rsfp = NULL;
1295 PL_parser->in_pod = PL_parser->filtered = 0;
1297 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1300 if (!PL_in_eval && PL_minus_p) {
1302 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1303 PL_minus_n = PL_minus_p = 0;
1304 } else if (!PL_in_eval && PL_minus_n) {
1305 sv_catpvs(linestr, /*{*/";}");
1308 sv_catpvs(linestr, ";");
1311 buf = SvPVX(linestr);
1312 new_bufend_pos = SvCUR(linestr);
1313 PL_parser->bufend = buf + new_bufend_pos;
1314 PL_parser->bufptr = buf + bufptr_pos;
1315 PL_parser->oldbufptr = buf + oldbufptr_pos;
1316 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1317 PL_parser->linestart = buf + linestart_pos;
1318 if (PL_parser->last_uni)
1319 PL_parser->last_uni = buf + last_uni_pos;
1320 if (PL_parser->last_lop)
1321 PL_parser->last_lop = buf + last_lop_pos;
1322 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1323 PL_curstash != PL_debstash) {
1324 /* debugger active and we're not compiling the debugger code,
1325 * so store the line into the debugger's array of lines
1327 update_debugger_info(NULL, buf+old_bufend_pos,
1328 new_bufend_pos-old_bufend_pos);
1334 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1336 Looks ahead one (Unicode) character in the text currently being lexed.
1337 Returns the codepoint (unsigned integer value) of the next character,
1338 or -1 if lexing has reached the end of the input text. To consume the
1339 peeked character, use L</lex_read_unichar>.
1341 If the next character is in (or extends into) the next chunk of input
1342 text, the next chunk will be read in. Normally the current chunk will be
1343 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1344 then the current chunk will not be discarded.
1346 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1347 is encountered, an exception is generated.
1353 Perl_lex_peek_unichar(pTHX_ U32 flags)
1357 if (flags & ~(LEX_KEEP_PREVIOUS))
1358 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1359 s = PL_parser->bufptr;
1360 bufend = PL_parser->bufend;
1366 if (!lex_next_chunk(flags))
1368 s = PL_parser->bufptr;
1369 bufend = PL_parser->bufend;
1375 len = PL_utf8skip[head];
1376 while ((STRLEN)(bufend-s) < len) {
1377 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1379 s = PL_parser->bufptr;
1380 bufend = PL_parser->bufend;
1383 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1384 if (retlen == (STRLEN)-1) {
1385 /* malformed UTF-8 */
1387 SAVESPTR(PL_warnhook);
1388 PL_warnhook = PERL_WARNHOOK_FATAL;
1389 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1395 if (!lex_next_chunk(flags))
1397 s = PL_parser->bufptr;
1404 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1406 Reads the next (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the character read,
1408 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1409 if lexing has reached the end of the input text. To non-destructively
1410 examine the next character, use L</lex_peek_unichar> instead.
1412 If the next character is in (or extends into) the next chunk of input
1413 text, the next chunk will be read in. Normally the current chunk will be
1414 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1415 then the current chunk will not be discarded.
1417 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1418 is encountered, an exception is generated.
1424 Perl_lex_read_unichar(pTHX_ U32 flags)
1427 if (flags & ~(LEX_KEEP_PREVIOUS))
1428 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1429 c = lex_peek_unichar(flags);
1432 CopLINE_inc(PL_curcop);
1434 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1436 ++(PL_parser->bufptr);
1442 =for apidoc Amx|void|lex_read_space|U32 flags
1444 Reads optional spaces, in Perl style, in the text currently being
1445 lexed. The spaces may include ordinary whitespace characters and
1446 Perl-style comments. C<#line> directives are processed if encountered.
1447 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1448 at a non-space character (or the end of the input text).
1450 If spaces extend into the next chunk of input text, the next chunk will
1451 be read in. Normally the current chunk will be discarded at the same
1452 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1453 chunk will not be discarded.
1458 #define LEX_NO_NEXT_CHUNK 0x80000000
1461 Perl_lex_read_space(pTHX_ U32 flags)
1464 bool need_incline = 0;
1465 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
1466 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1469 sv_free(PL_skipwhite);
1470 PL_skipwhite = NULL;
1473 PL_skipwhite = newSVpvs("");
1474 #endif /* PERL_MAD */
1475 s = PL_parser->bufptr;
1476 bufend = PL_parser->bufend;
1482 } while (!(c == '\n' || (c == 0 && s == bufend)));
1483 } else if (c == '\n') {
1485 PL_parser->linestart = s;
1490 } else if (isSPACE(c)) {
1492 } else if (c == 0 && s == bufend) {
1496 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1497 #endif /* PERL_MAD */
1498 if (flags & LEX_NO_NEXT_CHUNK)
1500 PL_parser->bufptr = s;
1501 CopLINE_inc(PL_curcop);
1502 got_more = lex_next_chunk(flags);
1503 CopLINE_dec(PL_curcop);
1504 s = PL_parser->bufptr;
1505 bufend = PL_parser->bufend;
1508 if (need_incline && PL_parser->rsfp) {
1518 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1519 #endif /* PERL_MAD */
1520 PL_parser->bufptr = s;
1525 * This subroutine has nothing to do with tilting, whether at windmills
1526 * or pinball tables. Its name is short for "increment line". It
1527 * increments the current line number in CopLINE(PL_curcop) and checks
1528 * to see whether the line starts with a comment of the form
1529 * # line 500 "foo.pm"
1530 * If so, it sets the current line number and file to the values in the comment.
1534 S_incline(pTHX_ const char *s)
1542 PERL_ARGS_ASSERT_INCLINE;
1544 CopLINE_inc(PL_curcop);
1547 while (SPACE_OR_TAB(*s))
1549 if (strnEQ(s, "line", 4))
1553 if (SPACE_OR_TAB(*s))
1557 while (SPACE_OR_TAB(*s))
1565 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1567 while (SPACE_OR_TAB(*s))
1569 if (*s == '"' && (t = strchr(s+1, '"'))) {
1575 while (!isSPACE(*t))
1579 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1581 if (*e != '\n' && *e != '\0')
1582 return; /* false alarm */
1584 line_num = atoi(n)-1;
1587 const STRLEN len = t - s;
1588 SV *const temp_sv = CopFILESV(PL_curcop);
1593 cf = SvPVX(temp_sv);
1594 tmplen = SvCUR(temp_sv);
1600 if (!PL_rsfp && !PL_parser->filtered) {
1601 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1602 * to *{"::_<newfilename"} */
1603 /* However, the long form of evals is only turned on by the
1604 debugger - usually they're "(eval %lu)" */
1608 STRLEN tmplen2 = len;
1609 if (tmplen + 2 <= sizeof smallbuf)
1612 Newx(tmpbuf, tmplen + 2, char);
1615 memcpy(tmpbuf + 2, cf, tmplen);
1617 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1622 if (tmplen2 + 2 <= sizeof smallbuf)
1625 Newx(tmpbuf2, tmplen2 + 2, char);
1627 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1628 /* Either they malloc'd it, or we malloc'd it,
1629 so no prefix is present in ours. */
1634 memcpy(tmpbuf2 + 2, s, tmplen2);
1637 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1639 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1640 /* adjust ${"::_<newfilename"} to store the new file name */
1641 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1642 /* The line number may differ. If that is the case,
1643 alias the saved lines that are in the array.
1644 Otherwise alias the whole array. */
1645 if (CopLINE(PL_curcop) == line_num) {
1646 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1647 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1649 else if (GvAV(*gvp)) {
1650 AV * const av = GvAV(*gvp);
1651 const I32 start = CopLINE(PL_curcop)+1;
1652 I32 items = AvFILLp(av) - start;
1654 AV * const av2 = GvAVn(gv2);
1655 SV **svp = AvARRAY(av) + start;
1656 I32 l = (I32)line_num+1;
1658 av_store(av2, l++, SvREFCNT_inc(*svp++));
1663 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1665 if (tmpbuf != smallbuf) Safefree(tmpbuf);
1667 CopFILE_free(PL_curcop);
1668 CopFILE_setn(PL_curcop, s, len);
1670 CopLINE_set(PL_curcop, line_num);
1674 /* skip space before PL_thistoken */
1677 S_skipspace0(pTHX_ register char *s)
1679 PERL_ARGS_ASSERT_SKIPSPACE0;
1686 PL_thiswhite = newSVpvs("");
1687 sv_catsv(PL_thiswhite, PL_skipwhite);
1688 sv_free(PL_skipwhite);
1691 PL_realtokenstart = s - SvPVX(PL_linestr);
1695 /* skip space after PL_thistoken */
1698 S_skipspace1(pTHX_ register char *s)
1700 const char *start = s;
1701 I32 startoff = start - SvPVX(PL_linestr);
1703 PERL_ARGS_ASSERT_SKIPSPACE1;
1708 start = SvPVX(PL_linestr) + startoff;
1709 if (!PL_thistoken && PL_realtokenstart >= 0) {
1710 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1711 PL_thistoken = newSVpvn(tstart, start - tstart);
1713 PL_realtokenstart = -1;
1716 PL_nextwhite = newSVpvs("");
1717 sv_catsv(PL_nextwhite, PL_skipwhite);
1718 sv_free(PL_skipwhite);
1725 S_skipspace2(pTHX_ register char *s, SV **svp)
1728 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1729 const I32 startoff = s - SvPVX(PL_linestr);
1731 PERL_ARGS_ASSERT_SKIPSPACE2;
1734 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1735 if (!PL_madskills || !svp)
1737 start = SvPVX(PL_linestr) + startoff;
1738 if (!PL_thistoken && PL_realtokenstart >= 0) {
1739 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1740 PL_thistoken = newSVpvn(tstart, start - tstart);
1741 PL_realtokenstart = -1;
1745 *svp = newSVpvs("");
1746 sv_setsv(*svp, PL_skipwhite);
1747 sv_free(PL_skipwhite);
1756 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1758 AV *av = CopFILEAVx(PL_curcop);
1760 SV * const sv = newSV_type(SVt_PVMG);
1762 sv_setsv(sv, orig_sv);
1764 sv_setpvn(sv, buf, len);
1767 av_store(av, (I32)CopLINE(PL_curcop), sv);
1773 * Called to gobble the appropriate amount and type of whitespace.
1774 * Skips comments as well.
1778 S_skipspace(pTHX_ register char *s)
1782 #endif /* PERL_MAD */
1783 PERL_ARGS_ASSERT_SKIPSPACE;
1786 sv_free(PL_skipwhite);
1787 PL_skipwhite = NULL;
1789 #endif /* PERL_MAD */
1790 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1791 while (s < PL_bufend && SPACE_OR_TAB(*s))
1794 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1796 lex_read_space(LEX_KEEP_PREVIOUS |
1797 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1798 LEX_NO_NEXT_CHUNK : 0));
1800 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1801 if (PL_linestart > PL_bufptr)
1802 PL_bufptr = PL_linestart;
1807 PL_skipwhite = newSVpvn(start, s-start);
1808 #endif /* PERL_MAD */
1814 * Check the unary operators to ensure there's no ambiguity in how they're
1815 * used. An ambiguous piece of code would be:
1817 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1818 * the +5 is its argument.
1828 if (PL_oldoldbufptr != PL_last_uni)
1830 while (isSPACE(*PL_last_uni))
1833 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1835 if ((t = strchr(s, '(')) && t < PL_bufptr)
1838 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1839 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1840 (int)(s - PL_last_uni), PL_last_uni);
1844 * LOP : macro to build a list operator. Its behaviour has been replaced
1845 * with a subroutine, S_lop() for which LOP is just another name.
1848 #define LOP(f,x) return lop(f,x,s)
1852 * Build a list operator (or something that might be one). The rules:
1853 * - if we have a next token, then it's a list operator [why?]
1854 * - if the next thing is an opening paren, then it's a function
1855 * - else it's a list operator
1859 S_lop(pTHX_ I32 f, int x, char *s)
1863 PERL_ARGS_ASSERT_LOP;
1869 PL_last_lop = PL_oldbufptr;
1870 PL_last_lop_op = (OPCODE)f;
1879 return REPORT(FUNC);
1882 return REPORT(FUNC);
1885 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1886 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1887 return REPORT(LSTOP);
1894 * Sets up for an eventual force_next(). start_force(0) basically does
1895 * an unshift, while start_force(-1) does a push. yylex removes items
1900 S_start_force(pTHX_ int where)
1904 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
1905 where = PL_lasttoke;
1906 assert(PL_curforce < 0 || PL_curforce == where);
1907 if (PL_curforce != where) {
1908 for (i = PL_lasttoke; i > where; --i) {
1909 PL_nexttoke[i] = PL_nexttoke[i-1];
1913 if (PL_curforce < 0) /* in case of duplicate start_force() */
1914 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
1915 PL_curforce = where;
1918 curmad('^', newSVpvs(""));
1919 CURMAD('_', PL_nextwhite);
1924 S_curmad(pTHX_ char slot, SV *sv)
1930 if (PL_curforce < 0)
1931 where = &PL_thismad;
1933 where = &PL_nexttoke[PL_curforce].next_mad;
1939 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1941 else if (PL_encoding) {
1942 sv_recode_to_utf8(sv, PL_encoding);
1947 /* keep a slot open for the head of the list? */
1948 if (slot != '_' && *where && (*where)->mad_key == '^') {
1949 (*where)->mad_key = slot;
1950 sv_free(MUTABLE_SV(((*where)->mad_val)));
1951 (*where)->mad_val = (void*)sv;
1954 addmad(newMADsv(slot, sv), where, 0);
1957 # define start_force(where) NOOP
1958 # define curmad(slot, sv) NOOP
1963 * When the lexer realizes it knows the next token (for instance,
1964 * it is reordering tokens for the parser) then it can call S_force_next
1965 * to know what token to return the next time the lexer is called. Caller
1966 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1967 * and possibly PL_expect to ensure the lexer handles the token correctly.
1971 S_force_next(pTHX_ I32 type)
1976 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1977 tokereport(type, &NEXTVAL_NEXTTOKE);
1981 if (PL_curforce < 0)
1982 start_force(PL_lasttoke);
1983 PL_nexttoke[PL_curforce].next_type = type;
1984 if (PL_lex_state != LEX_KNOWNEXT)
1985 PL_lex_defer = PL_lex_state;
1986 PL_lex_state = LEX_KNOWNEXT;
1987 PL_lex_expect = PL_expect;
1990 PL_nexttype[PL_nexttoke] = type;
1992 if (PL_lex_state != LEX_KNOWNEXT) {
1993 PL_lex_defer = PL_lex_state;
1994 PL_lex_expect = PL_expect;
1995 PL_lex_state = LEX_KNOWNEXT;
2003 int yyc = PL_parser->yychar;
2004 if (yyc != YYEMPTY) {
2007 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2008 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2009 PL_lex_allbrackets--;
2011 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2012 } else if (yyc == '('/*)*/) {
2013 PL_lex_allbrackets--;
2018 PL_parser->yychar = YYEMPTY;
2023 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2026 SV * const sv = newSVpvn_utf8(start, len,
2029 && !is_ascii_string((const U8*)start, len)
2030 && is_utf8_string((const U8*)start, len));
2036 * When the lexer knows the next thing is a word (for instance, it has
2037 * just seen -> and it knows that the next char is a word char, then
2038 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2042 * char *start : buffer position (must be within PL_linestr)
2043 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2044 * int check_keyword : if true, Perl checks to make sure the word isn't
2045 * a keyword (do this if the word is a label, e.g. goto FOO)
2046 * int allow_pack : if true, : characters will also be allowed (require,
2047 * use, etc. do this)
2048 * int allow_initial_tick : used by the "sub" lexer only.
2052 S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
2058 PERL_ARGS_ASSERT_FORCE_WORD;
2060 start = SKIPSPACE1(start);
2062 if (isIDFIRST_lazy_if(s,UTF) ||
2063 (allow_pack && *s == ':') ||
2064 (allow_initial_tick && *s == '\'') )
2066 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2067 if (check_keyword && keyword(PL_tokenbuf, len, 0))
2069 start_force(PL_curforce);
2071 curmad('X', newSVpvn(start,s-start));
2072 if (token == METHOD) {
2077 PL_expect = XOPERATOR;
2081 curmad('g', newSVpvs( "forced" ));
2082 NEXTVAL_NEXTTOKE.opval
2083 = (OP*)newSVOP(OP_CONST,0,
2084 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2085 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2093 * Called when the lexer wants $foo *foo &foo etc, but the program
2094 * text only contains the "foo" portion. The first argument is a pointer
2095 * to the "foo", and the second argument is the type symbol to prefix.
2096 * Forces the next token to be a "WORD".
2097 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2101 S_force_ident(pTHX_ register const char *s, int kind)
2105 PERL_ARGS_ASSERT_FORCE_IDENT;
2108 const STRLEN len = strlen(s);
2109 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2110 UTF ? SVf_UTF8 : 0));
2111 start_force(PL_curforce);
2112 NEXTVAL_NEXTTOKE.opval = o;
2115 o->op_private = OPpCONST_ENTERED;
2116 /* XXX see note in pp_entereval() for why we forgo typo
2117 warnings if the symbol must be introduced in an eval.
2119 gv_fetchpvn_flags(s, len,
2120 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2121 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2122 kind == '$' ? SVt_PV :
2123 kind == '@' ? SVt_PVAV :
2124 kind == '%' ? SVt_PVHV :
2132 Perl_str_to_version(pTHX_ SV *sv)
2137 const char *start = SvPV_const(sv,len);
2138 const char * const end = start + len;
2139 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2141 PERL_ARGS_ASSERT_STR_TO_VERSION;
2143 while (start < end) {
2147 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2152 retval += ((NV)n)/nshift;
2161 * Forces the next token to be a version number.
2162 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2163 * and if "guessing" is TRUE, then no new token is created (and the caller
2164 * must use an alternative parsing method).
2168 S_force_version(pTHX_ char *s, int guessing)
2174 I32 startoff = s - SvPVX(PL_linestr);
2177 PERL_ARGS_ASSERT_FORCE_VERSION;
2185 while (isDIGIT(*d) || *d == '_' || *d == '.')
2189 start_force(PL_curforce);
2190 curmad('X', newSVpvn(s,d-s));
2193 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2195 #ifdef USE_LOCALE_NUMERIC
2196 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2197 setlocale(LC_NUMERIC, "C");
2199 s = scan_num(s, &pl_yylval);
2200 #ifdef USE_LOCALE_NUMERIC
2201 setlocale(LC_NUMERIC, loc);
2204 version = pl_yylval.opval;
2205 ver = cSVOPx(version)->op_sv;
2206 if (SvPOK(ver) && !SvNIOK(ver)) {
2207 SvUPGRADE(ver, SVt_PVNV);
2208 SvNV_set(ver, str_to_version(ver));
2209 SvNOK_on(ver); /* hint that it is a version */
2212 else if (guessing) {
2215 sv_free(PL_nextwhite); /* let next token collect whitespace */
2217 s = SvPVX(PL_linestr) + startoff;
2225 if (PL_madskills && !version) {
2226 sv_free(PL_nextwhite); /* let next token collect whitespace */
2228 s = SvPVX(PL_linestr) + startoff;
2231 /* NOTE: The parser sees the package name and the VERSION swapped */
2232 start_force(PL_curforce);
2233 NEXTVAL_NEXTTOKE.opval = version;
2240 * S_force_strict_version
2241 * Forces the next token to be a version number using strict syntax rules.
2245 S_force_strict_version(pTHX_ char *s)
2250 I32 startoff = s - SvPVX(PL_linestr);
2252 const char *errstr = NULL;
2254 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2256 while (isSPACE(*s)) /* leading whitespace */
2259 if (is_STRICT_VERSION(s,&errstr)) {
2261 s = (char *)scan_version(s, ver, 0);
2262 version = newSVOP(OP_CONST, 0, ver);
2264 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2265 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2269 yyerror(errstr); /* version required */
2274 if (PL_madskills && !version) {
2275 sv_free(PL_nextwhite); /* let next token collect whitespace */
2277 s = SvPVX(PL_linestr) + startoff;
2280 /* NOTE: The parser sees the package name and the VERSION swapped */
2281 start_force(PL_curforce);
2282 NEXTVAL_NEXTTOKE.opval = version;
2290 * Tokenize a quoted string passed in as an SV. It finds the next
2291 * chunk, up to end of string or a backslash. It may make a new
2292 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2297 S_tokeq(pTHX_ SV *sv)
2301 register char *send;
2306 PERL_ARGS_ASSERT_TOKEQ;
2311 s = SvPV_force(sv, len);
2312 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
2315 /* This is relying on the SV being "well formed" with a trailing '\0' */
2316 while (s < send && !(*s == '\\' && s[1] == '\\'))
2321 if ( PL_hints & HINT_NEW_STRING ) {
2322 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
2326 if (s + 1 < send && (s[1] == '\\'))
2327 s++; /* all that, just for this */
2332 SvCUR_set(sv, d - SvPVX_const(sv));
2334 if ( PL_hints & HINT_NEW_STRING )
2335 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2340 * Now come three functions related to double-quote context,
2341 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2342 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2343 * interact with PL_lex_state, and create fake ( ... ) argument lists
2344 * to handle functions and concatenation.
2345 * They assume that whoever calls them will be setting up a fake
2346 * join call, because each subthing puts a ',' after it. This lets
2349 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2351 * (I'm not sure whether the spurious commas at the end of lcfirst's
2352 * arguments and join's arguments are created or not).
2357 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2359 * Pattern matching will set PL_lex_op to the pattern-matching op to
2360 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2362 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2364 * Everything else becomes a FUNC.
2366 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2367 * had an OP_CONST or OP_READLINE). This just sets us up for a
2368 * call to S_sublex_push().
2372 S_sublex_start(pTHX)
2375 register const I32 op_type = pl_yylval.ival;
2377 if (op_type == OP_NULL) {
2378 pl_yylval.opval = PL_lex_op;
2382 if (op_type == OP_CONST || op_type == OP_READLINE) {
2383 SV *sv = tokeq(PL_lex_stuff);
2385 if (SvTYPE(sv) == SVt_PVIV) {
2386 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2388 const char * const p = SvPV_const(sv, len);
2389 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2393 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2394 PL_lex_stuff = NULL;
2395 /* Allow <FH> // "foo" */
2396 if (op_type == OP_READLINE)
2397 PL_expect = XTERMORDORDOR;
2400 else if (op_type == OP_BACKTICK && PL_lex_op) {
2401 /* readpipe() vas overriden */
2402 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
2403 pl_yylval.opval = PL_lex_op;
2405 PL_lex_stuff = NULL;
2409 PL_sublex_info.super_state = PL_lex_state;
2410 PL_sublex_info.sub_inwhat = (U16)op_type;
2411 PL_sublex_info.sub_op = PL_lex_op;
2412 PL_lex_state = LEX_INTERPPUSH;
2416 pl_yylval.opval = PL_lex_op;
2426 * Create a new scope to save the lexing state. The scope will be
2427 * ended in S_sublex_done. Returns a '(', starting the function arguments
2428 * to the uc, lc, etc. found before.
2429 * Sets PL_lex_state to LEX_INTERPCONCAT.
2438 PL_lex_state = PL_sublex_info.super_state;
2439 SAVEBOOL(PL_lex_dojoin);
2440 SAVEI32(PL_lex_brackets);
2441 SAVEI32(PL_lex_allbrackets);
2442 SAVEI8(PL_lex_fakeeof);
2443 SAVEI32(PL_lex_casemods);
2444 SAVEI32(PL_lex_starts);
2445 SAVEI8(PL_lex_state);
2446 SAVEVPTR(PL_lex_inpat);
2447 SAVEI16(PL_lex_inwhat);
2448 SAVECOPLINE(PL_curcop);
2449 SAVEPPTR(PL_bufptr);
2450 SAVEPPTR(PL_bufend);
2451 SAVEPPTR(PL_oldbufptr);
2452 SAVEPPTR(PL_oldoldbufptr);
2453 SAVEPPTR(PL_last_lop);
2454 SAVEPPTR(PL_last_uni);
2455 SAVEPPTR(PL_linestart);
2456 SAVESPTR(PL_linestr);
2457 SAVEGENERICPV(PL_lex_brackstack);
2458 SAVEGENERICPV(PL_lex_casestack);
2460 PL_linestr = PL_lex_stuff;
2461 PL_lex_stuff = NULL;
2463 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2464 = SvPVX(PL_linestr);
2465 PL_bufend += SvCUR(PL_linestr);
2466 PL_last_lop = PL_last_uni = NULL;
2467 SAVEFREESV(PL_linestr);
2469 PL_lex_dojoin = FALSE;
2470 PL_lex_brackets = 0;
2471 PL_lex_allbrackets = 0;
2472 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2473 Newx(PL_lex_brackstack, 120, char);
2474 Newx(PL_lex_casestack, 12, char);
2475 PL_lex_casemods = 0;
2476 *PL_lex_casestack = '\0';
2478 PL_lex_state = LEX_INTERPCONCAT;
2479 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2481 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2482 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2483 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2484 PL_lex_inpat = PL_sublex_info.sub_op;
2486 PL_lex_inpat = NULL;
2493 * Restores lexer state after a S_sublex_push.
2500 if (!PL_lex_starts++) {
2501 SV * const sv = newSVpvs("");
2502 if (SvUTF8(PL_linestr))
2504 PL_expect = XOPERATOR;
2505 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2509 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2510 PL_lex_state = LEX_INTERPCASEMOD;
2514 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2515 assert(PL_lex_inwhat != OP_TRANSR);
2516 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2517 PL_linestr = PL_lex_repl;
2519 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2520 PL_bufend += SvCUR(PL_linestr);
2521 PL_last_lop = PL_last_uni = NULL;
2522 SAVEFREESV(PL_linestr);
2523 PL_lex_dojoin = FALSE;
2524 PL_lex_brackets = 0;
2525 PL_lex_allbrackets = 0;
2526 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2527 PL_lex_casemods = 0;
2528 *PL_lex_casestack = '\0';
2530 if (SvEVALED(PL_lex_repl)) {
2531 PL_lex_state = LEX_INTERPNORMAL;
2533 /* we don't clear PL_lex_repl here, so that we can check later
2534 whether this is an evalled subst; that means we rely on the
2535 logic to ensure sublex_done() is called again only via the
2536 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2539 PL_lex_state = LEX_INTERPCONCAT;
2549 PL_endwhite = newSVpvs("");
2550 sv_catsv(PL_endwhite, PL_thiswhite);
2554 sv_setpvs(PL_thistoken,"");
2556 PL_realtokenstart = -1;
2560 PL_bufend = SvPVX(PL_linestr);
2561 PL_bufend += SvCUR(PL_linestr);
2562 PL_expect = XOPERATOR;
2563 PL_sublex_info.sub_inwhat = 0;
2571 Extracts a pattern, double-quoted string, or transliteration. This
2574 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2575 processing a pattern (PL_lex_inpat is true), a transliteration
2576 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2578 Returns a pointer to the character scanned up to. If this is
2579 advanced from the start pointer supplied (i.e. if anything was
2580 successfully parsed), will leave an OP for the substring scanned
2581 in pl_yylval. Caller must intuit reason for not parsing further
2582 by looking at the next characters herself.
2586 constants: \N{NAME} only
2587 case and quoting: \U \Q \E
2588 stops on @ and $, but not for $ as tail anchor
2590 In transliterations:
2591 characters are VERY literal, except for - not at the start or end
2592 of the string, which indicates a range. If the range is in bytes,
2593 scan_const expands the range to the full set of intermediate
2594 characters. If the range is in utf8, the hyphen is replaced with
2595 a certain range mark which will be handled by pmtrans() in op.c.
2597 In double-quoted strings:
2599 double-quoted style: \r and \n
2600 constants: \x31, etc.
2601 deprecated backrefs: \1 (in substitution replacements)
2602 case and quoting: \U \Q \E
2605 scan_const does *not* construct ops to handle interpolated strings.
2606 It stops processing as soon as it finds an embedded $ or @ variable
2607 and leaves it to the caller to work out what's going on.
2609 embedded arrays (whether in pattern or not) could be:
2610 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2612 $ in double-quoted strings must be the symbol of an embedded scalar.
2614 $ in pattern could be $foo or could be tail anchor. Assumption:
2615 it's a tail anchor if $ is the last thing in the string, or if it's
2616 followed by one of "()| \r\n\t"
2618 \1 (backreferences) are turned into $1
2620 The structure of the code is
2621 while (there's a character to process) {
2622 handle transliteration ranges
2623 skip regexp comments /(?#comment)/ and codes /(?{code})/
2624 skip #-initiated comments in //x patterns
2625 check for embedded arrays
2626 check for embedded scalars
2628 deprecate \1 in substitution replacements
2629 handle string-changing backslashes \l \U \Q \E, etc.
2630 switch (what was escaped) {
2631 handle \- in a transliteration (becomes a literal -)
2632 if a pattern and not \N{, go treat as regular character
2633 handle \132 (octal characters)
2634 handle \x15 and \x{1234} (hex characters)
2635 handle \N{name} (named characters, also \N{3,5} in a pattern)
2636 handle \cV (control characters)
2637 handle printf-style backslashes (\f, \r, \n, etc)
2640 } (end if backslash)
2641 handle regular character
2642 } (end while character to read)
2647 S_scan_const(pTHX_ char *start)
2650 register char *send = PL_bufend; /* end of the constant */
2651 SV *sv = newSV(send - start); /* sv for the constant. See
2652 note below on sizing. */
2653 register char *s = start; /* start of the constant */
2654 register char *d = SvPVX(sv); /* destination for copies */
2655 bool dorange = FALSE; /* are we in a translit range? */
2656 bool didrange = FALSE; /* did we just finish a range? */
2657 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2658 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
2659 to be UTF8? But, this can
2660 show as true when the source
2661 isn't utf8, as for example
2662 when it is entirely composed
2665 /* Note on sizing: The scanned constant is placed into sv, which is
2666 * initialized by newSV() assuming one byte of output for every byte of
2667 * input. This routine expects newSV() to allocate an extra byte for a
2668 * trailing NUL, which this routine will append if it gets to the end of
2669 * the input. There may be more bytes of input than output (eg., \N{LATIN
2670 * CAPITAL LETTER A}), or more output than input if the constant ends up
2671 * recoded to utf8, but each time a construct is found that might increase
2672 * the needed size, SvGROW() is called. Its size parameter each time is
2673 * based on the best guess estimate at the time, namely the length used so
2674 * far, plus the length the current construct will occupy, plus room for
2675 * the trailing NUL, plus one byte for every input byte still unscanned */
2679 UV literal_endpoint = 0;
2680 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
2683 PERL_ARGS_ASSERT_SCAN_CONST;
2685 assert(PL_lex_inwhat != OP_TRANSR);
2686 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2687 /* If we are doing a trans and we know we want UTF8 set expectation */
2688 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2689 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2693 while (s < send || dorange) {
2695 /* get transliterations out of the way (they're most literal) */
2696 if (PL_lex_inwhat == OP_TRANS) {
2697 /* expand a range A-Z to the full set of characters. AIE! */
2699 I32 i; /* current expanded character */
2700 I32 min; /* first character in range */
2701 I32 max; /* last character in range */
2712 char * const c = (char*)utf8_hop((U8*)d, -1);
2716 *c = (char)UTF_TO_NATIVE(0xff);
2717 /* mark the range as done, and continue */
2723 i = d - SvPVX_const(sv); /* remember current offset */
2726 SvLEN(sv) + (has_utf8 ?
2727 (512 - UTF_CONTINUATION_MARK +
2730 /* How many two-byte within 0..255: 128 in UTF-8,
2731 * 96 in UTF-8-mod. */
2733 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
2735 d = SvPVX(sv) + i; /* refresh d after realloc */
2739 for (j = 0; j <= 1; j++) {
2740 char * const c = (char*)utf8_hop((U8*)d, -1);
2741 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2747 max = (U8)0xff; /* only to \xff */
2748 uvmax = uv; /* \x{100} to uvmax */
2750 d = c; /* eat endpoint chars */
2755 d -= 2; /* eat the first char and the - */
2756 min = (U8)*d; /* first char in range */
2757 max = (U8)d[1]; /* last char in range */
2764 "Invalid range \"%c-%c\" in transliteration operator",
2765 (char)min, (char)max);
2769 if (literal_endpoint == 2 &&
2770 ((isLOWER(min) && isLOWER(max)) ||
2771 (isUPPER(min) && isUPPER(max)))) {
2773 for (i = min; i <= max; i++)
2775 *d++ = NATIVE_TO_NEED(has_utf8,i);
2777 for (i = min; i <= max; i++)
2779 *d++ = NATIVE_TO_NEED(has_utf8,i);
2784 for (i = min; i <= max; i++)
2787 const U8 ch = (U8)NATIVE_TO_UTF(i);
2788 if (UNI_IS_INVARIANT(ch))
2791 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2792 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2801 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2803 *d++ = (char)UTF_TO_NATIVE(0xff);
2805 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2809 /* mark the range as done, and continue */
2813 literal_endpoint = 0;
2818 /* range begins (ignore - as first or last char) */
2819 else if (*s == '-' && s+1 < send && s != start) {
2821 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2828 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
2838 literal_endpoint = 0;
2839 native_range = TRUE;
2844 /* if we get here, we're not doing a transliteration */
2846 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2847 except for the last char, which will be done separately. */
2848 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
2850 while (s+1 < send && *s != ')')
2851 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2853 else if (s[2] == '{' /* This should match regcomp.c */
2854 || (s[2] == '?' && s[3] == '{'))
2857 char *regparse = s + (s[2] == '{' ? 3 : 4);
2860 while (count && (c = *regparse)) {
2861 if (c == '\\' && regparse[1])
2869 if (*regparse != ')')
2870 regparse--; /* Leave one char for continuation. */
2871 while (s < regparse)
2872 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2876 /* likewise skip #-initiated comments in //x patterns */
2877 else if (*s == '#' && PL_lex_inpat &&
2878 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
2879 while (s+1 < send && *s != '\n')
2880 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
2883 /* check for embedded arrays
2884 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
2886 else if (*s == '@' && s[1]) {
2887 if (isALNUM_lazy_if(s+1,UTF))
2889 if (strchr(":'{$", s[1]))
2891 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2892 break; /* in regexp, neither @+ nor @- are interpolated */
2895 /* check for embedded scalars. only stop if we're sure it's a
2898 else if (*s == '$') {
2899 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
2901 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
2903 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2904 "Possible unintended interpolation of $\\ in regex");
2906 break; /* in regexp, $ might be tail anchor */
2910 /* End of else if chain - OP_TRANS rejoin rest */
2913 if (*s == '\\' && s+1 < send) {
2914 char* e; /* Can be used for ending '}', etc. */
2918 /* warn on \1 - \9 in substitution replacements, but note that \11
2919 * is an octal; and \19 is \1 followed by '9' */
2920 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
2921 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
2923 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
2928 /* string-change backslash escapes */
2929 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
2933 /* In a pattern, process \N, but skip any other backslash escapes.
2934 * This is because we don't want to translate an escape sequence
2935 * into a meta symbol and have the regex compiler use the meta
2936 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2937 * in spite of this, we do have to process \N here while the proper
2938 * charnames handler is in scope. See bugs #56444 and #62056.
2939 * There is a complication because \N in a pattern may also stand
2940 * for 'match a non-nl', and not mean a charname, in which case its
2941 * processing should be deferred to the regex compiler. To be a
2942 * charname it must be followed immediately by a '{', and not look
2943 * like \N followed by a curly quantifier, i.e., not something like
2944 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2946 else if (PL_lex_inpat
2949 || regcurly(s + 1)))
2951 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2952 goto default_action;
2957 /* quoted - in transliterations */
2959 if (PL_lex_inwhat == OP_TRANS) {
2966 if ((isALPHA(*s) || isDIGIT(*s)))
2967 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2968 "Unrecognized escape \\%c passed through",
2970 /* default action is to copy the quoted character */
2971 goto default_action;
2974 /* eg. \132 indicates the octal constant 0132 */
2975 case '0': case '1': case '2': case '3':
2976 case '4': case '5': case '6': case '7':
2980 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
2983 goto NUM_ESCAPE_INSERT;
2985 /* eg. \o{24} indicates the octal constant \024 */
2991 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
2997 goto NUM_ESCAPE_INSERT;
3000 /* eg. \x24 indicates the hex constant 0x24 */
3004 char* const e = strchr(s, '}');
3005 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
3006 PERL_SCAN_DISALLOW_PREFIX;
3011 yyerror("Missing right brace on \\x{}");
3015 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3021 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
3022 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
3028 /* Insert oct or hex escaped character. There will always be
3029 * enough room in sv since such escapes will be longer than any
3030 * UTF-8 sequence they can end up as, except if they force us
3031 * to recode the rest of the string into utf8 */
3033 /* Here uv is the ordinal of the next character being added in
3034 * unicode (converted from native). */
3035 if (!UNI_IS_INVARIANT(uv)) {
3036 if (!has_utf8 && uv > 255) {
3037 /* Might need to recode whatever we have accumulated so
3038 * far if it contains any chars variant in utf8 or
3041 SvCUR_set(sv, d - SvPVX_const(sv));
3044 /* See Note on sizing above. */
3045 sv_utf8_upgrade_flags_grow(sv,
3046 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3047 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3048 d = SvPVX(sv) + SvCUR(sv);
3053 d = (char*)uvuni_to_utf8((U8*)d, uv);
3054 if (PL_lex_inwhat == OP_TRANS &&
3055 PL_sublex_info.sub_op) {
3056 PL_sublex_info.sub_op->op_private |=
3057 (PL_lex_repl ? OPpTRANS_FROM_UTF
3061 if (uv > 255 && !dorange)
3062 native_range = FALSE;
3075 /* In a non-pattern \N must be a named character, like \N{LATIN
3076 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3077 * mean to match a non-newline. For non-patterns, named
3078 * characters are converted to their string equivalents. In
3079 * patterns, named characters are not converted to their
3080 * ultimate forms for the same reasons that other escapes
3081 * aren't. Instead, they are converted to the \N{U+...} form
3082 * to get the value from the charnames that is in effect right
3083 * now, while preserving the fact that it was a named character
3084 * so that the regex compiler knows this */
3086 /* This section of code doesn't generally use the
3087 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3088 * a close examination of this macro and determined it is a
3089 * no-op except on utfebcdic variant characters. Every
3090 * character generated by this that would normally need to be
3091 * enclosed by this macro is invariant, so the macro is not
3092 * needed, and would complicate use of copy(). XXX There are
3093 * other parts of this file where the macro is used
3094 * inconsistently, but are saved by it being a no-op */
3096 /* The structure of this section of code (besides checking for
3097 * errors and upgrading to utf8) is:
3098 * Further disambiguate between the two meanings of \N, and if
3099 * not a charname, go process it elsewhere
3100 * If of form \N{U+...}, pass it through if a pattern;
3101 * otherwise convert to utf8
3102 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3103 * pattern; otherwise convert to utf8 */
3105 /* Here, s points to the 'N'; the test below is guaranteed to
3106 * succeed if we are being called on a pattern as we already
3107 * know from a test above that the next character is a '{'.
3108 * On a non-pattern \N must mean 'named sequence, which
3109 * requires braces */
3112 yyerror("Missing braces on \\N{}");
3117 /* If there is no matching '}', it is an error. */
3118 if (! (e = strchr(s, '}'))) {
3119 if (! PL_lex_inpat) {
3120 yyerror("Missing right brace on \\N{}");
3122 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
3127 /* Here it looks like a named character */
3131 /* XXX This block is temporary code. \N{} implies that the
3132 * pattern is to have Unicode semantics, and therefore
3133 * currently has to be encoded in utf8. By putting it in
3134 * utf8 now, we save a whole pass in the regular expression
3135 * compiler. Once that code is changed so Unicode
3136 * semantics doesn't necessarily have to be in utf8, this
3137 * block should be removed. However, the code that parses
3138 * the output of this would have to be changed to not
3139 * necessarily expect utf8 */
3141 SvCUR_set(sv, d - SvPVX_const(sv));
3144 /* See Note on sizing above. */
3145 sv_utf8_upgrade_flags_grow(sv,
3146 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3147 /* 5 = '\N{' + cur char + NUL */
3148 (STRLEN)(send - s) + 5);
3149 d = SvPVX(sv) + SvCUR(sv);
3154 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3155 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3156 | PERL_SCAN_DISALLOW_PREFIX;
3159 /* For \N{U+...}, the '...' is a unicode value even on
3160 * EBCDIC machines */
3161 s += 2; /* Skip to next char after the 'U+' */
3163 uv = grok_hex(s, &len, &flags, NULL);
3164 if (len == 0 || len != (STRLEN)(e - s)) {
3165 yyerror("Invalid hexadecimal number in \\N{U+...}");
3172 /* On non-EBCDIC platforms, pass through to the regex
3173 * compiler unchanged. The reason we evaluated the
3174 * number above is to make sure there wasn't a syntax
3175 * error. But on EBCDIC we convert to native so
3176 * downstream code can continue to assume it's native
3178 s -= 5; /* Include the '\N{U+' */
3180 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3183 (unsigned int) UNI_TO_NATIVE(uv));
3185 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3189 else { /* Not a pattern: convert the hex to string */
3191 /* If destination is not in utf8, unconditionally
3192 * recode it to be so. This is because \N{} implies
3193 * Unicode semantics, and scalars have to be in utf8
3194 * to guarantee those semantics */
3196 SvCUR_set(sv, d - SvPVX_const(sv));
3199 /* See Note on sizing above. */
3200 sv_utf8_upgrade_flags_grow(
3202 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3203 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3204 d = SvPVX(sv) + SvCUR(sv);
3208 /* Add the string to the output */
3209 if (UNI_IS_INVARIANT(uv)) {
3212 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3215 else { /* Here is \N{NAME} but not \N{U+...}. */
3217 SV *res; /* result from charnames */
3218 const char *str; /* the string in 'res' */
3219 STRLEN len; /* its length */
3221 /* Get the value for NAME */
3222 res = newSVpvn(s, e - s);
3223 res = new_constant( NULL, 0, "charnames",
3224 /* includes all of: \N{...} */
3225 res, NULL, s - 3, e - s + 4 );
3227 /* Most likely res will be in utf8 already since the
3228 * standard charnames uses pack U, but a custom translator
3229 * can leave it otherwise, so make sure. XXX This can be
3230 * revisited to not have charnames use utf8 for characters
3231 * that don't need it when regexes don't have to be in utf8
3232 * for Unicode semantics. If doing so, remember EBCDIC */
3233 sv_utf8_upgrade(res);
3234 str = SvPV_const(res, len);
3236 /* Don't accept malformed input */
3237 if (! is_utf8_string((U8 *) str, len)) {
3238 yyerror("Malformed UTF-8 returned by \\N");
3240 else if (PL_lex_inpat) {
3242 if (! len) { /* The name resolved to an empty string */
3243 Copy("\\N{}", d, 4, char);
3247 /* In order to not lose information for the regex
3248 * compiler, pass the result in the specially made
3249 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3250 * the code points in hex of each character
3251 * returned by charnames */
3253 const char *str_end = str + len;
3254 STRLEN char_length; /* cur char's byte length */
3255 STRLEN output_length; /* and the number of bytes
3256 after this is translated
3258 const STRLEN off = d - SvPVX_const(sv);
3260 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3261 * max('U+', '.'); and 1 for NUL */
3262 char hex_string[2 * UTF8_MAXBYTES + 5];
3264 /* Get the first character of the result. */
3265 U32 uv = utf8n_to_uvuni((U8 *) str,
3270 /* The call to is_utf8_string() above hopefully
3271 * guarantees that there won't be an error. But
3272 * it's easy here to make sure. The function just
3273 * above warns and returns 0 if invalid utf8, but
3274 * it can also return 0 if the input is validly a
3275 * NUL. Disambiguate */
3276 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3277 uv = UNICODE_REPLACEMENT;
3280 /* Convert first code point to hex, including the
3281 * boiler plate before it. For all these, we
3282 * convert to native format so that downstream code
3283 * can continue to assume the input is native */
3285 my_snprintf(hex_string, sizeof(hex_string),
3287 (unsigned int) UNI_TO_NATIVE(uv));
3289 /* Make sure there is enough space to hold it */
3290 d = off + SvGROW(sv, off
3292 + (STRLEN)(send - e)
3293 + 2); /* '}' + NUL */
3295 Copy(hex_string, d, output_length, char);
3298 /* For each subsequent character, append dot and
3299 * its ordinal in hex */
3300 while ((str += char_length) < str_end) {
3301 const STRLEN off = d - SvPVX_const(sv);
3302 U32 uv = utf8n_to_uvuni((U8 *) str,
3306 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3307 uv = UNICODE_REPLACEMENT;
3311 my_snprintf(hex_string, sizeof(hex_string),
3313 (unsigned int) UNI_TO_NATIVE(uv));
3315 d = off + SvGROW(sv, off
3317 + (STRLEN)(send - e)
3318 + 2); /* '}' + NUL */
3319 Copy(hex_string, d, output_length, char);
3323 *d++ = '}'; /* Done. Add the trailing brace */
3326 else { /* Here, not in a pattern. Convert the name to a
3329 /* If destination is not in utf8, unconditionally
3330 * recode it to be so. This is because \N{} implies
3331 * Unicode semantics, and scalars have to be in utf8
3332 * to guarantee those semantics */
3334 SvCUR_set(sv, d - SvPVX_const(sv));
3337 /* See Note on sizing above. */
3338 sv_utf8_upgrade_flags_grow(sv,
3339 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3340 len + (STRLEN)(send - s) + 1);
3341 d = SvPVX(sv) + SvCUR(sv);
3343 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3345 /* See Note on sizing above. (NOTE: SvCUR() is not
3346 * set correctly here). */
3347 const STRLEN off = d - SvPVX_const(sv);
3348 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3350 Copy(str, d, len, char);
3355 /* Deprecate non-approved name syntax */
3356 if (ckWARN_d(WARN_DEPRECATED)) {
3357 bool problematic = FALSE;
3360 /* For non-ut8 input, look to see that the first
3361 * character is an alpha, then loop through the rest
3362 * checking that each is a continuation */
3364 if (! isALPHAU(*i)) problematic = TRUE;
3365 else for (i = s + 1; i < e; i++) {
3366 if (isCHARNAME_CONT(*i)) continue;
3372 /* Similarly for utf8. For invariants can check
3373 * directly. We accept anything above the latin1
3374 * range because it is immaterial to Perl if it is
3375 * correct or not, and is expensive to check. But
3376 * it is fairly easy in the latin1 range to convert
3377 * the variants into a single character and check
3379 if (UTF8_IS_INVARIANT(*i)) {
3380 if (! isALPHAU(*i)) problematic = TRUE;
3381 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3382 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
3388 if (! problematic) for (i = s + UTF8SKIP(s);
3392 if (UTF8_IS_INVARIANT(*i)) {
3393 if (isCHARNAME_CONT(*i)) continue;
3394 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3396 } else if (isCHARNAME_CONT(
3398 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
3407 /* The e-i passed to the final %.*s makes sure that
3408 * should the trailing NUL be missing that this
3409 * print won't run off the end of the string */
3410 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3411 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3412 (int)(i - s + 1), s, (int)(e - i), i + 1);
3415 } /* End \N{NAME} */
3418 native_range = FALSE; /* \N{} is defined to be Unicode */
3420 s = e + 1; /* Point to just after the '}' */
3423 /* \c is a control character */
3427 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3430 yyerror("Missing control char name in \\c");
3434 /* printf-style backslashes, formfeeds, newlines, etc */
3436 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
3439 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
3442 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
3445 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
3448 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
3451 *d++ = ASCII_TO_NEED(has_utf8,'\033');
3454 *d++ = ASCII_TO_NEED(has_utf8,'\007');
3460 } /* end if (backslash) */
3467 /* If we started with encoded form, or already know we want it,
3468 then encode the next character */
3469 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3473 /* One might think that it is wasted effort in the case of the
3474 * source being utf8 (this_utf8 == TRUE) to take the next character
3475 * in the source, convert it to an unsigned value, and then convert
3476 * it back again. But the source has not been validated here. The
3477 * routine that does the conversion checks for errors like
3480 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3481 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
3483 SvCUR_set(sv, d - SvPVX_const(sv));
3486 /* See Note on sizing above. */
3487 sv_utf8_upgrade_flags_grow(sv,
3488 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3489 need + (STRLEN)(send - s) + 1);
3490 d = SvPVX(sv) + SvCUR(sv);
3492 } else if (need > len) {
3493 /* encoded value larger than old, may need extra space (NOTE:
3494 * SvCUR() is not set correctly here). See Note on sizing
3496 const STRLEN off = d - SvPVX_const(sv);
3497 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3501 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3503 if (uv > 255 && !dorange)
3504 native_range = FALSE;
3508 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3510 } /* while loop to process each character */
3512 /* terminate the string and set up the sv */
3514 SvCUR_set(sv, d - SvPVX_const(sv));
3515 if (SvCUR(sv) >= SvLEN(sv))
3516 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
3519 if (PL_encoding && !has_utf8) {
3520 sv_recode_to_utf8(sv, PL_encoding);
3526 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3527 PL_sublex_info.sub_op->op_private |=
3528 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3532 /* shrink the sv if we allocated more than we used */
3533 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3534 SvPV_shrink_to_cur(sv);
3537 /* return the substring (via pl_yylval) only if we parsed anything */
3538 if (s > PL_bufptr) {
3539 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3540 const char *const key = PL_lex_inpat ? "qr" : "q";
3541 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3545 if (PL_lex_inwhat == OP_TRANS) {
3548 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3556 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3559 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3566 * Returns TRUE if there's more to the expression (e.g., a subscript),
3569 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3571 * ->[ and ->{ return TRUE
3572 * { and [ outside a pattern are always subscripts, so return TRUE
3573 * if we're outside a pattern and it's not { or [, then return FALSE
3574 * if we're in a pattern and the first char is a {
3575 * {4,5} (any digits around the comma) returns FALSE
3576 * if we're in a pattern and the first char is a [
3578 * [SOMETHING] has a funky algorithm to decide whether it's a
3579 * character class or not. It has to deal with things like
3580 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3581 * anything else returns TRUE
3584 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3587 S_intuit_more(pTHX_ register char *s)
3591 PERL_ARGS_ASSERT_INTUIT_MORE;
3593 if (PL_lex_brackets)
3595 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3597 if (*s != '{' && *s != '[')
3602 /* In a pattern, so maybe we have {n,m}. */
3610 /* On the other hand, maybe we have a character class */
3613 if (*s == ']' || *s == '^')
3616 /* this is terrifying, and it works */
3617 int weight = 2; /* let's weigh the evidence */
3619 unsigned char un_char = 255, last_un_char;
3620 const char * const send = strchr(s,']');
3621 char tmpbuf[sizeof PL_tokenbuf * 4];
3623 if (!send) /* has to be an expression */
3626 Zero(seen,256,char);
3629 else if (isDIGIT(*s)) {
3631 if (isDIGIT(s[1]) && s[2] == ']')
3637 for (; s < send; s++) {
3638 last_un_char = un_char;
3639 un_char = (unsigned char)*s;
3644 weight -= seen[un_char] * 10;
3645 if (isALNUM_lazy_if(s+1,UTF)) {
3647 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
3648 len = (int)strlen(tmpbuf);
3649 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3650 UTF ? SVf_UTF8 : 0, SVt_PV))
3655 else if (*s == '$' && s[1] &&
3656 strchr("[#!%*<>()-=",s[1])) {
3657 if (/*{*/ strchr("])} =",s[2]))
3666 if (strchr("wds]",s[1]))
3668 else if (seen[(U8)'\''] || seen[(U8)'"'])
3670 else if (strchr("rnftbxcav",s[1]))
3672 else if (isDIGIT(s[1])) {
3674 while (s[1] && isDIGIT(s[1]))
3684 if (strchr("aA01! ",last_un_char))
3686 if (strchr("zZ79~",s[1]))
3688 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3689 weight -= 5; /* cope with negative subscript */
3692 if (!isALNUM(last_un_char)
3693 && !(last_un_char == '$' || last_un_char == '@'
3694 || last_un_char == '&')
3695 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
3700 if (keyword(tmpbuf, d - tmpbuf, 0))
3703 if (un_char == last_un_char + 1)
3705 weight -= seen[un_char];
3710 if (weight >= 0) /* probably a character class */
3720 * Does all the checking to disambiguate
3722 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3723 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3725 * First argument is the stuff after the first token, e.g. "bar".
3727 * Not a method if bar is a filehandle.
3728 * Not a method if foo is a subroutine prototyped to take a filehandle.
3729 * Not a method if it's really "Foo $bar"
3730 * Method if it's "foo $bar"
3731 * Not a method if it's really "print foo $bar"
3732 * Method if it's really "foo package::" (interpreted as package->foo)
3733 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3734 * Not a method if bar is a filehandle or package, but is quoted with
3739 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
3742 char *s = start + (*start == '$');
3743 char tmpbuf[sizeof PL_tokenbuf];
3750 PERL_ARGS_ASSERT_INTUIT_METHOD;
3753 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
3757 const char *proto = CvPROTO(cv);
3768 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
3769 /* start is the beginning of the possible filehandle/object,
3770 * and s is the end of it
3771 * tmpbuf is a copy of it
3774 if (*start == '$') {
3775 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3776 isUPPER(*PL_tokenbuf))
3779 len = start - SvPVX(PL_linestr);
3783 start = SvPVX(PL_linestr) + len;
3787 return *s == '(' ? FUNCMETH : METHOD;
3789 if (!keyword(tmpbuf, len, 0)) {
3790 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3794 soff = s - SvPVX(PL_linestr);
3798 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
3799 if (indirgv && GvCVu(indirgv))
3801 /* filehandle or package name makes it a method */
3802 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
3804 soff = s - SvPVX(PL_linestr);
3807 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
3808 return 0; /* no assumptions -- "=>" quotes bareword */
3810 start_force(PL_curforce);
3811 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
3812 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
3813 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
3815 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3816 ( UTF ? SVf_UTF8 : 0 )));
3821 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3823 return *s == '(' ? FUNCMETH : METHOD;
3829 /* Encoded script support. filter_add() effectively inserts a
3830 * 'pre-processing' function into the current source input stream.
3831 * Note that the filter function only applies to the current source file
3832 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3834 * The datasv parameter (which may be NULL) can be used to pass
3835 * private data to this instance of the filter. The filter function
3836 * can recover the SV using the FILTER_DATA macro and use it to
3837 * store private buffers and state information.
3839 * The supplied datasv parameter is upgraded to a PVIO type
3840 * and the IoDIRP/IoANY field is used to store the function pointer,
3841 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
3842 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3843 * private use must be set using malloc'd pointers.
3847 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
3856 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3857 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3859 if (!PL_rsfp_filters)
3860 PL_rsfp_filters = newAV();
3863 SvUPGRADE(datasv, SVt_PVIO);
3864 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
3865 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
3866 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
3867 FPTR2DPTR(void *, IoANY(datasv)),
3868 SvPV_nolen(datasv)));
3869 av_unshift(PL_rsfp_filters, 1);
3870 av_store(PL_rsfp_filters, 0, datasv) ;
3872 !PL_parser->filtered
3873 && PL_parser->lex_flags & LEX_EVALBYTES
3874 && PL_bufptr < PL_bufend
3876 const char *s = PL_bufptr;
3877 while (s < PL_bufend) {
3879 SV *linestr = PL_parser->linestr;
3880 char *buf = SvPVX(linestr);
3881 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3882 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3883 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3884 STRLEN const linestart_pos = PL_parser->linestart - buf;
3885 STRLEN const last_uni_pos =
3886 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3887 STRLEN const last_lop_pos =
3888 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3889 av_push(PL_rsfp_filters, linestr);
3890 PL_parser->linestr =
3891 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3892 buf = SvPVX(PL_parser->linestr);
3893 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3894 PL_parser->bufptr = buf + bufptr_pos;
3895 PL_parser->oldbufptr = buf + oldbufptr_pos;
3896 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3897 PL_parser->linestart = buf + linestart_pos;
3898 if (PL_parser->last_uni)
3899 PL_parser->last_uni = buf + last_uni_pos;
3900 if (PL_parser->last_lop)
3901 PL_parser->last_lop = buf + last_lop_pos;
3902 SvLEN(linestr) = SvCUR(linestr);
3903 SvCUR(linestr) = s-SvPVX(linestr);
3904 PL_parser->filtered = 1;
3914 /* Delete most recently added instance of this filter function. */
3916 Perl_filter_del(pTHX_ filter_t funcp)
3921 PERL_ARGS_ASSERT_FILTER_DEL;
3924 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3925 FPTR2DPTR(void*, funcp)));
3927 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
3929 /* if filter is on top of stack (usual case) just pop it off */
3930 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
3931 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3932 sv_free(av_pop(PL_rsfp_filters));
3936 /* we need to search for the correct entry and clear it */
3937 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
3941 /* Invoke the idxth filter function for the current rsfp. */
3942 /* maxlen 0 = read one text line */
3944 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
3949 /* This API is bad. It should have been using unsigned int for maxlen.
3950 Not sure if we want to change the API, but if not we should sanity
3951 check the value here. */
3952 unsigned int correct_length
3961 PERL_ARGS_ASSERT_FILTER_READ;
3963 if (!PL_parser || !PL_rsfp_filters)
3965 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
3966 /* Provide a default input filter to make life easy. */
3967 /* Note that we append to the line. This is handy. */
3968 DEBUG_P(PerlIO_printf(Perl_debug_log,
3969 "filter_read %d: from rsfp\n", idx));
3970 if (correct_length) {
3973 const int old_len = SvCUR(buf_sv);
3975 /* ensure buf_sv is large enough */
3976 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
3977 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3978 correct_length)) <= 0) {
3979 if (PerlIO_error(PL_rsfp))
3980 return -1; /* error */
3982 return 0 ; /* end of file */
3984 SvCUR_set(buf_sv, old_len + len) ;
3985 SvPVX(buf_sv)[old_len + len] = '\0';
3988 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3989 if (PerlIO_error(PL_rsfp))
3990 return -1; /* error */
3992 return 0 ; /* end of file */
3995 return SvCUR(buf_sv);
3997 /* Skip this filter slot if filter has been deleted */
3998 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
3999 DEBUG_P(PerlIO_printf(Perl_debug_log,
4000 "filter_read %d: skipped (filter deleted)\n",
4002 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4004 if (SvTYPE(datasv) != SVt_PVIO) {
4005 if (correct_length) {
4007 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4008 if (!remainder) return 0; /* eof */
4009 if (correct_length > remainder) correct_length = remainder;
4010 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4011 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4014 const char *s = SvEND(datasv);
4015 const char *send = SvPVX(datasv) + SvLEN(datasv);
4023 if (s == send) return 0; /* eof */
4024 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4025 SvCUR_set(datasv, s-SvPVX(datasv));
4027 return SvCUR(buf_sv);
4029 /* Get function pointer hidden within datasv */
4030 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4031 DEBUG_P(PerlIO_printf(Perl_debug_log,
4032 "filter_read %d: via function %p (%s)\n",
4033 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4034 /* Call function. The function is expected to */
4035 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4036 /* Return: <0:error, =0:eof, >0:not eof */
4037 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4041 S_filter_gets(pTHX_ register SV *sv, STRLEN append)
4045 PERL_ARGS_ASSERT_FILTER_GETS;
4047 #ifdef PERL_CR_FILTER
4048 if (!PL_rsfp_filters) {
4049 filter_add(S_cr_textfilter,NULL);
4052 if (PL_rsfp_filters) {
4054 SvCUR_set(sv, 0); /* start with empty line */
4055 if (FILTER_READ(0, sv, 0) > 0)
4056 return ( SvPVX(sv) ) ;
4061 return (sv_gets(sv, PL_rsfp, append));
4065 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4070 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4072 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4076 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4077 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4079 return GvHV(gv); /* Foo:: */
4082 /* use constant CLASS => 'MyClass' */
4083 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4084 if (gv && GvCV(gv)) {
4085 SV * const sv = cv_const_sv(GvCV(gv));
4087 pkgname = SvPV_const(sv, len);
4090 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4094 * S_readpipe_override
4095 * Check whether readpipe() is overridden, and generates the appropriate
4096 * optree, provided sublex_start() is called afterwards.
4099 S_readpipe_override(pTHX)
4102 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
4103 pl_yylval.ival = OP_BACKTICK;
4105 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4107 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
4108 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
4109 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4111 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
4112 op_append_elem(OP_LIST,
4113 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4114 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4121 * The intent of this yylex wrapper is to minimize the changes to the
4122 * tokener when we aren't interested in collecting madprops. It remains
4123 * to be seen how successful this strategy will be...
4130 char *s = PL_bufptr;
4132 /* make sure PL_thiswhite is initialized */
4136 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
4137 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4138 return S_pending_ident(aTHX);
4140 /* previous token ate up our whitespace? */
4141 if (!PL_lasttoke && PL_nextwhite) {
4142 PL_thiswhite = PL_nextwhite;
4146 /* isolate the token, and figure out where it is without whitespace */
4147 PL_realtokenstart = -1;
4151 assert(PL_curforce < 0);
4153 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4154 if (!PL_thistoken) {
4155 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4156 PL_thistoken = newSVpvs("");
4158 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4159 PL_thistoken = newSVpvn(tstart, s - tstart);
4162 if (PL_thismad) /* install head */
4163 CURMAD('X', PL_thistoken);
4166 /* last whitespace of a sublex? */
4167 if (optype == ')' && PL_endwhite) {
4168 CURMAD('X', PL_endwhite);
4173 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4174 if (!PL_thiswhite && !PL_endwhite && !optype) {
4175 sv_free(PL_thistoken);
4180 /* put off final whitespace till peg */
4181 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4182 PL_nextwhite = PL_thiswhite;
4185 else if (PL_thisopen) {
4186 CURMAD('q', PL_thisopen);
4188 sv_free(PL_thistoken);
4192 /* Store actual token text as madprop X */
4193 CURMAD('X', PL_thistoken);
4197 /* add preceding whitespace as madprop _ */
4198 CURMAD('_', PL_thiswhite);
4202 /* add quoted material as madprop = */
4203 CURMAD('=', PL_thisstuff);
4207 /* add terminating quote as madprop Q */
4208 CURMAD('Q', PL_thisclose);
4212 /* special processing based on optype */
4216 /* opval doesn't need a TOKEN since it can already store mp */
4226 if (pl_yylval.opval)
4227 append_madprops(PL_thismad, pl_yylval.opval, 0);
4235 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4244 /* remember any fake bracket that lexer is about to discard */
4245 if (PL_lex_brackets == 1 &&
4246 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4249 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4252 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4253 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4256 break; /* don't bother looking for trailing comment */
4265 /* attach a trailing comment to its statement instead of next token */
4269 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4271 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4273 if (*s == '\n' || *s == '#') {
4274 while (s < PL_bufend && *s != '\n')
4278 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4279 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4296 /* Create new token struct. Note: opvals return early above. */
4297 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4304 S_tokenize_use(pTHX_ int is_use, char *s) {
4307 PERL_ARGS_ASSERT_TOKENIZE_USE;
4309 if (PL_expect != XSTATE)
4310 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4311 is_use ? "use" : "no"));
4313 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4314 s = force_version(s, TRUE);
4315 if (*s == ';' || *s == '}'
4316 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4317 start_force(PL_curforce);
4318 NEXTVAL_NEXTTOKE.opval = NULL;
4321 else if (*s == 'v') {
4322 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4323 s = force_version(s, FALSE);
4327 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4328 s = force_version(s, FALSE);
4330 pl_yylval.ival = is_use;
4334 static const char* const exp_name[] =
4335 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4336 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
4340 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4342 S_word_takes_any_delimeter(char *p, STRLEN len)
4344 return (len == 1 && strchr("msyq", p[0])) ||
4346 (p[0] == 't' && p[1] == 'r') ||
4347 (p[0] == 'q' && strchr("qwxr", p[1]))));
4353 Works out what to call the token just pulled out of the input
4354 stream. The yacc parser takes care of taking the ops we return and
4355 stitching them into a tree.
4361 if read an identifier
4362 if we're in a my declaration
4363 croak if they tried to say my($foo::bar)
4364 build the ops for a my() declaration
4365 if it's an access to a my() variable
4366 are we in a sort block?
4367 croak if my($a); $a <=> $b
4368 build ops for access to a my() variable
4369 if in a dq string, and they've said @foo and we can't find @foo
4371 build ops for a bareword
4372 if we already built the token before, use it.
4377 #pragma segment Perl_yylex
4383 register char *s = PL_bufptr;
4389 /* orig_keyword, gvp, and gv are initialized here because
4390 * jump to the label just_a_word_zero can bypass their
4391 * initialization later. */
4392 I32 orig_keyword = 0;
4397 SV* tmp = newSVpvs("");
4398 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4399 (IV)CopLINE(PL_curcop),
4400 lex_state_names[PL_lex_state],
4401 exp_name[PL_expect],
4402 pv_display(tmp, s, strlen(s), 0, 60));
4405 /* check if there's an identifier for us to look at */
4406 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
4407 return REPORT(S_pending_ident(aTHX));
4409 /* no identifier pending identification */
4411 switch (PL_lex_state) {
4413 case LEX_NORMAL: /* Some compilers will produce faster */
4414 case LEX_INTERPNORMAL: /* code if we comment these out. */
4418 /* when we've already built the next token, just pull it out of the queue */
4422 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4424 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4425 PL_nexttoke[PL_lasttoke].next_mad = 0;
4426 if (PL_thismad && PL_thismad->mad_key == '_') {
4427 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4428 PL_thismad->mad_val = 0;
4429 mad_free(PL_thismad);
4434 PL_lex_state = PL_lex_defer;
4435 PL_expect = PL_lex_expect;
4436 PL_lex_defer = LEX_NORMAL;
4437 if (!PL_nexttoke[PL_lasttoke].next_type)
4442 pl_yylval = PL_nextval[PL_nexttoke];
4444 PL_lex_state = PL_lex_defer;
4445 PL_expect = PL_lex_expect;
4446 PL_lex_defer = LEX_NORMAL;
4452 next_type = PL_nexttoke[PL_lasttoke].next_type;
4454 next_type = PL_nexttype[PL_nexttoke];
4456 if (next_type & (7<<24)) {
4457 if (next_type & (1<<24)) {
4458 if (PL_lex_brackets > 100)
4459 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4460 PL_lex_brackstack[PL_lex_brackets++] =
4461 (char) ((next_type >> 16) & 0xff);
4463 if (next_type & (2<<24))
4464 PL_lex_allbrackets++;
4465 if (next_type & (4<<24))
4466 PL_lex_allbrackets--;
4467 next_type &= 0xffff;
4470 /* FIXME - can these be merged? */
4473 return REPORT(next_type);
4477 /* interpolated case modifiers like \L \U, including \Q and \E.
4478 when we get here, PL_bufptr is at the \
4480 case LEX_INTERPCASEMOD:
4482 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4483 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
4485 /* handle \E or end of string */
4486 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4488 if (PL_lex_casemods) {
4489 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4490 PL_lex_casestack[PL_lex_casemods] = '\0';
4492 if (PL_bufptr != PL_bufend
4493 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
4495 PL_lex_state = LEX_INTERPCONCAT;
4498 PL_thistoken = newSVpvs("\\E");
4501 PL_lex_allbrackets--;
4505 while (PL_bufptr != PL_bufend &&
4506 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4508 PL_thiswhite = newSVpvs("");
4509 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4513 if (PL_bufptr != PL_bufend)
4516 PL_lex_state = LEX_INTERPCONCAT;
4520 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4521 "### Saw case modifier\n"); });
4523 if (s[1] == '\\' && s[2] == 'E') {
4526 PL_thiswhite = newSVpvs("");
4527 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4530 PL_lex_state = LEX_INTERPCONCAT;
4535 if (!PL_madskills) /* when just compiling don't need correct */
4536 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4537 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4538 if ((*s == 'L' || *s == 'U') &&
4539 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4540 PL_lex_casestack[--PL_lex_casemods] = '\0';
4541 PL_lex_allbrackets--;
4544 if (PL_lex_casemods > 10)
4545 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4546 PL_lex_casestack[PL_lex_casemods++] = *s;
4547 PL_lex_casestack[PL_lex_casemods] = '\0';
4548 PL_lex_state = LEX_INTERPCONCAT;
4549 start_force(PL_curforce);
4550 NEXTVAL_NEXTTOKE.ival = 0;
4551 force_next((2<<24)|'(');
4552 start_force(PL_curforce);
4554 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4556 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4558 NEXTVAL_NEXTTOKE.ival = OP_LC;
4560 NEXTVAL_NEXTTOKE.ival = OP_UC;
4562 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4564 Perl_croak(aTHX_ "panic: yylex");
4566 SV* const tmpsv = newSVpvs("\\ ");
4567 /* replace the space with the character we want to escape
4569 SvPVX(tmpsv)[1] = *s;
4575 if (PL_lex_starts) {
4581 sv_free(PL_thistoken);
4582 PL_thistoken = newSVpvs("");
4585 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4586 if (PL_lex_casemods == 1 && PL_lex_inpat)
4595 case LEX_INTERPPUSH:
4596 return REPORT(sublex_push());
4598 case LEX_INTERPSTART:
4599 if (PL_bufptr == PL_bufend)
4600 return REPORT(sublex_done());
4601 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4602 "### Interpolated variable\n"); });
4604 PL_lex_dojoin = (*PL_bufptr == '@');
4605 PL_lex_state = LEX_INTERPNORMAL;
4606 if (PL_lex_dojoin) {
4607 start_force(PL_curforce);
4608 NEXTVAL_NEXTTOKE.ival = 0;
4610 start_force(PL_curforce);
4611 force_ident("\"", '$');
4612 start_force(PL_curforce);
4613 NEXTVAL_NEXTTOKE.ival = 0;
4615 start_force(PL_curforce);
4616 NEXTVAL_NEXTTOKE.ival = 0;
4617 force_next((2<<24)|'(');
4618 start_force(PL_curforce);
4619 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4622 if (PL_lex_starts++) {
4627 sv_free(PL_thistoken);
4628 PL_thistoken = newSVpvs("");
4631 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4632 if (!PL_lex_casemods && PL_lex_inpat)
4639 case LEX_INTERPENDMAYBE:
4640 if (intuit_more(PL_bufptr)) {
4641 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4647 if (PL_lex_dojoin) {
4648 PL_lex_dojoin = FALSE;
4649 PL_lex_state = LEX_INTERPCONCAT;
4653 sv_free(PL_thistoken);
4654 PL_thistoken = newSVpvs("");
4657 PL_lex_allbrackets--;
4660 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4661 && SvEVALED(PL_lex_repl))
4663 if (PL_bufptr != PL_bufend)
4664 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4668 case LEX_INTERPCONCAT:
4670 if (PL_lex_brackets)
4671 Perl_croak(aTHX_ "panic: INTERPCONCAT");
4673 if (PL_bufptr == PL_bufend)
4674 return REPORT(sublex_done());
4676 if (SvIVX(PL_linestr) == '\'') {
4677 SV *sv = newSVsv(PL_linestr);
4680 else if ( PL_hints & HINT_NEW_RE )
4681 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
4682 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4686 s = scan_const(PL_bufptr);
4688 PL_lex_state = LEX_INTERPCASEMOD;
4690 PL_lex_state = LEX_INTERPSTART;
4693 if (s != PL_bufptr) {
4694 start_force(PL_curforce);
4696 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4698 NEXTVAL_NEXTTOKE = pl_yylval;
4701 if (PL_lex_starts++) {
4705 sv_free(PL_thistoken);
4706 PL_thistoken = newSVpvs("");
4709 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4710 if (!PL_lex_casemods && PL_lex_inpat)
4723 PL_lex_state = LEX_NORMAL;
4724 s = scan_formline(PL_bufptr);
4725 if (!PL_lex_formbrack)
4731 PL_oldoldbufptr = PL_oldbufptr;
4737 sv_free(PL_thistoken);
4740 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
4744 if (isIDFIRST_lazy_if(s,UTF))
4747 unsigned char c = *s;
4748 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4749 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4750 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4755 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4759 goto fake_eof; /* emulate EOF on ^D or ^Z */
4765 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
4768 if (PL_lex_brackets &&
4769 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
4770 yyerror((const char *)
4772 ? "Format not terminated"
4773 : "Missing right curly or square bracket"));
4775 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4776 "### Tokener got EOF\n");
4780 if (s++ < PL_bufend)
4781 goto retry; /* ignore stray nulls */
4784 if (!PL_in_eval && !PL_preambled) {
4785 PL_preambled = TRUE;
4791 /* Generate a string of Perl code to load the debugger.
4792 * If PERL5DB is set, it will return the contents of that,
4793 * otherwise a compile-time require of perl5db.pl. */
4795 const char * const pdb = PerlEnv_getenv("PERL5DB");
4798 sv_setpv(PL_linestr, pdb);
4799 sv_catpvs(PL_linestr,";");
4801 SETERRNO(0,SS_NORMAL);
4802 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4805 sv_setpvs(PL_linestr,"");
4806 if (PL_preambleav) {
4807 SV **svp = AvARRAY(PL_preambleav);
4808 SV **const end = svp + AvFILLp(PL_preambleav);
4810 sv_catsv(PL_linestr, *svp);
4812 sv_catpvs(PL_linestr, ";");
4814 sv_free(MUTABLE_SV(PL_preambleav));
4815 PL_preambleav = NULL;
4818 sv_catpvs(PL_linestr,
4819 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4820 if (PL_minus_n || PL_minus_p) {
4821 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4823 sv_catpvs(PL_linestr,"chomp;");
4826 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4827 || *PL_splitstr == '"')
4828 && strchr(PL_splitstr + 1, *PL_splitstr))
4829 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4831 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4832 bytes can be used as quoting characters. :-) */
4833 const char *splits = PL_splitstr;
4834 sv_catpvs(PL_linestr, "our @F=split(q\0");
4837 if (*splits == '\\')
4838 sv_catpvn(PL_linestr, splits, 1);
4839 sv_catpvn(PL_linestr, splits, 1);
4840 } while (*splits++);
4841 /* This loop will embed the trailing NUL of
4842 PL_linestr as the last thing it does before
4844 sv_catpvs(PL_linestr, ");");
4848 sv_catpvs(PL_linestr,"our @F=split(' ');");
4851 sv_catpvs(PL_linestr, "\n");
4852 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4853 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4854 PL_last_lop = PL_last_uni = NULL;
4855 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
4856 update_debugger_info(PL_linestr, NULL, 0);
4861 bof = PL_rsfp ? TRUE : FALSE;
4864 fake_eof = LEX_FAKE_EOF;
4866 PL_bufptr = PL_bufend;
4867 CopLINE_inc(PL_curcop);
4868 if (!lex_next_chunk(fake_eof)) {
4869 CopLINE_dec(PL_curcop);
4871 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4873 CopLINE_dec(PL_curcop);
4876 PL_realtokenstart = -1;
4879 /* If it looks like the start of a BOM or raw UTF-16,
4880 * check if it in fact is. */
4881 if (bof && PL_rsfp &&
4886 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4887 bof = (offset == (Off_t)SvCUR(PL_linestr));
4888 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4889 /* offset may include swallowed CR */
4891 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4894 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4895 s = swallow_bom((U8*)s);
4898 if (PL_parser->in_pod) {
4899 /* Incest with pod. */
4902 sv_catsv(PL_thiswhite, PL_linestr);
4904 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4905 sv_setpvs(PL_linestr, "");
4906 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4907 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4908 PL_last_lop = PL_last_uni = NULL;
4909 PL_parser->in_pod = 0;
4912 if (PL_rsfp || PL_parser->filtered)
4914 } while (PL_parser->in_pod);
4915 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4916 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4917 PL_last_lop = PL_last_uni = NULL;
4918 if (CopLINE(PL_curcop) == 1) {
4919 while (s < PL_bufend && isSPACE(*s))
4921 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
4925 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
4929 if (*s == '#' && *(s+1) == '!')
4931 #ifdef ALTERNATE_SHEBANG
4933 static char const as[] = ALTERNATE_SHEBANG;
4934 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4935 d = s + (sizeof(as) - 1);
4937 #endif /* ALTERNATE_SHEBANG */
4946 while (*d && !isSPACE(*d))
4950 #ifdef ARG_ZERO_IS_SCRIPT
4951 if (ipathend > ipath) {
4953 * HP-UX (at least) sets argv[0] to the script name,
4954 * which makes $^X incorrect. And Digital UNIX and Linux,
4955 * at least, set argv[0] to the basename of the Perl
4956 * interpreter. So, having found "#!", we'll set it right.
4958 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4960 assert(SvPOK(x) || SvGMAGICAL(x));
4961 if (sv_eq(x, CopFILESV(PL_curcop))) {
4962 sv_setpvn(x, ipath, ipathend - ipath);
4968 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
4969 const char * const lstart = SvPV_const(x,llen);
4971 bstart += blen - llen;
4972 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4973 sv_setpvn(x, ipath, ipathend - ipath);
4978 TAINT_NOT; /* $^X is always tainted, but that's OK */
4980 #endif /* ARG_ZERO_IS_SCRIPT */
4985 d = instr(s,"perl -");
4987 d = instr(s,"perl");
4989 /* avoid getting into infinite loops when shebang
4990 * line contains "Perl" rather than "perl" */
4992 for (d = ipathend-4; d >= ipath; --d) {
4993 if ((*d == 'p' || *d == 'P')
4994 && !ibcmp(d, "perl", 4))
5004 #ifdef ALTERNATE_SHEBANG
5006 * If the ALTERNATE_SHEBANG on this system starts with a
5007 * character that can be part of a Perl expression, then if
5008 * we see it but not "perl", we're probably looking at the
5009 * start of Perl code, not a request to hand off to some
5010 * other interpreter. Similarly, if "perl" is there, but
5011 * not in the first 'word' of the line, we assume the line
5012 * contains the start of the Perl program.
5014 if (d && *s != '#') {
5015 const char *c = ipath;
5016 while (*c && !strchr("; \t\r\n\f\v#", *c))
5019 d = NULL; /* "perl" not in first word; ignore */
5021 *s = '#'; /* Don't try to parse shebang line */
5023 #endif /* ALTERNATE_SHEBANG */
5028 !instr(s,"indir") &&
5029 instr(PL_origargv[0],"perl"))
5036 while (s < PL_bufend && isSPACE(*s))
5038 if (s < PL_bufend) {
5039 Newx(newargv,PL_origargc+3,char*);
5041 while (s < PL_bufend && !isSPACE(*s))
5044 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5047 newargv = PL_origargv;
5050 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5052 Perl_croak(aTHX_ "Can't exec %s", ipath);
5055 while (*d && !isSPACE(*d))
5057 while (SPACE_OR_TAB(*d))
5061 const bool switches_done = PL_doswitches;
5062 const U32 oldpdb = PL_perldb;
5063 const bool oldn = PL_minus_n;
5064 const bool oldp = PL_minus_p;
5068 bool baduni = FALSE;
5070 const char *d2 = d1 + 1;
5071 if (parse_unicode_opts((const char **)&d2)
5075 if (baduni || *d1 == 'M' || *d1 == 'm') {
5076 const char * const m = d1;
5077 while (*d1 && !isSPACE(*d1))
5079 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5082 d1 = moreswitches(d1);
5084 if (PL_doswitches && !switches_done) {
5085 int argc = PL_origargc;
5086 char **argv = PL_origargv;
5089 } while (argc && argv[0][0] == '-' && argv[0][1]);
5090 init_argv_symbols(argc,argv);
5092 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5093 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5094 /* if we have already added "LINE: while (<>) {",
5095 we must not do it again */
5097 sv_setpvs(PL_linestr, "");
5098 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5099 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5100 PL_last_lop = PL_last_uni = NULL;
5101 PL_preambled = FALSE;
5102 if (PERLDB_LINE || PERLDB_SAVESRC)
5103 (void)gv_fetchfile(PL_origfilename);
5110 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5112 PL_lex_state = LEX_FORMLINE;
5117 #ifdef PERL_STRICT_CR
5118 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5120 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5122 case ' ': case '\t': case '\f': case 013:
5124 PL_realtokenstart = -1;
5126 PL_thiswhite = newSVpvs("");
5127 sv_catpvn(PL_thiswhite, s, 1);
5134 PL_realtokenstart = -1;
5138 if (PL_lex_state != LEX_NORMAL ||
5139 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5140 if (*s == '#' && s == PL_linestart && PL_in_eval
5141 && !PL_rsfp && !PL_parser->filtered) {
5142 /* handle eval qq[#line 1 "foo"\n ...] */
5143 CopLINE_dec(PL_curcop);
5146 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5148 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5153 while (d < PL_bufend && *d != '\n')
5157 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5158 Perl_croak(aTHX_ "panic: input overflow");
5161 PL_thiswhite = newSVpvn(s, d - s);
5166 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5168 PL_lex_state = LEX_FORMLINE;
5174 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5175 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5178 TOKEN(PEG); /* make sure any #! line is accessible */
5183 /* if (PL_madskills && PL_lex_formbrack) { */
5185 while (d < PL_bufend && *d != '\n')
5189 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5190 Perl_croak(aTHX_ "panic: input overflow");
5191 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5193 PL_thiswhite = newSVpvs("");
5194 if (CopLINE(PL_curcop) == 1) {
5195 sv_setpvs(PL_thiswhite, "");
5198 sv_catpvn(PL_thiswhite, s, d - s);
5212 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
5220 while (s < PL_bufend && SPACE_OR_TAB(*s))
5223 if (strnEQ(s,"=>",2)) {
5224 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
5225 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5226 OPERATOR('-'); /* unary minus */
5228 PL_last_uni = PL_oldbufptr;
5230 case 'r': ftst = OP_FTEREAD; break;
5231 case 'w': ftst = OP_FTEWRITE; break;
5232 case 'x': ftst = OP_FTEEXEC; break;
5233 case 'o': ftst = OP_FTEOWNED; break;
5234 case 'R': ftst = OP_FTRREAD; break;
5235 case 'W': ftst = OP_FTRWRITE; break;
5236 case 'X': ftst = OP_FTREXEC; break;
5237 case 'O': ftst = OP_FTROWNED; break;
5238 case 'e': ftst = OP_FTIS; break;
5239 case 'z': ftst = OP_FTZERO; break;
5240 case 's': ftst = OP_FTSIZE; break;
5241 case 'f': ftst = OP_FTFILE; break;
5242 case 'd': ftst = OP_FTDIR; break;
5243 case 'l': ftst = OP_FTLINK; break;
5244 case 'p': ftst = OP_FTPIPE; break;
5245 case 'S': ftst = OP_FTSOCK; break;
5246 case 'u': ftst = OP_FTSUID; break;
5247 case 'g': ftst = OP_FTSGID; break;
5248 case 'k': ftst = OP_FTSVTX; break;
5249 case 'b': ftst = OP_FTBLK; break;
5250 case 'c': ftst = OP_FTCHR; break;
5251 case 't': ftst = OP_FTTTY; break;
5252 case 'T': ftst = OP_FTTEXT; break;
5253 case 'B': ftst = OP_FTBINARY; break;
5254 case 'M': case 'A': case 'C':
5255 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5257 case 'M': ftst = OP_FTMTIME; break;
5258 case 'A': ftst = OP_FTATIME; break;
5259 case 'C': ftst = OP_FTCTIME; break;
5267 PL_last_lop_op = (OPCODE)ftst;
5268 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5269 "### Saw file test %c\n", (int)tmp);
5274 /* Assume it was a minus followed by a one-letter named
5275 * subroutine call (or a -bareword), then. */
5276 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5277 "### '-%c' looked like a file test but was not\n",
5284 const char tmp = *s++;
5287 if (PL_expect == XOPERATOR)
5292 else if (*s == '>') {
5295 if (isIDFIRST_lazy_if(s,UTF)) {
5296 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5304 if (PL_expect == XOPERATOR) {
5305 if (*s == '=' && !PL_lex_allbrackets &&
5306 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5313 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5315 OPERATOR('-'); /* unary minus */
5321 const char tmp = *s++;
5324 if (PL_expect == XOPERATOR)
5329 if (PL_expect == XOPERATOR) {
5330 if (*s == '=' && !PL_lex_allbrackets &&
5331 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5338 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5345 if (PL_expect != XOPERATOR) {
5346 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5347 PL_expect = XOPERATOR;
5348 force_ident(PL_tokenbuf, '*');
5356 if (*s == '=' && !PL_lex_allbrackets &&
5357 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5363 if (*s == '=' && !PL_lex_allbrackets &&
5364 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5371 if (PL_expect == XOPERATOR) {
5372 if (s[1] == '=' && !PL_lex_allbrackets &&
5373 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5378 PL_tokenbuf[0] = '%';
5379 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5380 sizeof PL_tokenbuf - 1, FALSE);
5381 if (!PL_tokenbuf[1]) {
5384 PL_pending_ident = '%';
5388 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5389 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5394 if (PL_lex_brackets > 100)
5395 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5396 PL_lex_brackstack[PL_lex_brackets++] = 0;
5397 PL_lex_allbrackets++;
5399 const char tmp = *s++;
5404 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5406 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5414 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5421 goto just_a_word_zero_gv;
5424 switch (PL_expect) {
5430 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5432 PL_bufptr = s; /* update in case we back off */
5435 "Use of := for an empty attribute list is not allowed");
5442 PL_expect = XTERMBLOCK;
5445 stuffstart = s - SvPVX(PL_linestr) - 1;
5449 while (isIDFIRST_lazy_if(s,UTF)) {
5452 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5453 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5454 if (tmp < 0) tmp = -tmp;
5469 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5471 d = scan_str(d,TRUE,TRUE);
5473 /* MUST advance bufptr here to avoid bogus
5474 "at end of line" context messages from yyerror().
5476 PL_bufptr = s + len;
5477 yyerror("Unterminated attribute parameter in attribute list");
5481 return REPORT(0); /* EOF indicator */
5485 sv_catsv(sv, PL_lex_stuff);
5486 attrs = op_append_elem(OP_LIST, attrs,
5487 newSVOP(OP_CONST, 0, sv));
5488 SvREFCNT_dec(PL_lex_stuff);
5489 PL_lex_stuff = NULL;
5492 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5494 if (PL_in_my == KEY_our) {
5495 deprecate(":unique");
5498 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5501 /* NOTE: any CV attrs applied here need to be part of
5502 the CVf_BUILTIN_ATTRS define in cv.h! */
5503 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5505 CvLVALUE_on(PL_compcv);
5507 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5509 deprecate(":locked");
5511 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5513 CvMETHOD_on(PL_compcv);
5515 /* After we've set the flags, it could be argued that
5516 we don't need to do the attributes.pm-based setting
5517 process, and shouldn't bother appending recognized
5518 flags. To experiment with that, uncomment the
5519 following "else". (Note that's already been
5520 uncommented. That keeps the above-applied built-in
5521 attributes from being intercepted (and possibly
5522 rejected) by a package's attribute routines, but is
5523 justified by the performance win for the common case
5524 of applying only built-in attributes.) */
5526 attrs = op_append_elem(OP_LIST, attrs,
5527 newSVOP(OP_CONST, 0,
5531 if (*s == ':' && s[1] != ':')
5534 break; /* require real whitespace or :'s */
5535 /* XXX losing whitespace on sequential attributes here */
5539 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5540 if (*s != ';' && *s != '}' && *s != tmp
5541 && (tmp != '=' || *s != ')')) {
5542 const char q = ((*s == '\'') ? '"' : '\'');
5543 /* If here for an expression, and parsed no attrs, back
5545 if (tmp == '=' && !attrs) {
5549 /* MUST advance bufptr here to avoid bogus "at end of line"
5550 context messages from yyerror().
5553 yyerror( (const char *)
5555 ? Perl_form(aTHX_ "Invalid separator character "
5556 "%c%c%c in attribute list", q, *s, q)
5557 : "Unterminated attribute list" ) );
5565 start_force(PL_curforce);
5566 NEXTVAL_NEXTTOKE.opval = attrs;
5567 CURMAD('_', PL_nextwhite);
5572 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5573 (s - SvPVX(PL_linestr)) - stuffstart);
5578 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5582 PL_lex_allbrackets--;
5586 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5587 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5591 PL_lex_allbrackets++;
5594 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5600 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5603 PL_lex_allbrackets--;
5609 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5612 if (PL_lex_brackets <= 0)
5613 yyerror("Unmatched right square bracket");
5616 PL_lex_allbrackets--;
5617 if (PL_lex_state == LEX_INTERPNORMAL) {
5618 if (PL_lex_brackets == 0) {
5619 if (*s == '-' && s[1] == '>')
5620 PL_lex_state = LEX_INTERPENDMAYBE;
5621 else if (*s != '[' && *s != '{')
5622 PL_lex_state = LEX_INTERPEND;
5629 if (PL_lex_brackets > 100) {
5630 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5632 switch (PL_expect) {
5634 if (PL_lex_formbrack) {
5638 if (PL_oldoldbufptr == PL_last_lop)
5639 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5641 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5642 PL_lex_allbrackets++;
5643 OPERATOR(HASHBRACK);
5645 while (s < PL_bufend && SPACE_OR_TAB(*s))
5648 PL_tokenbuf[0] = '\0';
5649 if (d < PL_bufend && *d == '-') {
5650 PL_tokenbuf[0] = '-';
5652 while (d < PL_bufend && SPACE_OR_TAB(*d))
5655 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5656 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5658 while (d < PL_bufend && SPACE_OR_TAB(*d))
5661 const char minus = (PL_tokenbuf[0] == '-');
5662 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5670 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5671 PL_lex_allbrackets++;
5676 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5677 PL_lex_allbrackets++;
5682 if (PL_oldoldbufptr == PL_last_lop)
5683 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5685 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5686 PL_lex_allbrackets++;
5689 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5691 /* This hack is to get the ${} in the message. */
5693 yyerror("syntax error");
5696 OPERATOR(HASHBRACK);
5698 /* This hack serves to disambiguate a pair of curlies
5699 * as being a block or an anon hash. Normally, expectation
5700 * determines that, but in cases where we're not in a
5701 * position to expect anything in particular (like inside
5702 * eval"") we have to resolve the ambiguity. This code
5703 * covers the case where the first term in the curlies is a
5704 * quoted string. Most other cases need to be explicitly
5705 * disambiguated by prepending a "+" before the opening
5706 * curly in order to force resolution as an anon hash.
5708 * XXX should probably propagate the outer expectation
5709 * into eval"" to rely less on this hack, but that could
5710 * potentially break current behavior of eval"".
5714 if (*s == '\'' || *s == '"' || *s == '`') {
5715 /* common case: get past first string, handling escapes */
5716 for (t++; t < PL_bufend && *t != *s;)
5717 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5721 else if (*s == 'q') {
5724 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5727 /* skip q//-like construct */
5729 char open, close, term;
5732 while (t < PL_bufend && isSPACE(*t))
5734 /* check for q => */
5735 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5736 OPERATOR(HASHBRACK);
5740 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5744 for (t++; t < PL_bufend; t++) {
5745 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5747 else if (*t == open)
5751 for (t++; t < PL_bufend; t++) {
5752 if (*t == '\\' && t+1 < PL_bufend)
5754 else if (*t == close && --brackets <= 0)
5756 else if (*t == open)
5763 /* skip plain q word */
5764 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5767 else if (isALNUM_lazy_if(t,UTF)) {
5769 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5772 while (t < PL_bufend && isSPACE(*t))
5774 /* if comma follows first term, call it an anon hash */
5775 /* XXX it could be a comma expression with loop modifiers */
5776 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5777 || (*t == '=' && t[1] == '>')))
5778 OPERATOR(HASHBRACK);
5779 if (PL_expect == XREF)
5782 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5788 pl_yylval.ival = CopLINE(PL_curcop);
5789 if (isSPACE(*s) || *s == '#')
5790 PL_copline = NOLINE; /* invalidate current command line number */
5793 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5797 if (PL_lex_brackets <= 0)
5798 yyerror("Unmatched right curly bracket");
5800 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5801 PL_lex_allbrackets--;
5802 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
5803 PL_lex_formbrack = 0;
5804 if (PL_lex_state == LEX_INTERPNORMAL) {
5805 if (PL_lex_brackets == 0) {
5806 if (PL_expect & XFAKEBRACK) {
5807 PL_expect &= XENUMMASK;
5808 PL_lex_state = LEX_INTERPEND;
5813 PL_thiswhite = newSVpvs("");
5814 sv_catpvs(PL_thiswhite,"}");
5817 return yylex(); /* ignore fake brackets */
5819 if (*s == '-' && s[1] == '>')
5820 PL_lex_state = LEX_INTERPENDMAYBE;
5821 else if (*s != '[' && *s != '{')
5822 PL_lex_state = LEX_INTERPEND;
5825 if (PL_expect & XFAKEBRACK) {
5826 PL_expect &= XENUMMASK;
5828 return yylex(); /* ignore fake brackets */
5830 start_force(PL_curforce);
5832 curmad('X', newSVpvn(s-1,1));
5833 CURMAD('_', PL_thiswhite);
5838 PL_thistoken = newSVpvs("");
5844 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5845 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5852 if (PL_expect == XOPERATOR) {
5853 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5854 && isIDFIRST_lazy_if(s,UTF))
5856 CopLINE_dec(PL_curcop);
5857 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5858 CopLINE_inc(PL_curcop);
5860 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5861 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5868 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5870 PL_expect = XOPERATOR;
5871 force_ident(PL_tokenbuf, '&');
5875 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5881 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5882 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5889 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5890 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5898 const char tmp = *s++;
5900 if (!PL_lex_allbrackets &&
5901 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5908 if (!PL_lex_allbrackets &&
5909 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5917 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5918 && strchr("+-*/%.^&|<",tmp))
5919 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5920 "Reversed %c= operator",(int)tmp);
5922 if (PL_expect == XSTATE && isALPHA(tmp) &&
5923 (s == PL_linestart+1 || s[-2] == '\n') )
5925 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
5930 if (strnEQ(s,"=cut",4)) {
5946 PL_thiswhite = newSVpvs("");
5947 sv_catpvn(PL_thiswhite, PL_linestart,
5948 PL_bufend - PL_linestart);
5952 PL_parser->in_pod = 1;
5956 if (PL_lex_brackets < PL_lex_formbrack) {
5958 #ifdef PERL_STRICT_CR
5959 while (SPACE_OR_TAB(*t))
5961 while (SPACE_OR_TAB(*t) || *t == '\r')
5964 if (*t == '\n' || *t == '#') {
5970 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5979 const char tmp = *s++;
5981 /* was this !=~ where !~ was meant?
5982 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5984 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5985 const char *t = s+1;
5987 while (t < PL_bufend && isSPACE(*t))
5990 if (*t == '/' || *t == '?' ||
5991 ((*t == 'm' || *t == 's' || *t == 'y')
5992 && !isALNUM(t[1])) ||
5993 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5994 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5995 "!=~ should be !~");
5997 if (!PL_lex_allbrackets &&
5998 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6010 if (PL_expect != XOPERATOR) {
6011 if (s[1] != '<' && !strchr(s,'>'))
6014 s = scan_heredoc(s);
6016 s = scan_inputsymbol(s);
6017 TERM(sublex_start());
6023 if (*s == '=' && !PL_lex_allbrackets &&
6024 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6028 SHop(OP_LEFT_SHIFT);
6033 if (!PL_lex_allbrackets &&
6034 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6041 if (!PL_lex_allbrackets &&
6042 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6050 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6058 const char tmp = *s++;
6060 if (*s == '=' && !PL_lex_allbrackets &&
6061 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6065 SHop(OP_RIGHT_SHIFT);
6067 else if (tmp == '=') {
6068 if (!PL_lex_allbrackets &&
6069 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6077 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6086 if (PL_expect == XOPERATOR) {
6087 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6088 return deprecate_commaless_var_list();
6092 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6093 PL_tokenbuf[0] = '@';
6094 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6095 sizeof PL_tokenbuf - 1, FALSE);
6096 if (PL_expect == XOPERATOR)
6097 no_op("Array length", s);
6098 if (!PL_tokenbuf[1])
6100 PL_expect = XOPERATOR;
6101 PL_pending_ident = '#';
6105 PL_tokenbuf[0] = '$';
6106 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6107 sizeof PL_tokenbuf - 1, FALSE);
6108 if (PL_expect == XOPERATOR)
6110 if (!PL_tokenbuf[1]) {
6112 yyerror("Final $ should be \\$ or $name");
6118 const char tmp = *s;
6119 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6122 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6123 && intuit_more(s)) {
6125 PL_tokenbuf[0] = '@';
6126 if (ckWARN(WARN_SYNTAX)) {
6129 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6132 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6133 while (t < PL_bufend && *t != ']')
6135 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6136 "Multidimensional syntax %.*s not supported",
6137 (int)((t - PL_bufptr) + 1), PL_bufptr);
6141 else if (*s == '{') {
6143 PL_tokenbuf[0] = '%';
6144 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6145 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6147 char tmpbuf[sizeof PL_tokenbuf];
6150 } while (isSPACE(*t));
6151 if (isIDFIRST_lazy_if(t,UTF)) {
6153 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6157 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
6158 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6159 "You need to quote \"%s\"",
6166 PL_expect = XOPERATOR;
6167 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6168 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6169 if (!islop || PL_last_lop_op == OP_GREPSTART)
6170 PL_expect = XOPERATOR;
6171 else if (strchr("$@\"'`q", *s))
6172 PL_expect = XTERM; /* e.g. print $fh "foo" */
6173 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6174 PL_expect = XTERM; /* e.g. print $fh &sub */
6175 else if (isIDFIRST_lazy_if(s,UTF)) {
6176 char tmpbuf[sizeof PL_tokenbuf];
6178 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6179 if ((t2 = keyword(tmpbuf, len, 0))) {
6180 /* binary operators exclude handle interpretations */
6192 PL_expect = XTERM; /* e.g. print $fh length() */
6197 PL_expect = XTERM; /* e.g. print $fh subr() */
6200 else if (isDIGIT(*s))
6201 PL_expect = XTERM; /* e.g. print $fh 3 */
6202 else if (*s == '.' && isDIGIT(s[1]))
6203 PL_expect = XTERM; /* e.g. print $fh .3 */
6204 else if ((*s == '?' || *s == '-' || *s == '+')
6205 && !isSPACE(s[1]) && s[1] != '=')
6206 PL_expect = XTERM; /* e.g. print $fh -1 */
6207 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6209 PL_expect = XTERM; /* e.g. print $fh /.../
6210 XXX except DORDOR operator
6212 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6214 PL_expect = XTERM; /* print $fh <<"EOF" */
6217 PL_pending_ident = '$';
6221 if (PL_expect == XOPERATOR)
6223 PL_tokenbuf[0] = '@';
6224 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6225 if (!PL_tokenbuf[1]) {
6228 if (PL_lex_state == LEX_NORMAL)
6230 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6232 PL_tokenbuf[0] = '%';
6234 /* Warn about @ where they meant $. */
6235 if (*s == '[' || *s == '{') {
6236 if (ckWARN(WARN_SYNTAX)) {
6237 const char *t = s + 1;
6238 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
6240 if (*t == '}' || *t == ']') {
6242 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6243 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6244 "Scalar value %.*s better written as $%.*s",
6245 (int)(t-PL_bufptr), PL_bufptr,
6246 (int)(t-PL_bufptr-1), PL_bufptr+1);
6251 PL_pending_ident = '@';
6254 case '/': /* may be division, defined-or, or pattern */
6255 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6256 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6257 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6262 case '?': /* may either be conditional or pattern */
6263 if (PL_expect == XOPERATOR) {
6266 if (!PL_lex_allbrackets &&
6267 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6271 PL_lex_allbrackets++;
6277 /* A // operator. */
6278 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6279 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6280 LEX_FAKEEOF_LOGIC)) {
6288 if (*s == '=' && !PL_lex_allbrackets &&
6289 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6298 /* Disable warning on "study /blah/" */
6299 if (PL_oldoldbufptr == PL_last_uni
6300 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6301 || memNE(PL_last_uni, "study", 5)
6302 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6306 deprecate("?PATTERN? without explicit operator");
6307 s = scan_pat(s,OP_MATCH);
6308 TERM(sublex_start());
6312 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6313 #ifdef PERL_STRICT_CR
6316 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6318 && (s == PL_linestart || s[-1] == '\n') )
6320 PL_lex_formbrack = 0;
6324 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6328 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6331 if (!PL_lex_allbrackets &&
6332 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6339 pl_yylval.ival = OPf_SPECIAL;
6345 if (*s == '=' && !PL_lex_allbrackets &&
6346 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6353 case '0': case '1': case '2': case '3': case '4':
6354 case '5': case '6': case '7': case '8': case '9':
6355 s = scan_num(s, &pl_yylval);
6356 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6357 if (PL_expect == XOPERATOR)
6362 s = scan_str(s,!!PL_madskills,FALSE);
6363 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6364 if (PL_expect == XOPERATOR) {
6365 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6366 return deprecate_commaless_var_list();
6373 pl_yylval.ival = OP_CONST;
6374 TERM(sublex_start());
6377 s = scan_str(s,!!PL_madskills,FALSE);
6378 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6379 if (PL_expect == XOPERATOR) {
6380 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6381 return deprecate_commaless_var_list();
6388 pl_yylval.ival = OP_CONST;
6389 /* FIXME. I think that this can be const if char *d is replaced by
6390 more localised variables. */
6391 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6392 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6393 pl_yylval.ival = OP_STRINGIFY;
6397 TERM(sublex_start());
6400 s = scan_str(s,!!PL_madskills,FALSE);
6401 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6402 if (PL_expect == XOPERATOR)
6403 no_op("Backticks",s);
6406 readpipe_override();
6407 TERM(sublex_start());
6411 if (PL_lex_inwhat && isDIGIT(*s))
6412 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6414 if (PL_expect == XOPERATOR)
6415 no_op("Backslash",s);
6419 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6420 char *start = s + 2;
6421 while (isDIGIT(*start) || *start == '_')
6423 if (*start == '.' && isDIGIT(start[1])) {
6424 s = scan_num(s, &pl_yylval);
6427 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6428 else if (!isALPHA(*start) && (PL_expect == XTERM
6429 || PL_expect == XREF || PL_expect == XSTATE
6430 || PL_expect == XTERMORDORDOR)) {
6431 GV *const gv = gv_fetchpvn_flags(s, start - s,
6432 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6434 s = scan_num(s, &pl_yylval);
6441 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6484 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6486 /* Some keywords can be followed by any delimiter, including ':' */
6487 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6489 /* x::* is just a word, unless x is "CORE" */
6490 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
6494 while (d < PL_bufend && isSPACE(*d))
6495 d++; /* no comments skipped here, or s### is misparsed */
6497 /* Is this a word before a => operator? */
6498 if (*d == '=' && d[1] == '>') {
6501 = (OP*)newSVOP(OP_CONST, 0,
6502 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6503 pl_yylval.opval->op_private = OPpCONST_BARE;
6507 /* Check for plugged-in keyword */
6511 char *saved_bufptr = PL_bufptr;
6513 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6515 if (result == KEYWORD_PLUGIN_DECLINE) {
6516 /* not a plugged-in keyword */
6517 PL_bufptr = saved_bufptr;
6518 } else if (result == KEYWORD_PLUGIN_STMT) {
6519 pl_yylval.opval = o;
6522 return REPORT(PLUGSTMT);
6523 } else if (result == KEYWORD_PLUGIN_EXPR) {
6524 pl_yylval.opval = o;
6526 PL_expect = XOPERATOR;
6527 return REPORT(PLUGEXPR);
6529 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6534 /* Check for built-in keyword */
6535 tmp = keyword(PL_tokenbuf, len, 0);
6537 /* Is this a label? */
6538 if (!anydelim && PL_expect == XSTATE
6539 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6541 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6546 if (tmp < 0) { /* second-class keyword? */
6547 GV *ogv = NULL; /* override (winner) */
6548 GV *hgv = NULL; /* hidden (loser) */
6549 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6551 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6552 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
6555 if (GvIMPORTED_CV(gv))
6557 else if (! CvMETHOD(cv))
6561 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6562 UTF ? -(I32)len : (I32)len, FALSE)) &&
6563 (gv = *gvp) && isGV_with_GP(gv) &&
6564 GvCVu(gv) && GvIMPORTED_CV(gv))
6571 tmp = 0; /* overridden by import or by GLOBAL */
6574 && -tmp==KEY_lock /* XXX generalizable kludge */
6577 tmp = 0; /* any sub overrides "weak" keyword */
6579 else { /* no override */
6581 if (tmp == KEY_dump) {
6582 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6583 "dump() better written as CORE::dump()");
6587 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6588 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6589 "Ambiguous call resolved as CORE::%s(), "
6590 "qualify as such or use &",
6598 default: /* not a keyword */
6599 /* Trade off - by using this evil construction we can pull the
6600 variable gv into the block labelled keylookup. If not, then
6601 we have to give it function scope so that the goto from the
6602 earlier ':' case doesn't bypass the initialisation. */
6604 just_a_word_zero_gv:
6612 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6616 SV *nextPL_nextwhite = 0;
6620 /* Get the rest if it looks like a package qualifier */
6622 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6624 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6627 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
6628 *s == '\'' ? "'" : "::");
6633 if (PL_expect == XOPERATOR) {
6634 if (PL_bufptr == PL_linestart) {
6635 CopLINE_dec(PL_curcop);
6636 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6637 CopLINE_inc(PL_curcop);
6640 no_op("Bareword",s);
6643 /* Look for a subroutine with this name in current package,
6644 unless name is "Foo::", in which case Foo is a bareword
6645 (and a package name). */
6647 if (len > 2 && !PL_madskills &&
6648 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
6650 if (ckWARN(WARN_BAREWORD)
6651 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6652 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6653 "Bareword \"%s\" refers to nonexistent package",
6656 PL_tokenbuf[len] = '\0';
6662 /* Mustn't actually add anything to a symbol table.
6663 But also don't want to "initialise" any placeholder
6664 constants that might already be there into full
6665 blown PVGVs with attached PVCV. */
6666 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6667 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6673 /* if we saw a global override before, get the right name */
6675 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6676 len ? len : strlen(PL_tokenbuf));
6678 SV * const tmp_sv = sv;
6679 sv = newSVpvs("CORE::GLOBAL::");
6680 sv_catsv(sv, tmp_sv);
6681 SvREFCNT_dec(tmp_sv);
6685 if (PL_madskills && !PL_thistoken) {
6686 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
6687 PL_thistoken = newSVpvn(start,s - start);
6688 PL_realtokenstart = s - SvPVX(PL_linestr);
6692 /* Presume this is going to be a bareword of some sort. */
6694 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6695 pl_yylval.opval->op_private = OPpCONST_BARE;
6697 /* And if "Foo::", then that's what it certainly is. */
6702 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6703 const_op->op_private = OPpCONST_BARE;
6704 rv2cv_op = newCVREF(0, const_op);
6706 cv = rv2cv_op_cv(rv2cv_op, 0);
6708 /* See if it's the indirect object for a list operator. */
6710 if (PL_oldoldbufptr &&
6711 PL_oldoldbufptr < PL_bufptr &&
6712 (PL_oldoldbufptr == PL_last_lop
6713 || PL_oldoldbufptr == PL_last_uni) &&
6714 /* NO SKIPSPACE BEFORE HERE! */
6715 (PL_expect == XREF ||
6716 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
6718 bool immediate_paren = *s == '(';
6720 /* (Now we can afford to cross potential line boundary.) */
6721 s = SKIPSPACE2(s,nextPL_nextwhite);
6723 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
6726 /* Two barewords in a row may indicate method call. */
6728 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
6729 (tmp = intuit_method(s, gv, cv))) {
6731 if (tmp == METHOD && !PL_lex_allbrackets &&
6732 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6733 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6737 /* If not a declared subroutine, it's an indirect object. */
6738 /* (But it's an indir obj regardless for sort.) */
6739 /* Also, if "_" follows a filetest operator, it's a bareword */
6742 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
6744 (PL_last_lop_op != OP_MAPSTART &&
6745 PL_last_lop_op != OP_GREPSTART))))
6746 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6747 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6750 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6755 PL_expect = XOPERATOR;
6758 s = SKIPSPACE2(s,nextPL_nextwhite);
6759 PL_nextwhite = nextPL_nextwhite;
6764 /* Is this a word before a => operator? */
6765 if (*s == '=' && s[1] == '>' && !pkgname) {
6768 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
6769 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6770 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
6774 /* If followed by a paren, it's certainly a subroutine. */
6779 while (SPACE_OR_TAB(*d))
6781 if (*d == ')' && (sv = cv_const_sv(cv))) {
6788 PL_nextwhite = PL_thiswhite;
6791 start_force(PL_curforce);
6793 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6794 PL_expect = XOPERATOR;
6797 PL_nextwhite = nextPL_nextwhite;
6798 curmad('X', PL_thistoken);
6799 PL_thistoken = newSVpvs("");
6808 /* If followed by var or block, call it a method (unless sub) */
6810 if ((*s == '$' || *s == '{') && !cv) {
6812 PL_last_lop = PL_oldbufptr;
6813 PL_last_lop_op = OP_METHOD;
6814 if (!PL_lex_allbrackets &&
6815 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6816 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6820 /* If followed by a bareword, see if it looks like indir obj. */
6823 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
6824 && (tmp = intuit_method(s, gv, cv))) {
6826 if (tmp == METHOD && !PL_lex_allbrackets &&
6827 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6828 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6832 /* Not a method, so call it a subroutine (if defined) */
6835 if (lastchar == '-')
6836 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6837 "Ambiguous use of -%s resolved as -&%s()",
6838 PL_tokenbuf, PL_tokenbuf);
6839 /* Check for a constant sub */
6840 if ((sv = cv_const_sv(cv))) {
6843 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6844 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6845 pl_yylval.opval->op_private = 0;
6846 pl_yylval.opval->op_flags |= OPf_SPECIAL;
6850 op_free(pl_yylval.opval);
6851 pl_yylval.opval = rv2cv_op;
6852 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6853 PL_last_lop = PL_oldbufptr;
6854 PL_last_lop_op = OP_ENTERSUB;
6855 /* Is there a prototype? */
6862 STRLEN protolen = CvPROTOLEN(cv);
6863 const char *proto = CvPROTO(cv);
6867 if ((optional = *proto == ';'))
6870 while (*proto == ';');
6874 *proto == '$' || *proto == '_'
6875 || *proto == '*' || *proto == '+'
6880 *proto == '\\' && proto[1] && proto[2] == '\0'
6883 UNIPROTO(UNIOPSUB,optional);
6884 if (*proto == '\\' && proto[1] == '[') {
6885 const char *p = proto + 2;
6886 while(*p && *p != ']')
6888 if(*p == ']' && !p[1])
6889 UNIPROTO(UNIOPSUB,optional);
6891 if (*proto == '&' && *s == '{') {
6893 sv_setpvs(PL_subname, "__ANON__");
6895 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6896 if (!PL_lex_allbrackets &&
6897 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6898 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6905 PL_nextwhite = PL_thiswhite;
6908 start_force(PL_curforce);
6909 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6912 PL_nextwhite = nextPL_nextwhite;
6913 curmad('X', PL_thistoken);
6914 PL_thistoken = newSVpvs("");
6917 if (!PL_lex_allbrackets &&
6918 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6919 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6924 /* Guess harder when madskills require "best effort". */
6925 if (PL_madskills && (!gv || !GvCVu(gv))) {
6926 int probable_sub = 0;
6927 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6929 else if (isALPHA(*s)) {
6933 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
6934 if (!keyword(tmpbuf, tmplen, 0))
6937 while (d < PL_bufend && isSPACE(*d))
6939 if (*d == '=' && d[1] == '>')
6944 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
6946 op_free(pl_yylval.opval);
6947 pl_yylval.opval = rv2cv_op;
6948 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
6949 PL_last_lop = PL_oldbufptr;
6950 PL_last_lop_op = OP_ENTERSUB;
6951 PL_nextwhite = PL_thiswhite;
6953 start_force(PL_curforce);
6954 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6956 PL_nextwhite = nextPL_nextwhite;
6957 curmad('X', PL_thistoken);
6958 PL_thistoken = newSVpvs("");
6960 if (!PL_lex_allbrackets &&
6961 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6962 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6966 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
6969 if (!PL_lex_allbrackets &&
6970 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6971 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6976 /* Call it a bare word */
6978 if (PL_hints & HINT_STRICT_SUBS)
6979 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6982 /* after "print" and similar functions (corresponding to
6983 * "F? L" in opcode.pl), whatever wasn't already parsed as
6984 * a filehandle should be subject to "strict subs".
6985 * Likewise for the optional indirect-object argument to system
6986 * or exec, which can't be a bareword */
6987 if ((PL_last_lop_op == OP_PRINT
6988 || PL_last_lop_op == OP_PRTF
6989 || PL_last_lop_op == OP_SAY
6990 || PL_last_lop_op == OP_SYSTEM
6991 || PL_last_lop_op == OP_EXEC)
6992 && (PL_hints & HINT_STRICT_SUBS))
6993 pl_yylval.opval->op_private |= OPpCONST_STRICT;
6994 if (lastchar != '-') {
6995 if (ckWARN(WARN_RESERVED)) {
6999 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7000 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7008 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7009 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7010 "Operator or semicolon missing before %c%s",
7011 lastchar, PL_tokenbuf);
7012 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7013 "Ambiguous use of %c resolved as operator %c",
7014 lastchar, lastchar);
7021 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7026 (OP*)newSVOP(OP_CONST, 0,
7027 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7030 case KEY___PACKAGE__:
7032 (OP*)newSVOP(OP_CONST, 0,
7034 ? newSVhek(HvNAME_HEK(PL_curstash))
7041 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7042 const char *pname = "main";
7045 if (PL_tokenbuf[2] == 'D')
7048 PL_curstash ? PL_curstash : PL_defstash;
7049 pname = HvNAME_get(stash);
7050 plen = HvNAMELEN (stash);
7051 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7053 gv = gv_fetchpvn_flags(
7054 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7055 plen+6, GV_ADD|putf8, SVt_PVIO
7059 GvIOp(gv) = newIO();
7060 IoIFP(GvIOp(gv)) = PL_rsfp;
7061 #if defined(HAS_FCNTL) && defined(F_SETFD)
7063 const int fd = PerlIO_fileno(PL_rsfp);
7064 fcntl(fd,F_SETFD,fd >= 3);
7067 /* Mark this internal pseudo-handle as clean */
7068 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7069 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7070 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7072 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7073 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7074 /* if the script was opened in binmode, we need to revert
7075 * it to text mode for compatibility; but only iff it has CRs
7076 * XXX this is a questionable hack at best. */
7077 if (PL_bufend-PL_bufptr > 2
7078 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7081 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7082 loc = PerlIO_tell(PL_rsfp);
7083 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7086 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7088 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7089 #endif /* NETWARE */
7091 PerlIO_seek(PL_rsfp, loc, 0);
7095 #ifdef PERLIO_LAYERS
7098 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7099 else if (PL_encoding) {
7106 XPUSHs(PL_encoding);
7108 call_method("name", G_SCALAR);
7112 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7113 Perl_form(aTHX_ ":encoding(%"SVf")",
7122 if (PL_realtokenstart >= 0) {
7123 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7125 PL_endwhite = newSVpvs("");
7126 sv_catsv(PL_endwhite, PL_thiswhite);
7128 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7129 PL_realtokenstart = -1;
7131 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7141 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7150 if (PL_expect == XSTATE) {
7157 if (*s == ':' && s[1] == ':') {
7160 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7161 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
7162 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
7165 else if (tmp == KEY_require || tmp == KEY_do
7167 /* that's a way to remember we saw "CORE::" */
7180 LOP(OP_ACCEPT,XTERM);
7183 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7188 LOP(OP_ATAN2,XTERM);
7194 LOP(OP_BINMODE,XTERM);
7197 LOP(OP_BLESS,XTERM);
7206 /* We have to disambiguate the two senses of
7207 "continue". If the next token is a '{' then
7208 treat it as the start of a continue block;
7209 otherwise treat it as a control operator.
7219 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7229 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7238 if (!PL_cryptseen) {
7239 PL_cryptseen = TRUE;
7243 LOP(OP_CRYPT,XTERM);
7246 LOP(OP_CHMOD,XTERM);
7249 LOP(OP_CHOWN,XTERM);
7252 LOP(OP_CONNECT,XTERM);
7271 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7272 if (orig_keyword == KEY_do) {
7281 PL_hints |= HINT_BLOCK_SCOPE;
7291 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7292 STR_WITH_LEN("NDBM_File::"),
7293 STR_WITH_LEN("DB_File::"),
7294 STR_WITH_LEN("GDBM_File::"),
7295 STR_WITH_LEN("SDBM_File::"),
7296 STR_WITH_LEN("ODBM_File::"),
7298 LOP(OP_DBMOPEN,XTERM);
7304 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7311 pl_yylval.ival = CopLINE(PL_curcop);
7315 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7329 if (*s == '{') { /* block eval */
7330 PL_expect = XTERMBLOCK;
7331 UNIBRACK(OP_ENTERTRY);
7333 else { /* string eval */
7335 UNIBRACK(OP_ENTEREVAL);
7340 UNIBRACK(-OP_ENTEREVAL);
7354 case KEY_endhostent:
7360 case KEY_endservent:
7363 case KEY_endprotoent:
7374 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7376 pl_yylval.ival = CopLINE(PL_curcop);
7378 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7381 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7384 if ((PL_bufend - p) >= 3 &&
7385 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7387 else if ((PL_bufend - p) >= 4 &&
7388 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7391 if (isIDFIRST_lazy_if(p,UTF)) {
7392 p = scan_ident(p, PL_bufend,
7393 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
7397 Perl_croak(aTHX_ "Missing $ on loop variable");
7399 s = SvPVX(PL_linestr) + soff;
7405 LOP(OP_FORMLINE,XTERM);
7411 LOP(OP_FCNTL,XTERM);
7417 LOP(OP_FLOCK,XTERM);
7420 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7425 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7430 LOP(OP_GREPSTART, XREF);
7433 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7448 case KEY_getpriority:
7449 LOP(OP_GETPRIORITY,XTERM);
7451 case KEY_getprotobyname:
7454 case KEY_getprotobynumber:
7455 LOP(OP_GPBYNUMBER,XTERM);
7457 case KEY_getprotoent:
7469 case KEY_getpeername:
7470 UNI(OP_GETPEERNAME);
7472 case KEY_gethostbyname:
7475 case KEY_gethostbyaddr:
7476 LOP(OP_GHBYADDR,XTERM);
7478 case KEY_gethostent:
7481 case KEY_getnetbyname:
7484 case KEY_getnetbyaddr:
7485 LOP(OP_GNBYADDR,XTERM);
7490 case KEY_getservbyname:
7491 LOP(OP_GSBYNAME,XTERM);
7493 case KEY_getservbyport:
7494 LOP(OP_GSBYPORT,XTERM);
7496 case KEY_getservent:
7499 case KEY_getsockname:
7500 UNI(OP_GETSOCKNAME);
7502 case KEY_getsockopt:
7503 LOP(OP_GSOCKOPT,XTERM);
7518 pl_yylval.ival = CopLINE(PL_curcop);
7523 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7531 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7533 pl_yylval.ival = CopLINE(PL_curcop);
7537 LOP(OP_INDEX,XTERM);
7543 LOP(OP_IOCTL,XTERM);
7555 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7572 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7577 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7591 LOP(OP_LISTEN,XTERM);
7600 s = scan_pat(s,OP_MATCH);
7601 TERM(sublex_start());
7604 LOP(OP_MAPSTART, XREF);
7607 LOP(OP_MKDIR,XTERM);
7610 LOP(OP_MSGCTL,XTERM);
7613 LOP(OP_MSGGET,XTERM);
7616 LOP(OP_MSGRCV,XTERM);
7619 LOP(OP_MSGSND,XTERM);
7624 PL_in_my = (U16)tmp;
7626 if (isIDFIRST_lazy_if(s,UTF)) {
7630 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7631 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7633 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7634 if (!PL_in_my_stash) {
7637 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7641 if (PL_madskills) { /* just add type to declarator token */
7642 sv_catsv(PL_thistoken, PL_nextwhite);
7644 sv_catpvn(PL_thistoken, start, s - start);
7652 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7656 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7661 s = tokenize_use(0, s);
7665 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
7668 if (!PL_lex_allbrackets &&
7669 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7670 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7676 if (isIDFIRST_lazy_if(s,UTF)) {
7678 for (d = s; isALNUM_lazy_if(d,UTF);)
7680 for (t=d; isSPACE(*t);)
7682 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7684 && !(t[0] == '=' && t[1] == '>')
7685 && !keyword(s, d-s, 0)
7687 int parms_len = (int)(d-s);
7688 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7689 "Precedence problem: open %.*s should be open(%.*s)",
7690 parms_len, s, parms_len, s);
7696 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7698 pl_yylval.ival = OP_OR;
7708 LOP(OP_OPEN_DIR,XTERM);
7711 checkcomma(s,PL_tokenbuf,"filehandle");
7715 checkcomma(s,PL_tokenbuf,"filehandle");
7734 s = force_word(s,WORD,FALSE,TRUE,FALSE);
7736 s = force_strict_version(s);
7737 PL_lex_expect = XBLOCK;
7741 LOP(OP_PIPE_OP,XTERM);
7744 s = scan_str(s,!!PL_madskills,FALSE);
7747 pl_yylval.ival = OP_CONST;
7748 TERM(sublex_start());
7755 s = scan_str(s,!!PL_madskills,FALSE);
7758 PL_expect = XOPERATOR;
7759 if (SvCUR(PL_lex_stuff)) {
7760 int warned_comma = !ckWARN(WARN_QW);
7761 int warned_comment = warned_comma;
7762 d = SvPV_force(PL_lex_stuff, len);
7764 for (; isSPACE(*d) && len; --len, ++d)
7769 if (!warned_comma || !warned_comment) {
7770 for (; !isSPACE(*d) && len; --len, ++d) {
7771 if (!warned_comma && *d == ',') {
7772 Perl_warner(aTHX_ packWARN(WARN_QW),
7773 "Possible attempt to separate words with commas");
7776 else if (!warned_comment && *d == '#') {
7777 Perl_warner(aTHX_ packWARN(WARN_QW),
7778 "Possible attempt to put comments in qw() list");
7784 for (; !isSPACE(*d) && len; --len, ++d)
7787 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7788 words = op_append_elem(OP_LIST, words,
7789 newSVOP(OP_CONST, 0, tokeq(sv)));
7794 words = newNULLLIST();
7796 SvREFCNT_dec(PL_lex_stuff);
7797 PL_lex_stuff = NULL;
7799 PL_expect = XOPERATOR;
7800 pl_yylval.opval = sawparens(words);
7805 s = scan_str(s,!!PL_madskills,FALSE);
7808 pl_yylval.ival = OP_STRINGIFY;
7809 if (SvIVX(PL_lex_stuff) == '\'')
7810 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7811 TERM(sublex_start());
7814 s = scan_pat(s,OP_QR);
7815 TERM(sublex_start());
7818 s = scan_str(s,!!PL_madskills,FALSE);
7821 readpipe_override();
7822 TERM(sublex_start());
7830 s = force_version(s, FALSE);
7832 else if (*s != 'v' || !isDIGIT(s[1])
7833 || (s = force_version(s, TRUE), *s == 'v'))
7835 *PL_tokenbuf = '\0';
7836 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7837 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
7838 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7839 GV_ADD | (UTF ? SVf_UTF8 : 0));
7841 yyerror("<> should be quotes");
7843 if (orig_keyword == KEY_require) {
7851 PL_last_uni = PL_oldbufptr;
7852 PL_last_lop_op = OP_REQUIRE;
7854 return REPORT( (int)REQUIRE );
7860 s = force_word(s,WORD,TRUE,FALSE,FALSE);
7864 LOP(OP_RENAME,XTERM);
7873 LOP(OP_RINDEX,XTERM);
7882 UNIDOR(OP_READLINE);
7885 UNIDOR(OP_BACKTICK);
7894 LOP(OP_REVERSE,XTERM);
7897 UNIDOR(OP_READLINK);
7904 if (pl_yylval.opval)
7905 TERM(sublex_start());
7907 TOKEN(1); /* force error */
7910 checkcomma(s,PL_tokenbuf,"filehandle");
7920 LOP(OP_SELECT,XTERM);
7926 LOP(OP_SEMCTL,XTERM);
7929 LOP(OP_SEMGET,XTERM);
7932 LOP(OP_SEMOP,XTERM);
7938 LOP(OP_SETPGRP,XTERM);
7940 case KEY_setpriority:
7941 LOP(OP_SETPRIORITY,XTERM);
7943 case KEY_sethostent:
7949 case KEY_setservent:
7952 case KEY_setprotoent:
7962 LOP(OP_SEEKDIR,XTERM);
7964 case KEY_setsockopt:
7965 LOP(OP_SSOCKOPT,XTERM);
7971 LOP(OP_SHMCTL,XTERM);
7974 LOP(OP_SHMGET,XTERM);
7977 LOP(OP_SHMREAD,XTERM);
7980 LOP(OP_SHMWRITE,XTERM);
7983 LOP(OP_SHUTDOWN,XTERM);
7992 LOP(OP_SOCKET,XTERM);
7994 case KEY_socketpair:
7995 LOP(OP_SOCKPAIR,XTERM);
7998 checkcomma(s,PL_tokenbuf,"subroutine name");
8000 if (*s == ';' || *s == ')') /* probably a close */
8001 Perl_croak(aTHX_ "sort is now a reserved word");
8003 s = force_word(s,WORD,TRUE,TRUE,FALSE);
8007 LOP(OP_SPLIT,XTERM);
8010 LOP(OP_SPRINTF,XTERM);
8013 LOP(OP_SPLICE,XTERM);
8028 LOP(OP_SUBSTR,XTERM);
8034 char tmpbuf[sizeof PL_tokenbuf];
8035 SSize_t tboffset = 0;
8036 expectation attrful;
8037 bool have_name, have_proto;
8038 const int key = tmp;
8043 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8044 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
8048 s = SKIPSPACE2(s,tmpwhite);
8053 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8054 (*s == ':' && s[1] == ':'))
8057 SV *nametoke = NULL;
8061 attrful = XATTRBLOCK;
8062 /* remember buffer pos'n for later force_word */
8063 tboffset = s - PL_oldbufptr;
8064 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
8067 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8069 if (memchr(tmpbuf, ':', len))
8070 sv_setpvn(PL_subname, tmpbuf, len);
8072 sv_setsv(PL_subname,PL_curstname);
8073 sv_catpvs(PL_subname,"::");
8074 sv_catpvn(PL_subname,tmpbuf,len);
8076 if (SvUTF8(PL_linestr))
8077 SvUTF8_on(PL_subname);
8083 CURMAD('X', nametoke);
8084 CURMAD('_', tmpwhite);
8085 (void) force_word(PL_oldbufptr + tboffset, WORD,
8088 s = SKIPSPACE2(d,tmpwhite);
8095 Perl_croak(aTHX_ "Missing name in \"my sub\"");
8096 PL_expect = XTERMBLOCK;
8097 attrful = XATTRTERM;
8098 sv_setpvs(PL_subname,"?");
8102 if (key == KEY_format) {
8104 PL_lex_formbrack = PL_lex_brackets + 1;
8106 PL_thistoken = subtoken;
8110 (void) force_word(PL_oldbufptr + tboffset, WORD,
8116 /* Look for a prototype */
8119 bool bad_proto = FALSE;
8120 bool in_brackets = FALSE;
8121 char greedy_proto = ' ';
8122 bool proto_after_greedy_proto = FALSE;
8123 bool must_be_last = FALSE;
8124 bool underscore = FALSE;
8125 bool seen_underscore = FALSE;
8126 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
8129 s = scan_str(s,!!PL_madskills,FALSE);
8131 Perl_croak(aTHX_ "Prototype not terminated");
8132 /* strip spaces and check for bad characters */
8133 d = SvPV(PL_lex_stuff, tmplen);
8135 for (p = d; tmplen; tmplen--, ++p) {
8139 if (warnillegalproto) {
8141 proto_after_greedy_proto = TRUE;
8142 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
8154 else if ( *p == ']' ) {
8155 in_brackets = FALSE;
8157 else if ( (*p == '@' || *p == '%') &&
8158 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8160 must_be_last = TRUE;
8163 else if ( *p == '_' ) {
8164 underscore = seen_underscore = TRUE;
8171 if (proto_after_greedy_proto)
8172 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8173 "Prototype after '%c' for %"SVf" : %s",
8174 greedy_proto, SVfARG(PL_subname), d);
8176 SV *dsv = newSVpvs_flags("", SVs_TEMP);
8177 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
8178 "Illegal character %sin prototype for %"SVf" : %s",
8179 seen_underscore ? "after '_' " : "",
8182 newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
8183 tmp, UNI_DISPLAY_ISPRINT));
8185 SvCUR_set(PL_lex_stuff, tmp);
8190 CURMAD('q', PL_thisopen);
8191 CURMAD('_', tmpwhite);
8192 CURMAD('=', PL_thisstuff);
8193 CURMAD('Q', PL_thisclose);
8194 NEXTVAL_NEXTTOKE.opval =
8195 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8196 PL_lex_stuff = NULL;
8199 s = SKIPSPACE2(s,tmpwhite);
8207 if (*s == ':' && s[1] != ':')
8208 PL_expect = attrful;
8209 else if (*s != '{' && key == KEY_sub) {
8211 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8212 else if (*s != ';' && *s != '}')
8213 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8220 curmad('^', newSVpvs(""));
8221 CURMAD('_', tmpwhite);
8225 PL_thistoken = subtoken;
8228 NEXTVAL_NEXTTOKE.opval =
8229 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8230 PL_lex_stuff = NULL;
8236 sv_setpvs(PL_subname, "__ANON__");
8238 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8242 (void) force_word(PL_oldbufptr + tboffset, WORD,
8251 LOP(OP_SYSTEM,XREF);
8254 LOP(OP_SYMLINK,XTERM);
8257 LOP(OP_SYSCALL,XTERM);
8260 LOP(OP_SYSOPEN,XTERM);
8263 LOP(OP_SYSSEEK,XTERM);
8266 LOP(OP_SYSREAD,XTERM);
8269 LOP(OP_SYSWRITE,XTERM);
8273 TERM(sublex_start());
8294 LOP(OP_TRUNCATE,XTERM);
8306 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8308 pl_yylval.ival = CopLINE(PL_curcop);
8312 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8314 pl_yylval.ival = CopLINE(PL_curcop);
8318 LOP(OP_UNLINK,XTERM);
8324 LOP(OP_UNPACK,XTERM);
8327 LOP(OP_UTIME,XTERM);
8333 LOP(OP_UNSHIFT,XTERM);
8336 s = tokenize_use(1, s);
8346 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8348 pl_yylval.ival = CopLINE(PL_curcop);
8352 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8354 pl_yylval.ival = CopLINE(PL_curcop);
8358 PL_hints |= HINT_BLOCK_SCOPE;
8365 LOP(OP_WAITPID,XTERM);
8374 ctl_l[0] = toCTRL('L');
8376 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
8379 /* Make sure $^L is defined */
8380 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
8385 if (PL_expect == XOPERATOR) {
8386 if (*s == '=' && !PL_lex_allbrackets &&
8387 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8395 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8397 pl_yylval.ival = OP_XOR;
8402 TERM(sublex_start());
8407 #pragma segment Main
8411 S_pending_ident(pTHX)
8416 /* pit holds the identifier we read and pending_ident is reset */
8417 char pit = PL_pending_ident;
8418 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8419 /* All routes through this function want to know if there is a colon. */
8420 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8421 PL_pending_ident = 0;
8423 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8424 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8425 "### Pending identifier '%s'\n", PL_tokenbuf); });
8427 /* if we're in a my(), we can't allow dynamics here.
8428 $foo'bar has already been turned into $foo::bar, so
8429 just check for colons.
8431 if it's a legal name, the OP is a PADANY.
8434 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8436 yyerror(Perl_form(aTHX_ "No package name allowed for "
8437 "variable %s in \"our\"",
8439 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8443 yyerror(Perl_form(aTHX_ PL_no_myglob,
8444 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8446 pl_yylval.opval = newOP(OP_PADANY, 0);
8447 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8448 UTF ? SVf_UTF8 : 0);
8454 build the ops for accesses to a my() variable.
8456 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8457 then used in a comparison. This catches most, but not
8458 all cases. For instance, it catches
8459 sort { my($a); $a <=> $b }
8461 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8462 (although why you'd do that is anyone's guess).
8467 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8468 UTF ? SVf_UTF8 : 0);
8469 if (tmp != NOT_IN_PAD) {
8470 /* might be an "our" variable" */
8471 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8472 /* build ops for a bareword */
8473 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8474 HEK * const stashname = HvNAME_HEK(stash);
8475 SV * const sym = newSVhek(stashname);
8476 sv_catpvs(sym, "::");
8477 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8478 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8479 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8482 ? (GV_ADDMULTI | GV_ADDINEVAL)
8485 ((PL_tokenbuf[0] == '$') ? SVt_PV
8486 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8491 /* if it's a sort block and they're naming $a or $b */
8492 if (PL_last_lop_op == OP_SORT &&
8493 PL_tokenbuf[0] == '$' &&
8494 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8497 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8498 d < PL_bufend && *d != '\n';
8501 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8502 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8508 pl_yylval.opval = newOP(OP_PADANY, 0);
8509 pl_yylval.opval->op_targ = tmp;
8515 Whine if they've said @foo in a doublequoted string,
8516 and @foo isn't a variable we can find in the symbol
8519 if (ckWARN(WARN_AMBIGUOUS) &&
8520 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
8521 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8522 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8523 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8524 /* DO NOT warn for @- and @+ */
8525 && !( PL_tokenbuf[2] == '\0' &&
8526 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8529 /* Downgraded from fatal to warning 20000522 mjd */
8530 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8531 "Possible unintended interpolation of %s in string",
8536 /* build ops for a bareword */
8537 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8539 UTF ? SVf_UTF8 : 0 ));
8540 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8541 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8542 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8543 | ( UTF ? SVf_UTF8 : 0 ),
8544 ((PL_tokenbuf[0] == '$') ? SVt_PV
8545 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8551 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8555 PERL_ARGS_ASSERT_CHECKCOMMA;
8557 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8558 if (ckWARN(WARN_SYNTAX)) {
8561 for (w = s+2; *w && level; w++) {
8569 /* the list of chars below is for end of statements or
8570 * block / parens, boolean operators (&&, ||, //) and branch
8571 * constructs (or, and, if, until, unless, while, err, for).
8572 * Not a very solid hack... */
8573 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8574 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8575 "%s (...) interpreted as function",name);
8578 while (s < PL_bufend && isSPACE(*s))
8582 while (s < PL_bufend && isSPACE(*s))
8584 if (isIDFIRST_lazy_if(s,UTF)) {
8585 const char * const w = s++;
8586 while (isALNUM_lazy_if(s,UTF))
8588 while (s < PL_bufend && isSPACE(*s))
8592 if (keyword(w, s - w, 0))
8595 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8596 if (gv && GvCVu(gv))
8598 Perl_croak(aTHX_ "No comma allowed after %s", what);
8603 /* Either returns sv, or mortalizes sv and returns a new SV*.
8604 Best used as sv=new_constant(..., sv, ...).
8605 If s, pv are NULL, calls subroutine with one argument,
8606 and type is used with error messages only. */
8609 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8610 SV *sv, SV *pv, const char *type, STRLEN typelen)
8613 HV * const table = GvHV(PL_hintgv); /* ^H */
8617 const char *why1 = "", *why2 = "", *why3 = "";
8619 PERL_ARGS_ASSERT_NEW_CONSTANT;
8621 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8624 why2 = (const char *)
8625 (strEQ(key,"charnames")
8626 ? "(possibly a missing \"use charnames ...\")"
8628 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
8629 (type ? type: "undef"), why2);
8631 /* This is convoluted and evil ("goto considered harmful")
8632 * but I do not understand the intricacies of all the different
8633 * failure modes of %^H in here. The goal here is to make
8634 * the most probable error message user-friendly. --jhi */
8639 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
8640 (type ? type: "undef"), why1, why2, why3);
8642 yyerror(SvPVX_const(msg));
8647 /* charnames doesn't work well if there have been errors found */
8648 if (PL_error_count > 0 && strEQ(key,"charnames"))
8649 return &PL_sv_undef;
8651 cvp = hv_fetch(table, key, keylen, FALSE);
8652 if (!cvp || !SvOK(*cvp)) {
8655 why3 = "} is not defined";
8658 sv_2mortal(sv); /* Parent created it permanently */
8661 pv = newSVpvn_flags(s, len, SVs_TEMP);
8663 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8665 typesv = &PL_sv_undef;
8667 PUSHSTACKi(PERLSI_OVERLOAD);
8679 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8683 /* Check the eval first */
8684 if (!PL_in_eval && SvTRUE(ERRSV)) {
8685 sv_catpvs(ERRSV, "Propagated");
8686 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
8688 res = SvREFCNT_inc_simple(sv);
8692 SvREFCNT_inc_simple_void(res);
8701 why1 = "Call to &{$^H{";
8703 why3 = "}} did not return a defined value";
8711 /* Returns a NUL terminated string, with the length of the string written to
8715 S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8718 register char *d = dest;
8719 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
8721 PERL_ARGS_ASSERT_SCAN_WORD;
8725 Perl_croak(aTHX_ ident_too_long);
8726 if (isALNUM(*s)) /* UTF handled below */
8728 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
8733 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
8737 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8738 char *t = s + UTF8SKIP(s);
8740 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8744 Perl_croak(aTHX_ ident_too_long);
8745 Copy(s, d, len, char);
8758 S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
8761 char *bracket = NULL;
8763 register char *d = dest;
8764 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
8766 PERL_ARGS_ASSERT_SCAN_IDENT;
8771 while (isDIGIT(*s)) {
8773 Perl_croak(aTHX_ ident_too_long);
8780 Perl_croak(aTHX_ ident_too_long);
8781 if (isALNUM(*s)) /* UTF handled below */
8783 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
8788 else if (*s == ':' && s[1] == ':') {
8792 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
8793 char *t = s + UTF8SKIP(s);
8794 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
8796 if (d + (t - s) > e)
8797 Perl_croak(aTHX_ ident_too_long);
8798 Copy(s, d, t - s, char);
8809 if (PL_lex_state != LEX_NORMAL)
8810 PL_lex_state = LEX_INTERPENDMAYBE;
8813 if (*s == '$' && s[1] &&
8814 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
8826 const STRLEN skip = UTF8SKIP(s);
8829 for ( i = 0; i < skip; i++ )
8837 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8842 if (isSPACE(s[-1])) {
8844 const char ch = *s++;
8845 if (!SPACE_OR_TAB(ch)) {
8851 if (isIDFIRST_lazy_if(d,UTF)) {
8855 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8856 end += UTF8SKIP(end);
8857 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8858 end += UTF8SKIP(end);
8860 Copy(s, d, end - s, char);
8865 while ((isALNUM(*s) || *s == ':') && d < e)
8868 Perl_croak(aTHX_ ident_too_long);
8871 while (s < send && SPACE_OR_TAB(*s))
8873 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
8874 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
8875 const char * const brack =
8877 ((*s == '[') ? "[...]" : "{...}");
8878 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
8879 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8880 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
8881 funny, dest, brack, funny, dest, brack);
8884 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
8885 PL_lex_allbrackets++;
8889 /* Handle extended ${^Foo} variables
8890 * 1999-02-27 mjd-perl-patch@plover.com */
8891 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8895 while (isALNUM(*s) && d < e) {
8899 Perl_croak(aTHX_ ident_too_long);
8904 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
8905 PL_lex_state = LEX_INTERPEND;
8908 if (PL_lex_state == LEX_NORMAL) {
8909 if (ckWARN(WARN_AMBIGUOUS) &&
8910 (keyword(dest, d - dest, 0)
8911 || get_cvn_flags(dest, d - dest, 0)))
8915 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8916 "Ambiguous use of %c{%s} resolved to %c%s",
8917 funny, dest, funny, dest);
8922 s = bracket; /* let the parser handle it */
8926 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8927 PL_lex_state = LEX_INTERPEND;
8932 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
8934 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8935 * the parse starting at 's', based on the subset that are valid in this
8936 * context input to this routine in 'valid_flags'. Advances s. Returns
8937 * TRUE if the input was a valid flag, so the next char may be as well;
8938 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8939 * current regex. This routine will set it to any charset modifier found.
8940 * The caller shouldn't change it. This way, another charset modifier
8941 * encountered in the parse can be detected as an error, as we have decided
8946 if (! strchr(valid_flags, c)) {
8955 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8956 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8957 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8958 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8959 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8960 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
8961 case LOCALE_PAT_MOD:
8963 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8964 * can't be regex modifiers.
8965 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8966 * much as easily done. s///lei, for example, has to mean regex
8967 * modifiers if it's not an error (as does any word character
8968 * following the 'e'). Otherwise, we resolve to the backwards-
8969 * compatible, but less likely 's/// le ...', i.e. as meaning
8970 * less-than-or-equal. The reason it's not likely is that s//
8971 * returns a number for code in the field (/r returns a string, but
8972 * that wasn't added until the 5.13 series), and so '<=' should be
8973 * used for comparing, not 'le'. */
8974 if (*((*s) + 1) == 't') {
8977 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
8979 /* 'e' is valid only for substitutes, s///e. If it is not
8980 * valid in the current context, then 'm//le' must mean the
8981 * comparison operator, so use the regular deprecation message.
8983 if (! strchr(valid_flags, 'e')) {
8986 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8987 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.18, it will be resolved the other way");
8991 goto multiple_charsets;
8993 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
8996 case UNICODE_PAT_MOD:
8997 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8998 * 'n' means they can't be regex modifiers */
8999 if (*((*s) + 1) == 'n') {
9003 goto multiple_charsets;
9005 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9008 case ASCII_RESTRICT_PAT_MOD:
9009 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
9010 * can't be regex modifiers */
9011 if (*((*s) + 1) == 'n') {
9016 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9020 /* Error if previous modifier wasn't an 'a', but if it was, see
9021 * if, and accept, a second occurrence (only) */
9023 || get_regex_charset(*pmfl)
9024 != REGEX_ASCII_RESTRICTED_CHARSET)
9026 goto multiple_charsets;
9028 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9032 case DEPENDS_PAT_MOD:
9034 goto multiple_charsets;
9036 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9045 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
9046 "Having no space between pattern and following word is deprecated");
9050 if (*charset != c) {
9051 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9053 else if (c == 'a') {
9054 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9057 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9060 /* Pretend that it worked, so will continue processing before dieing */
9066 S_scan_pat(pTHX_ char *start, I32 type)
9070 char *s = scan_str(start,!!PL_madskills,FALSE);
9071 const char * const valid_flags =
9072 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9073 char charset = '\0'; /* character set modifier */
9078 PERL_ARGS_ASSERT_SCAN_PAT;
9081 const char * const delimiter = skipspace(start);
9085 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9086 : "Search pattern not terminated" ));
9089 pm = (PMOP*)newPMOP(type, 0);
9090 if (PL_multi_open == '?') {
9091 /* This is the only point in the code that sets PMf_ONCE: */
9092 pm->op_pmflags |= PMf_ONCE;
9094 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9095 allows us to restrict the list needed by reset to just the ??
9097 assert(type != OP_TRANS);
9099 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9102 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9105 elements = mg->mg_len / sizeof(PMOP**);
9106 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9107 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9108 mg->mg_len = elements * sizeof(PMOP**);
9109 PmopSTASH_set(pm,PL_curstash);
9115 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9117 if (PL_madskills && modstart != s) {
9118 SV* tmptoken = newSVpvn(modstart, s - modstart);
9119 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9122 /* issue a warning if /c is specified,but /g is not */
9123 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9125 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9126 "Use of /c modifier is meaningless without /g" );
9129 PL_lex_op = (OP*)pm;
9130 pl_yylval.ival = OP_MATCH;
9135 S_scan_subst(pTHX_ char *start)
9142 char charset = '\0'; /* character set modifier */
9147 PERL_ARGS_ASSERT_SCAN_SUBST;
9149 pl_yylval.ival = OP_NULL;
9151 s = scan_str(start,!!PL_madskills,FALSE);
9154 Perl_croak(aTHX_ "Substitution pattern not terminated");
9156 if (s[-1] == PL_multi_open)
9160 CURMAD('q', PL_thisopen);
9161 CURMAD('_', PL_thiswhite);
9162 CURMAD('E', PL_thisstuff);
9163 CURMAD('Q', PL_thisclose);
9164 PL_realtokenstart = s - SvPVX(PL_linestr);
9168 first_start = PL_multi_start;
9169 s = scan_str(s,!!PL_madskills,FALSE);
9172 SvREFCNT_dec(PL_lex_stuff);
9173 PL_lex_stuff = NULL;
9175 Perl_croak(aTHX_ "Substitution replacement not terminated");
9177 PL_multi_start = first_start; /* so whole substitution is taken together */
9179 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9183 CURMAD('z', PL_thisopen);
9184 CURMAD('R', PL_thisstuff);
9185 CURMAD('Z', PL_thisclose);
9191 if (*s == EXEC_PAT_MOD) {
9195 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9204 curmad('m', newSVpvn(modstart, s - modstart));
9205 append_madprops(PL_thismad, (OP*)pm, 0);
9209 if ((pm->op_pmflags & PMf_CONTINUE)) {
9210 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9214 SV * const repl = newSVpvs("");
9216 PL_sublex_info.super_bufptr = s;
9217 PL_sublex_info.super_bufend = PL_bufend;
9219 pm->op_pmflags |= PMf_EVAL;
9222 sv_catpvs(repl, "eval ");
9224 sv_catpvs(repl, "do ");
9226 sv_catpvs(repl, "{");
9227 sv_catsv(repl, PL_lex_repl);
9228 if (strchr(SvPVX(PL_lex_repl), '#'))
9229 sv_catpvs(repl, "\n");
9230 sv_catpvs(repl, "}");
9232 SvREFCNT_dec(PL_lex_repl);
9236 PL_lex_op = (OP*)pm;
9237 pl_yylval.ival = OP_SUBST;
9242 S_scan_trans(pTHX_ char *start)
9251 bool nondestruct = 0;
9256 PERL_ARGS_ASSERT_SCAN_TRANS;
9258 pl_yylval.ival = OP_NULL;
9260 s = scan_str(start,!!PL_madskills,FALSE);
9262 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9264 if (s[-1] == PL_multi_open)
9268 CURMAD('q', PL_thisopen);
9269 CURMAD('_', PL_thiswhite);
9270 CURMAD('E', PL_thisstuff);
9271 CURMAD('Q', PL_thisclose);
9272 PL_realtokenstart = s - SvPVX(PL_linestr);
9276 s = scan_str(s,!!PL_madskills,FALSE);
9279 SvREFCNT_dec(PL_lex_stuff);
9280 PL_lex_stuff = NULL;
9282 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9285 CURMAD('z', PL_thisopen);
9286 CURMAD('R', PL_thisstuff);
9287 CURMAD('Z', PL_thisclose);
9290 complement = del = squash = 0;
9297 complement = OPpTRANS_COMPLEMENT;
9300 del = OPpTRANS_DELETE;
9303 squash = OPpTRANS_SQUASH;
9315 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
9316 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
9317 o->op_private &= ~OPpTRANS_ALL;
9318 o->op_private |= del|squash|complement|
9319 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9320 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
9323 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9328 curmad('m', newSVpvn(modstart, s - modstart));
9329 append_madprops(PL_thismad, o, 0);
9338 S_scan_heredoc(pTHX_ register char *s)
9342 I32 op_type = OP_SCALAR;
9346 const char *found_newline;
9350 const int outer = (PL_rsfp || PL_parser->filtered)
9351 && !(PL_lex_inwhat == OP_SCALAR);
9353 I32 stuffstart = s - SvPVX(PL_linestr);
9356 PL_realtokenstart = -1;
9359 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9363 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9367 while (SPACE_OR_TAB(*peek))
9369 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9372 s = delimcpy(d, e, s, PL_bufend, term, &len);
9382 if (!isALNUM_lazy_if(s,UTF))
9383 deprecate("bare << to mean <<\"\"");
9384 for (; isALNUM_lazy_if(s,UTF); s++) {
9389 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9390 Perl_croak(aTHX_ "Delimiter for here document is too long");
9393 len = d - PL_tokenbuf;
9397 tstart = PL_tokenbuf + !outer;
9398 PL_thisclose = newSVpvn(tstart, len - !outer);
9399 tstart = SvPVX(PL_linestr) + stuffstart;
9400 PL_thisopen = newSVpvn(tstart, s - tstart);
9401 stuffstart = s - SvPVX(PL_linestr);
9404 #ifndef PERL_STRICT_CR
9405 d = strchr(s, '\r');
9407 char * const olds = s;
9409 while (s < PL_bufend) {
9415 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9424 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9431 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
9432 herewas = newSVpvn(s,PL_bufend-s);
9436 herewas = newSVpvn(s-1,found_newline-s+1);
9439 herewas = newSVpvn(s,found_newline-s);
9444 tstart = SvPVX(PL_linestr) + stuffstart;
9446 sv_catpvn(PL_thisstuff, tstart, s - tstart);
9448 PL_thisstuff = newSVpvn(tstart, s - tstart);
9451 s += SvCUR(herewas);
9454 stuffstart = s - SvPVX(PL_linestr);
9460 tmpstr = newSV_type(SVt_PVIV);
9464 SvIV_set(tmpstr, -1);
9466 else if (term == '`') {
9467 op_type = OP_BACKTICK;
9468 SvIV_set(tmpstr, '\\');
9472 PL_multi_start = CopLINE(PL_curcop);
9473 PL_multi_open = PL_multi_close = '<';
9474 term = *PL_tokenbuf;
9475 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
9476 && !PL_parser->filtered) {
9477 char * const bufptr = PL_sublex_info.super_bufptr;
9478 char * const bufend = PL_sublex_info.super_bufend;
9479 char * const olds = s - SvCUR(herewas);
9480 s = strchr(bufptr, '\n');
9484 while (s < bufend &&
9485 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9487 CopLINE_inc(PL_curcop);
9490 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9491 missingterm(PL_tokenbuf);
9493 sv_setpvn(herewas,bufptr,d-bufptr+1);
9494 sv_setpvn(tmpstr,d+1,s-d);
9496 sv_catpvn(herewas,s,bufend-s);
9497 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
9504 while (s < PL_bufend &&
9505 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9507 CopLINE_inc(PL_curcop);
9509 if (s >= PL_bufend) {
9510 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9511 missingterm(PL_tokenbuf);
9513 sv_setpvn(tmpstr,d+1,s-d);
9517 sv_catpvn(PL_thisstuff, d + 1, s - d);
9519 PL_thisstuff = newSVpvn(d + 1, s - d);
9520 stuffstart = s - SvPVX(PL_linestr);
9524 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
9526 sv_catpvn(herewas,s,PL_bufend-s);
9527 sv_setsv(PL_linestr,herewas);
9528 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9529 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9530 PL_last_lop = PL_last_uni = NULL;
9533 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9534 while (s >= PL_bufend) { /* multiple line string? */
9537 tstart = SvPVX(PL_linestr) + stuffstart;
9539 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
9541 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
9545 CopLINE_inc(PL_curcop);
9546 if (!outer || !lex_next_chunk(0)) {
9547 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
9548 missingterm(PL_tokenbuf);
9550 CopLINE_dec(PL_curcop);
9553 stuffstart = s - SvPVX(PL_linestr);
9555 CopLINE_inc(PL_curcop);
9556 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9557 PL_last_lop = PL_last_uni = NULL;
9558 #ifndef PERL_STRICT_CR
9559 if (PL_bufend - PL_linestart >= 2) {
9560 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9561 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9563 PL_bufend[-2] = '\n';
9565 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9567 else if (PL_bufend[-1] == '\r')
9568 PL_bufend[-1] = '\n';
9570 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9571 PL_bufend[-1] = '\n';
9573 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
9574 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
9575 *(SvPVX(PL_linestr) + off ) = ' ';
9576 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
9577 sv_catsv(PL_linestr,herewas);
9578 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9579 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
9583 sv_catsv(tmpstr,PL_linestr);
9588 PL_multi_end = CopLINE(PL_curcop);
9589 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9590 SvPV_shrink_to_cur(tmpstr);
9592 SvREFCNT_dec(herewas);
9594 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9596 else if (PL_encoding)
9597 sv_recode_to_utf8(tmpstr, PL_encoding);
9599 PL_lex_stuff = tmpstr;
9600 pl_yylval.ival = op_type;
9605 takes: current position in input buffer
9606 returns: new position in input buffer
9607 side-effects: pl_yylval and lex_op are set.
9612 <FH> read from filehandle
9613 <pkg::FH> read from package qualified filehandle
9614 <pkg'FH> read from package qualified filehandle
9615 <$fh> read from filehandle in $fh
9621 S_scan_inputsymbol(pTHX_ char *start)
9624 register char *s = start; /* current position in buffer */
9627 char *d = PL_tokenbuf; /* start of temp holding space */
9628 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9630 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9632 end = strchr(s, '\n');
9635 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9637 /* die if we didn't have space for the contents of the <>,
9638 or if it didn't end, or if we see a newline
9641 if (len >= (I32)sizeof PL_tokenbuf)
9642 Perl_croak(aTHX_ "Excessively long <> operator");
9644 Perl_croak(aTHX_ "Unterminated <> operator");
9649 Remember, only scalar variables are interpreted as filehandles by
9650 this code. Anything more complex (e.g., <$fh{$num}>) will be
9651 treated as a glob() call.
9652 This code makes use of the fact that except for the $ at the front,
9653 a scalar variable and a filehandle look the same.
9655 if (*d == '$' && d[1]) d++;
9657 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9658 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9659 d += UTF ? UTF8SKIP(d) : 1;
9661 /* If we've tried to read what we allow filehandles to look like, and
9662 there's still text left, then it must be a glob() and not a getline.
9663 Use scan_str to pull out the stuff between the <> and treat it
9664 as nothing more than a string.
9667 if (d - PL_tokenbuf != len) {
9668 pl_yylval.ival = OP_GLOB;
9669 s = scan_str(start,!!PL_madskills,FALSE);
9671 Perl_croak(aTHX_ "Glob not terminated");
9675 bool readline_overriden = FALSE;
9678 /* we're in a filehandle read situation */
9681 /* turn <> into <ARGV> */
9683 Copy("ARGV",d,5,char);
9685 /* Check whether readline() is overriden */
9686 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
9688 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9690 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9691 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
9692 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9693 readline_overriden = TRUE;
9695 /* if <$fh>, create the ops to turn the variable into a
9699 /* try to find it in the pad for this block, otherwise find
9700 add symbol table ops
9702 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
9703 if (tmp != NOT_IN_PAD) {
9704 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9705 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9706 HEK * const stashname = HvNAME_HEK(stash);
9707 SV * const sym = sv_2mortal(newSVhek(stashname));
9708 sv_catpvs(sym, "::");
9714 OP * const o = newOP(OP_PADSV, 0);
9716 PL_lex_op = readline_overriden
9717 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9718 op_append_elem(OP_LIST, o,
9719 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9720 : (OP*)newUNOP(OP_READLINE, 0, o);
9729 ? (GV_ADDMULTI | GV_ADDINEVAL)
9730 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
9732 PL_lex_op = readline_overriden
9733 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9734 op_append_elem(OP_LIST,
9735 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9736 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9737 : (OP*)newUNOP(OP_READLINE, 0,
9738 newUNOP(OP_RV2SV, 0,
9739 newGVOP(OP_GV, 0, gv)));
9741 if (!readline_overriden)
9742 PL_lex_op->op_flags |= OPf_SPECIAL;
9743 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9744 pl_yylval.ival = OP_NULL;
9747 /* If it's none of the above, it must be a literal filehandle
9748 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9750 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9751 PL_lex_op = readline_overriden
9752 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9753 op_append_elem(OP_LIST,
9754 newGVOP(OP_GV, 0, gv),
9755 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9756 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
9757 pl_yylval.ival = OP_NULL;
9766 takes: start position in buffer
9767 keep_quoted preserve \ on the embedded delimiter(s)
9768 keep_delims preserve the delimiters around the string
9769 returns: position to continue reading from buffer
9770 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9771 updates the read buffer.
9773 This subroutine pulls a string out of the input. It is called for:
9774 q single quotes q(literal text)
9775 ' single quotes 'literal text'
9776 qq double quotes qq(interpolate $here please)
9777 " double quotes "interpolate $here please"
9778 qx backticks qx(/bin/ls -l)
9779 ` backticks `/bin/ls -l`
9780 qw quote words @EXPORT_OK = qw( func() $spam )
9781 m// regexp match m/this/
9782 s/// regexp substitute s/this/that/
9783 tr/// string transliterate tr/this/that/
9784 y/// string transliterate y/this/that/
9785 ($*@) sub prototypes sub foo ($)
9786 (stuff) sub attr parameters sub foo : attr(stuff)
9787 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9789 In most of these cases (all but <>, patterns and transliterate)
9790 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9791 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9792 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9795 It skips whitespace before the string starts, and treats the first
9796 character as the delimiter. If the delimiter is one of ([{< then
9797 the corresponding "close" character )]}> is used as the closing
9798 delimiter. It allows quoting of delimiters, and if the string has
9799 balanced delimiters ([{<>}]) it allows nesting.
9801 On success, the SV with the resulting string is put into lex_stuff or,
9802 if that is already non-NULL, into lex_repl. The second case occurs only
9803 when parsing the RHS of the special constructs s/// and tr/// (y///).
9804 For convenience, the terminating delimiter character is stuffed into
9809 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
9812 SV *sv; /* scalar value: string */
9813 const char *tmps; /* temp string, used for delimiter matching */
9814 register char *s = start; /* current position in the buffer */
9815 register char term; /* terminating character */
9816 register char *to; /* current position in the sv's data */
9817 I32 brackets = 1; /* bracket nesting level */
9818 bool has_utf8 = FALSE; /* is there any utf8 content? */
9819 I32 termcode; /* terminating char. code */
9820 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9821 STRLEN termlen; /* length of terminating string */
9822 int last_off = 0; /* last position for nesting bracket */
9828 PERL_ARGS_ASSERT_SCAN_STR;
9830 /* skip space before the delimiter */
9836 if (PL_realtokenstart >= 0) {
9837 stuffstart = PL_realtokenstart;
9838 PL_realtokenstart = -1;
9841 stuffstart = start - SvPVX(PL_linestr);
9843 /* mark where we are, in case we need to report errors */
9846 /* after skipping whitespace, the next character is the terminator */
9849 termcode = termstr[0] = term;
9853 termcode = utf8_to_uvchr((U8*)s, &termlen);
9854 Copy(s, termstr, termlen, U8);
9855 if (!UTF8_IS_INVARIANT(term))
9859 /* mark where we are */
9860 PL_multi_start = CopLINE(PL_curcop);
9861 PL_multi_open = term;
9863 /* find corresponding closing delimiter */
9864 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9865 termcode = termstr[0] = term = tmps[5];
9867 PL_multi_close = term;
9869 /* create a new SV to hold the contents. 79 is the SV's initial length.
9870 What a random number. */
9871 sv = newSV_type(SVt_PVIV);
9873 SvIV_set(sv, termcode);
9874 (void)SvPOK_only(sv); /* validate pointer */
9876 /* move past delimiter and try to read a complete string */
9878 sv_catpvn(sv, s, termlen);
9881 tstart = SvPVX(PL_linestr) + stuffstart;
9882 if (!PL_thisopen && !keep_delims) {
9883 PL_thisopen = newSVpvn(tstart, s - tstart);
9884 stuffstart = s - SvPVX(PL_linestr);
9888 if (PL_encoding && !UTF) {
9892 int offset = s - SvPVX_const(PL_linestr);
9893 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
9894 &offset, (char*)termstr, termlen);
9895 const char * const ns = SvPVX_const(PL_linestr) + offset;
9896 char * const svlast = SvEND(sv) - 1;
9898 for (; s < ns; s++) {
9899 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9900 CopLINE_inc(PL_curcop);
9903 goto read_more_line;
9905 /* handle quoted delimiters */
9906 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
9908 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
9910 if ((svlast-1 - t) % 2) {
9914 SvCUR_set(sv, SvCUR(sv) - 1);
9919 if (PL_multi_open == PL_multi_close) {
9925 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
9926 /* At here, all closes are "was quoted" one,
9927 so we don't check PL_multi_close. */
9929 if (!keep_quoted && *(t+1) == PL_multi_open)
9934 else if (*t == PL_multi_open)
9942 SvCUR_set(sv, w - SvPVX_const(sv));
9944 last_off = w - SvPVX(sv);
9945 if (--brackets <= 0)
9951 SvCUR_set(sv, SvCUR(sv) - 1);
9957 /* extend sv if need be */
9958 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9959 /* set 'to' to the next character in the sv's string */
9960 to = SvPVX(sv)+SvCUR(sv);
9962 /* if open delimiter is the close delimiter read unbridle */
9963 if (PL_multi_open == PL_multi_close) {
9964 for (; s < PL_bufend; s++,to++) {
9965 /* embedded newlines increment the current line number */
9966 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9967 CopLINE_inc(PL_curcop);
9968 /* handle quoted delimiters */
9969 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9970 if (!keep_quoted && s[1] == term)
9972 /* any other quotes are simply copied straight through */
9976 /* terminate when run out of buffer (the for() condition), or
9977 have found the terminator */
9978 else if (*s == term) {
9981 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
9984 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
9990 /* if the terminator isn't the same as the start character (e.g.,
9991 matched brackets), we have to allow more in the quoting, and
9992 be prepared for nested brackets.
9995 /* read until we run out of string, or we find the terminator */
9996 for (; s < PL_bufend; s++,to++) {
9997 /* embedded newlines increment the line count */
9998 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9999 CopLINE_inc(PL_curcop);
10000 /* backslashes can escape the open or closing characters */
10001 if (*s == '\\' && s+1 < PL_bufend) {
10002 if (!keep_quoted &&
10003 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10008 /* allow nested opens and closes */
10009 else if (*s == PL_multi_close && --brackets <= 0)
10011 else if (*s == PL_multi_open)
10013 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10018 /* terminate the copied string and update the sv's end-of-string */
10020 SvCUR_set(sv, to - SvPVX_const(sv));
10023 * this next chunk reads more into the buffer if we're not done yet
10027 break; /* handle case where we are done yet :-) */
10029 #ifndef PERL_STRICT_CR
10030 if (to - SvPVX_const(sv) >= 2) {
10031 if ((to[-2] == '\r' && to[-1] == '\n') ||
10032 (to[-2] == '\n' && to[-1] == '\r'))
10036 SvCUR_set(sv, to - SvPVX_const(sv));
10038 else if (to[-1] == '\r')
10041 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10046 /* if we're out of file, or a read fails, bail and reset the current
10047 line marker so we can report where the unterminated string began
10050 if (PL_madskills) {
10051 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10053 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10055 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10058 CopLINE_inc(PL_curcop);
10059 PL_bufptr = PL_bufend;
10060 if (!lex_next_chunk(0)) {
10062 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10071 /* at this point, we have successfully read the delimited string */
10073 if (!PL_encoding || UTF) {
10075 if (PL_madskills) {
10076 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10077 const int len = s - tstart;
10079 sv_catpvn(PL_thisstuff, tstart, len);
10081 PL_thisstuff = newSVpvn(tstart, len);
10082 if (!PL_thisclose && !keep_delims)
10083 PL_thisclose = newSVpvn(s,termlen);
10088 sv_catpvn(sv, s, termlen);
10093 if (PL_madskills) {
10094 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10095 const int len = s - tstart - termlen;
10097 sv_catpvn(PL_thisstuff, tstart, len);
10099 PL_thisstuff = newSVpvn(tstart, len);
10100 if (!PL_thisclose && !keep_delims)
10101 PL_thisclose = newSVpvn(s - termlen,termlen);
10105 if (has_utf8 || PL_encoding)
10108 PL_multi_end = CopLINE(PL_curcop);
10110 /* if we allocated too much space, give some back */
10111 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10112 SvLEN_set(sv, SvCUR(sv) + 1);
10113 SvPV_renew(sv, SvLEN(sv));
10116 /* decide whether this is the first or second quoted string we've read
10129 takes: pointer to position in buffer
10130 returns: pointer to new position in buffer
10131 side-effects: builds ops for the constant in pl_yylval.op
10133 Read a number in any of the formats that Perl accepts:
10135 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10136 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10139 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10141 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10144 If it reads a number without a decimal point or an exponent, it will
10145 try converting the number to an integer and see if it can do so
10146 without loss of precision.
10150 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10153 register const char *s = start; /* current position in buffer */
10154 register char *d; /* destination in temp buffer */
10155 register char *e; /* end of temp buffer */
10156 NV nv; /* number read, as a double */
10157 SV *sv = NULL; /* place to put the converted number */
10158 bool floatit; /* boolean: int or float? */
10159 const char *lastub = NULL; /* position of last underbar */
10160 static char const number_too_long[] = "Number too long";
10162 PERL_ARGS_ASSERT_SCAN_NUM;
10164 /* We use the first character to decide what type of number this is */
10168 Perl_croak(aTHX_ "panic: scan_num");
10170 /* if it starts with a 0, it could be an octal number, a decimal in
10171 0.13 disguise, or a hexadecimal number, or a binary number. */
10175 u holds the "number so far"
10176 shift the power of 2 of the base
10177 (hex == 4, octal == 3, binary == 1)
10178 overflowed was the number more than we can hold?
10180 Shift is used when we add a digit. It also serves as an "are
10181 we in octal/hex/binary?" indicator to disallow hex characters
10182 when in octal mode.
10187 bool overflowed = FALSE;
10188 bool just_zero = TRUE; /* just plain 0 or binary number? */
10189 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10190 static const char* const bases[5] =
10191 { "", "binary", "", "octal", "hexadecimal" };
10192 static const char* const Bases[5] =
10193 { "", "Binary", "", "Octal", "Hexadecimal" };
10194 static const char* const maxima[5] =
10196 "0b11111111111111111111111111111111",
10200 const char *base, *Base, *max;
10202 /* check for hex */
10203 if (s[1] == 'x' || s[1] == 'X') {
10207 } else if (s[1] == 'b' || s[1] == 'B') {
10212 /* check for a decimal in disguise */
10213 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
10215 /* so it must be octal */
10222 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10223 "Misplaced _ in number");
10227 base = bases[shift];
10228 Base = Bases[shift];
10229 max = maxima[shift];
10231 /* read the rest of the number */
10233 /* x is used in the overflow test,
10234 b is the digit we're adding on. */
10239 /* if we don't mention it, we're done */
10243 /* _ are ignored -- but warned about if consecutive */
10245 if (lastub && s == lastub + 1)
10246 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10247 "Misplaced _ in number");
10251 /* 8 and 9 are not octal */
10252 case '8': case '9':
10254 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10258 case '2': case '3': case '4':
10259 case '5': case '6': case '7':
10261 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10264 case '0': case '1':
10265 b = *s++ & 15; /* ASCII digit -> value of digit */
10269 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10270 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10271 /* make sure they said 0x */
10274 b = (*s++ & 7) + 9;
10276 /* Prepare to put the digit we have onto the end
10277 of the number so far. We check for overflows.
10283 x = u << shift; /* make room for the digit */
10285 if ((x >> shift) != u
10286 && !(PL_hints & HINT_NEW_BINARY)) {
10289 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10290 "Integer overflow in %s number",
10293 u = x | b; /* add the digit to the end */
10296 n *= nvshift[shift];
10297 /* If an NV has not enough bits in its
10298 * mantissa to represent an UV this summing of
10299 * small low-order numbers is a waste of time
10300 * (because the NV cannot preserve the
10301 * low-order bits anyway): we could just
10302 * remember when did we overflow and in the
10303 * end just multiply n by the right
10311 /* if we get here, we had success: make a scalar value from
10316 /* final misplaced underbar check */
10317 if (s[-1] == '_') {
10318 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10322 if (n > 4294967295.0)
10323 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10324 "%s number > %s non-portable",
10330 if (u > 0xffffffff)
10331 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10332 "%s number > %s non-portable",
10337 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10338 sv = new_constant(start, s - start, "integer",
10339 sv, NULL, NULL, 0);
10340 else if (PL_hints & HINT_NEW_BINARY)
10341 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10346 handle decimal numbers.
10347 we're also sent here when we read a 0 as the first digit
10349 case '1': case '2': case '3': case '4': case '5':
10350 case '6': case '7': case '8': case '9': case '.':
10353 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10356 /* read next group of digits and _ and copy into d */
10357 while (isDIGIT(*s) || *s == '_') {
10358 /* skip underscores, checking for misplaced ones
10362 if (lastub && s == lastub + 1)
10363 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10364 "Misplaced _ in number");
10368 /* check for end of fixed-length buffer */
10370 Perl_croak(aTHX_ number_too_long);
10371 /* if we're ok, copy the character */
10376 /* final misplaced underbar check */
10377 if (lastub && s == lastub + 1) {
10378 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10381 /* read a decimal portion if there is one. avoid
10382 3..5 being interpreted as the number 3. followed
10385 if (*s == '.' && s[1] != '.') {
10390 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10391 "Misplaced _ in number");
10395 /* copy, ignoring underbars, until we run out of digits.
10397 for (; isDIGIT(*s) || *s == '_'; s++) {
10398 /* fixed length buffer check */
10400 Perl_croak(aTHX_ number_too_long);
10402 if (lastub && s == lastub + 1)
10403 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10404 "Misplaced _ in number");
10410 /* fractional part ending in underbar? */
10411 if (s[-1] == '_') {
10412 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10413 "Misplaced _ in number");
10415 if (*s == '.' && isDIGIT(s[1])) {
10416 /* oops, it's really a v-string, but without the "v" */
10422 /* read exponent part, if present */
10423 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
10427 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
10428 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
10430 /* stray preinitial _ */
10432 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10433 "Misplaced _ in number");
10437 /* allow positive or negative exponent */
10438 if (*s == '+' || *s == '-')
10441 /* stray initial _ */
10443 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10444 "Misplaced _ in number");
10448 /* read digits of exponent */
10449 while (isDIGIT(*s) || *s == '_') {
10452 Perl_croak(aTHX_ number_too_long);
10456 if (((lastub && s == lastub + 1) ||
10457 (!isDIGIT(s[1]) && s[1] != '_')))
10458 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10459 "Misplaced _ in number");
10467 We try to do an integer conversion first if no characters
10468 indicating "float" have been found.
10473 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10475 if (flags == IS_NUMBER_IN_UV) {
10477 sv = newSViv(uv); /* Prefer IVs over UVs. */
10480 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10481 if (uv <= (UV) IV_MIN)
10482 sv = newSViv(-(IV)uv);
10489 /* terminate the string */
10491 nv = Atof(PL_tokenbuf);
10496 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10497 const char *const key = floatit ? "float" : "integer";
10498 const STRLEN keylen = floatit ? 5 : 7;
10499 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10500 key, keylen, sv, NULL, NULL, 0);
10504 /* if it starts with a v, it could be a v-string */
10507 sv = newSV(5); /* preallocate storage space */
10508 s = scan_vstring(s, PL_bufend, sv);
10512 /* make the op for the constant and return */
10515 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10517 lvalp->opval = NULL;
10523 S_scan_formline(pTHX_ register char *s)
10526 register char *eol;
10528 SV * const stuff = newSVpvs("");
10529 bool needargs = FALSE;
10530 bool eofmt = FALSE;
10532 char *tokenstart = s;
10533 SV* savewhite = NULL;
10535 if (PL_madskills) {
10536 savewhite = PL_thiswhite;
10541 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10543 while (!needargs) {
10546 #ifdef PERL_STRICT_CR
10547 while (SPACE_OR_TAB(*t))
10550 while (SPACE_OR_TAB(*t) || *t == '\r')
10553 if (*t == '\n' || t == PL_bufend) {
10558 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
10559 eol = (char *) memchr(s,'\n',PL_bufend-s);
10564 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10566 for (t = s; t < eol; t++) {
10567 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10569 goto enough; /* ~~ must be first line in formline */
10571 if (*t == '@' || *t == '^')
10575 sv_catpvn(stuff, s, eol-s);
10576 #ifndef PERL_STRICT_CR
10577 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10578 char *end = SvPVX(stuff) + SvCUR(stuff);
10581 SvCUR_set(stuff, SvCUR(stuff) - 1);
10589 if (PL_rsfp || PL_parser->filtered) {
10592 if (PL_madskills) {
10594 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
10596 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
10599 PL_bufptr = PL_bufend;
10600 CopLINE_inc(PL_curcop);
10601 got_some = lex_next_chunk(0);
10602 CopLINE_dec(PL_curcop);
10605 tokenstart = PL_bufptr;
10613 if (SvCUR(stuff)) {
10616 PL_lex_state = LEX_NORMAL;
10617 start_force(PL_curforce);
10618 NEXTVAL_NEXTTOKE.ival = 0;
10622 PL_lex_state = LEX_FORMLINE;
10624 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10626 else if (PL_encoding)
10627 sv_recode_to_utf8(stuff, PL_encoding);
10629 start_force(PL_curforce);
10630 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10632 start_force(PL_curforce);
10633 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
10637 SvREFCNT_dec(stuff);
10639 PL_lex_formbrack = 0;
10643 if (PL_madskills) {
10645 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
10647 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10648 PL_thiswhite = savewhite;
10655 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10658 const I32 oldsavestack_ix = PL_savestack_ix;
10659 CV* const outsidecv = PL_compcv;
10662 assert(SvTYPE(PL_compcv) == SVt_PVCV);
10664 SAVEI32(PL_subline);
10665 save_item(PL_subname);
10666 SAVESPTR(PL_compcv);
10668 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10669 CvFLAGS(PL_compcv) |= flags;
10671 PL_subline = CopLINE(PL_curcop);
10672 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10673 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10674 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10676 return oldsavestack_ix;
10680 #pragma segment Perl_yylex
10683 S_yywarn(pTHX_ const char *const s)
10687 PERL_ARGS_ASSERT_YYWARN;
10689 PL_in_eval |= EVAL_WARNONLY;
10691 PL_in_eval &= ~EVAL_WARNONLY;
10696 Perl_yyerror(pTHX_ const char *const s)
10699 const char *where = NULL;
10700 const char *context = NULL;
10703 int yychar = PL_parser->yychar;
10705 PERL_ARGS_ASSERT_YYERROR;
10707 if (!yychar || (yychar == ';' && !PL_rsfp))
10709 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10710 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10711 PL_oldbufptr != PL_bufptr) {
10714 The code below is removed for NetWare because it abends/crashes on NetWare
10715 when the script has error such as not having the closing quotes like:
10716 if ($var eq "value)
10717 Checking of white spaces is anyway done in NetWare code.
10720 while (isSPACE(*PL_oldoldbufptr))
10723 context = PL_oldoldbufptr;
10724 contlen = PL_bufptr - PL_oldoldbufptr;
10726 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10727 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
10730 The code below is removed for NetWare because it abends/crashes on NetWare
10731 when the script has error such as not having the closing quotes like:
10732 if ($var eq "value)
10733 Checking of white spaces is anyway done in NetWare code.
10736 while (isSPACE(*PL_oldbufptr))
10739 context = PL_oldbufptr;
10740 contlen = PL_bufptr - PL_oldbufptr;
10742 else if (yychar > 255)
10743 where = "next token ???";
10744 else if (yychar == -2) { /* YYEMPTY */
10745 if (PL_lex_state == LEX_NORMAL ||
10746 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
10747 where = "at end of line";
10748 else if (PL_lex_inpat)
10749 where = "within pattern";
10751 where = "within string";
10754 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
10756 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10757 else if (isPRINT_LC(yychar)) {
10758 const char string = yychar;
10759 sv_catpvn(where_sv, &string, 1);
10762 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10763 where = SvPVX_const(where_sv);
10765 msg = sv_2mortal(newSVpv(s, 0));
10766 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10767 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
10769 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
10771 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
10772 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10773 Perl_sv_catpvf(aTHX_ msg,
10774 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10775 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10778 if (PL_in_eval & EVAL_WARNONLY) {
10779 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10783 if (PL_error_count >= 10) {
10784 if (PL_in_eval && SvCUR(ERRSV))
10785 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10786 SVfARG(ERRSV), OutCopFILE(PL_curcop));
10788 Perl_croak(aTHX_ "%s has too many errors.\n",
10789 OutCopFILE(PL_curcop));
10792 PL_in_my_stash = NULL;
10796 #pragma segment Main
10800 S_swallow_bom(pTHX_ U8 *s)
10803 const STRLEN slen = SvCUR(PL_linestr);
10805 PERL_ARGS_ASSERT_SWALLOW_BOM;
10809 if (s[1] == 0xFE) {
10810 /* UTF-16 little-endian? (or UTF-32LE?) */
10811 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
10812 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
10813 #ifndef PERL_NO_UTF16_FILTER
10814 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
10816 if (PL_bufend > (char*)s) {
10817 s = add_utf16_textfilter(s, TRUE);
10820 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10825 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
10826 #ifndef PERL_NO_UTF16_FILTER
10827 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
10829 if (PL_bufend > (char *)s) {
10830 s = add_utf16_textfilter(s, FALSE);
10833 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10838 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
10839 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10840 s += 3; /* UTF-8 */
10846 if (s[2] == 0xFE && s[3] == 0xFF) {
10847 /* UTF-32 big-endian */
10848 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
10851 else if (s[2] == 0 && s[3] != 0) {
10854 * are a good indicator of UTF-16BE. */
10855 #ifndef PERL_NO_UTF16_FILTER
10856 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
10857 s = add_utf16_textfilter(s, FALSE);
10859 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10865 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10866 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10867 s += 4; /* UTF-8 */
10873 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10876 * are a good indicator of UTF-16LE. */
10877 #ifndef PERL_NO_UTF16_FILTER
10878 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
10879 s = add_utf16_textfilter(s, TRUE);
10881 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10889 #ifndef PERL_NO_UTF16_FILTER
10891 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
10894 SV *const filter = FILTER_DATA(idx);
10895 /* We re-use this each time round, throwing the contents away before we
10897 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
10898 SV *const utf8_buffer = filter;
10899 IV status = IoPAGE(filter);
10900 const bool reverse = cBOOL(IoLINES(filter));
10903 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10905 /* As we're automatically added, at the lowest level, and hence only called
10906 from this file, we can be sure that we're not called in block mode. Hence
10907 don't bother writing code to deal with block mode. */
10909 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10912 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10914 DEBUG_P(PerlIO_printf(Perl_debug_log,
10915 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10916 FPTR2DPTR(void *, S_utf16_textfilter),
10917 reverse ? 'l' : 'b', idx, maxlen, status,
10918 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10925 /* First, look in our buffer of existing UTF-8 data: */
10926 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10930 } else if (status == 0) {
10932 IoPAGE(filter) = 0;
10933 nl = SvEND(utf8_buffer);
10936 STRLEN got = nl - SvPVX(utf8_buffer);
10937 /* Did we have anything to append? */
10939 sv_catpvn(sv, SvPVX(utf8_buffer), got);
10940 /* Everything else in this code works just fine if SVp_POK isn't
10941 set. This, however, needs it, and we need it to work, else
10942 we loop infinitely because the buffer is never consumed. */
10943 sv_chop(utf8_buffer, nl);
10947 /* OK, not a complete line there, so need to read some more UTF-16.
10948 Read an extra octect if the buffer currently has an odd number. */
10952 if (SvCUR(utf16_buffer) >= 2) {
10953 /* Location of the high octet of the last complete code point.
10954 Gosh, UTF-16 is a pain. All the benefits of variable length,
10955 *coupled* with all the benefits of partial reads and
10957 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10958 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10960 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10964 /* We have the first half of a surrogate. Read more. */
10965 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10968 status = FILTER_READ(idx + 1, utf16_buffer,
10969 160 + (SvCUR(utf16_buffer) & 1));
10970 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
10971 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
10974 IoPAGE(filter) = status;
10979 chars = SvCUR(utf16_buffer) >> 1;
10980 have = SvCUR(utf8_buffer);
10981 SvGROW(utf8_buffer, have + chars * 3 + 1);
10984 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10985 (U8*)SvPVX_const(utf8_buffer) + have,
10986 chars * 2, &newlen);
10988 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
10989 (U8*)SvPVX_const(utf8_buffer) + have,
10990 chars * 2, &newlen);
10992 SvCUR_set(utf8_buffer, have + newlen);
10995 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10996 it's private to us, and utf16_to_utf8{,reversed} take a
10997 (pointer,length) pair, rather than a NUL-terminated string. */
10998 if(SvCUR(utf16_buffer) & 1) {
10999 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11000 SvCUR_set(utf16_buffer, 1);
11002 SvCUR_set(utf16_buffer, 0);
11005 DEBUG_P(PerlIO_printf(Perl_debug_log,
11006 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11008 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11009 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11014 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11016 SV *filter = filter_add(S_utf16_textfilter, NULL);
11018 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11020 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11021 sv_setpvs(filter, "");
11022 IoLINES(filter) = reversed;
11023 IoPAGE(filter) = 1; /* Not EOF */
11025 /* Sadly, we have to return a valid pointer, come what may, so we have to
11026 ignore any error return from this. */
11027 SvCUR_set(PL_linestr, 0);
11028 if (FILTER_READ(0, PL_linestr, 0)) {
11029 SvUTF8_on(PL_linestr);
11031 SvUTF8_on(PL_linestr);
11033 PL_bufend = SvEND(PL_linestr);
11034 return (U8*)SvPVX(PL_linestr);
11039 Returns a pointer to the next character after the parsed
11040 vstring, as well as updating the passed in sv.
11042 Function must be called like
11045 s = scan_vstring(s,e,sv);
11047 where s and e are the start and end of the string.
11048 The sv should already be large enough to store the vstring
11049 passed in, for performance reasons.
11054 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11057 const char *pos = s;
11058 const char *start = s;
11060 PERL_ARGS_ASSERT_SCAN_VSTRING;
11062 if (*pos == 'v') pos++; /* get past 'v' */
11063 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11065 if ( *pos != '.') {
11066 /* this may not be a v-string if followed by => */
11067 const char *next = pos;
11068 while (next < e && isSPACE(*next))
11070 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11071 /* return string not v-string */
11072 sv_setpvn(sv,(char *)s,pos-s);
11073 return (char *)pos;
11077 if (!isALPHA(*pos)) {
11078 U8 tmpbuf[UTF8_MAXBYTES+1];
11081 s++; /* get past 'v' */
11086 /* this is atoi() that tolerates underscores */
11089 const char *end = pos;
11091 while (--end >= s) {
11093 const UV orev = rev;
11094 rev += (*end - '0') * mult;
11097 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11098 "Integer overflow in decimal number");
11102 if (rev > 0x7FFFFFFF)
11103 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11105 /* Append native character for the rev point */
11106 tmpend = uvchr_to_utf8(tmpbuf, rev);
11107 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11108 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11110 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11116 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11120 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11127 Perl_keyword_plugin_standard(pTHX_
11128 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11130 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11131 PERL_UNUSED_CONTEXT;
11132 PERL_UNUSED_ARG(keyword_ptr);
11133 PERL_UNUSED_ARG(keyword_len);
11134 PERL_UNUSED_ARG(op_ptr);
11135 return KEYWORD_PLUGIN_DECLINE;
11138 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11140 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11142 SAVEI32(PL_lex_brackets);
11143 if (PL_lex_brackets > 100)
11144 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11145 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11146 SAVEI32(PL_lex_allbrackets);
11147 PL_lex_allbrackets = 0;
11148 SAVEI8(PL_lex_fakeeof);
11149 PL_lex_fakeeof = (U8)fakeeof;
11150 if(yyparse(gramtype) && !PL_parser->error_count)
11151 qerror(Perl_mess(aTHX_ "Parse error"));
11154 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11156 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11160 SAVEVPTR(PL_eval_root);
11161 PL_eval_root = NULL;
11162 parse_recdescent(gramtype, fakeeof);
11168 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11170 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11173 if (flags & ~PARSE_OPTIONAL)
11174 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11175 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11176 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11177 if (!PL_parser->error_count)
11178 qerror(Perl_mess(aTHX_ "Parse error"));
11179 exprop = newOP(OP_NULL, 0);
11185 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11187 Parse a Perl arithmetic expression. This may contain operators of precedence
11188 down to the bit shift operators. The expression must be followed (and thus
11189 terminated) either by a comparison or lower-precedence operator or by
11190 something that would normally terminate an expression such as semicolon.
11191 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11192 otherwise it is mandatory. It is up to the caller to ensure that the
11193 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11194 the source of the code to be parsed and the lexical context for the
11197 The op tree representing the expression is returned. If an optional
11198 expression is absent, a null pointer is returned, otherwise the pointer
11201 If an error occurs in parsing or compilation, in most cases a valid op
11202 tree is returned anyway. The error is reflected in the parser state,
11203 normally resulting in a single exception at the top level of parsing
11204 which covers all the compilation errors that occurred. Some compilation
11205 errors, however, will throw an exception immediately.
11211 Perl_parse_arithexpr(pTHX_ U32 flags)
11213 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11217 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11219 Parse a Perl term expression. This may contain operators of precedence
11220 down to the assignment operators. The expression must be followed (and thus
11221 terminated) either by a comma or lower-precedence operator or by
11222 something that would normally terminate an expression such as semicolon.
11223 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11224 otherwise it is mandatory. It is up to the caller to ensure that the
11225 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11226 the source of the code to be parsed and the lexical context for the
11229 The op tree representing the expression is returned. If an optional
11230 expression is absent, a null pointer is returned, otherwise the pointer
11233 If an error occurs in parsing or compilation, in most cases a valid op
11234 tree is returned anyway. The error is reflected in the parser state,
11235 normally resulting in a single exception at the top level of parsing
11236 which covers all the compilation errors that occurred. Some compilation
11237 errors, however, will throw an exception immediately.
11243 Perl_parse_termexpr(pTHX_ U32 flags)
11245 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11249 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11251 Parse a Perl list expression. This may contain operators of precedence
11252 down to the comma operator. The expression must be followed (and thus
11253 terminated) either by a low-precedence logic operator such as C<or> or by
11254 something that would normally terminate an expression such as semicolon.
11255 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11256 otherwise it is mandatory. It is up to the caller to ensure that the
11257 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11258 the source of the code to be parsed and the lexical context for the
11261 The op tree representing the expression is returned. If an optional
11262 expression is absent, a null pointer is returned, otherwise the pointer
11265 If an error occurs in parsing or compilation, in most cases a valid op
11266 tree is returned anyway. The error is reflected in the parser state,
11267 normally resulting in a single exception at the top level of parsing
11268 which covers all the compilation errors that occurred. Some compilation
11269 errors, however, will throw an exception immediately.
11275 Perl_parse_listexpr(pTHX_ U32 flags)
11277 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11281 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11283 Parse a single complete Perl expression. This allows the full
11284 expression grammar, including the lowest-precedence operators such
11285 as C<or>. The expression must be followed (and thus terminated) by a
11286 token that an expression would normally be terminated by: end-of-file,
11287 closing bracketing punctuation, semicolon, or one of the keywords that
11288 signals a postfix expression-statement modifier. If I<flags> includes
11289 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11290 mandatory. It is up to the caller to ensure that the dynamic parser
11291 state (L</PL_parser> et al) is correctly set to reflect the source of
11292 the code to be parsed and the lexical context for the expression.
11294 The op tree representing the expression is returned. If an optional
11295 expression is absent, a null pointer is returned, otherwise the pointer
11298 If an error occurs in parsing or compilation, in most cases a valid op
11299 tree is returned anyway. The error is reflected in the parser state,
11300 normally resulting in a single exception at the top level of parsing
11301 which covers all the compilation errors that occurred. Some compilation
11302 errors, however, will throw an exception immediately.
11308 Perl_parse_fullexpr(pTHX_ U32 flags)
11310 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11314 =for apidoc Amx|OP *|parse_block|U32 flags
11316 Parse a single complete Perl code block. This consists of an opening
11317 brace, a sequence of statements, and a closing brace. The block
11318 constitutes a lexical scope, so C<my> variables and various compile-time
11319 effects can be contained within it. It is up to the caller to ensure
11320 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11321 reflect the source of the code to be parsed and the lexical context for
11324 The op tree representing the code block is returned. This is always a
11325 real op, never a null pointer. It will normally be a C<lineseq> list,
11326 including C<nextstate> or equivalent ops. No ops to construct any kind
11327 of runtime scope are included by virtue of it being a block.
11329 If an error occurs in parsing or compilation, in most cases a valid op
11330 tree (most likely null) is returned anyway. The error is reflected in
11331 the parser state, normally resulting in a single exception at the top
11332 level of parsing which covers all the compilation errors that occurred.
11333 Some compilation errors, however, will throw an exception immediately.
11335 The I<flags> parameter is reserved for future use, and must always
11342 Perl_parse_block(pTHX_ U32 flags)
11345 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11346 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11350 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11352 Parse a single unadorned Perl statement. This may be a normal imperative
11353 statement or a declaration that has compile-time effect. It does not
11354 include any label or other affixture. It is up to the caller to ensure
11355 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11356 reflect the source of the code to be parsed and the lexical context for
11359 The op tree representing the statement is returned. This may be a
11360 null pointer if the statement is null, for example if it was actually
11361 a subroutine definition (which has compile-time side effects). If not
11362 null, it will be ops directly implementing the statement, suitable to
11363 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11364 equivalent op (except for those embedded in a scope contained entirely
11365 within the statement).
11367 If an error occurs in parsing or compilation, in most cases a valid op
11368 tree (most likely null) is returned anyway. The error is reflected in
11369 the parser state, normally resulting in a single exception at the top
11370 level of parsing which covers all the compilation errors that occurred.
11371 Some compilation errors, however, will throw an exception immediately.
11373 The I<flags> parameter is reserved for future use, and must always
11380 Perl_parse_barestmt(pTHX_ U32 flags)
11383 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11384 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11388 =for apidoc Amx|SV *|parse_label|U32 flags
11390 Parse a single label, possibly optional, of the type that may prefix a
11391 Perl statement. It is up to the caller to ensure that the dynamic parser
11392 state (L</PL_parser> et al) is correctly set to reflect the source of
11393 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11394 label is optional, otherwise it is mandatory.
11396 The name of the label is returned in the form of a fresh scalar. If an
11397 optional label is absent, a null pointer is returned.
11399 If an error occurs in parsing, which can only occur if the label is
11400 mandatory, a valid label is returned anyway. The error is reflected in
11401 the parser state, normally resulting in a single exception at the top
11402 level of parsing which covers all the compilation errors that occurred.
11408 Perl_parse_label(pTHX_ U32 flags)
11410 if (flags & ~PARSE_OPTIONAL)
11411 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11412 if (PL_lex_state == LEX_KNOWNEXT) {
11413 PL_parser->yychar = yylex();
11414 if (PL_parser->yychar == LABEL) {
11415 char *lpv = pl_yylval.pval;
11416 STRLEN llen = strlen(lpv);
11418 PL_parser->yychar = YYEMPTY;
11419 lsv = newSV_type(SVt_PV);
11420 SvPV_set(lsv, lpv);
11421 SvCUR_set(lsv, llen);
11422 SvLEN_set(lsv, llen+1);
11432 STRLEN wlen, bufptr_pos;
11436 if (!isIDFIRST_A(c))
11440 } while(isWORDCHAR_A(c));
11442 if (word_takes_any_delimeter(s, wlen))
11444 bufptr_pos = s - SvPVX(PL_linestr);
11446 lex_read_space(LEX_KEEP_PREVIOUS);
11448 s = SvPVX(PL_linestr) + bufptr_pos;
11449 if (t[0] == ':' && t[1] != ':') {
11450 PL_oldoldbufptr = PL_oldbufptr;
11453 return newSVpvn(s, wlen);
11457 if (flags & PARSE_OPTIONAL) {
11460 qerror(Perl_mess(aTHX_ "Parse error"));
11461 return newSVpvs("x");
11468 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11470 Parse a single complete Perl statement. This may be a normal imperative
11471 statement or a declaration that has compile-time effect, and may include
11472 optional labels. It is up to the caller to ensure that the dynamic
11473 parser state (L</PL_parser> et al) is correctly set to reflect the source
11474 of the code to be parsed and the lexical context for the statement.
11476 The op tree representing the statement is returned. This may be a
11477 null pointer if the statement is null, for example if it was actually
11478 a subroutine definition (which has compile-time side effects). If not
11479 null, it will be the result of a L</newSTATEOP> call, normally including
11480 a C<nextstate> or equivalent op.
11482 If an error occurs in parsing or compilation, in most cases a valid op
11483 tree (most likely null) is returned anyway. The error is reflected in
11484 the parser state, normally resulting in a single exception at the top
11485 level of parsing which covers all the compilation errors that occurred.
11486 Some compilation errors, however, will throw an exception immediately.
11488 The I<flags> parameter is reserved for future use, and must always
11495 Perl_parse_fullstmt(pTHX_ U32 flags)
11498 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11499 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11503 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11505 Parse a sequence of zero or more Perl statements. These may be normal
11506 imperative statements, including optional labels, or declarations
11507 that have compile-time effect, or any mixture thereof. The statement
11508 sequence ends when a closing brace or end-of-file is encountered in a
11509 place where a new statement could have validly started. It is up to
11510 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11511 is correctly set to reflect the source of the code to be parsed and the
11512 lexical context for the statements.
11514 The op tree representing the statement sequence is returned. This may
11515 be a null pointer if the statements were all null, for example if there
11516 were no statements or if there were only subroutine definitions (which
11517 have compile-time side effects). If not null, it will be a C<lineseq>
11518 list, normally including C<nextstate> or equivalent ops.
11520 If an error occurs in parsing or compilation, in most cases a valid op
11521 tree is returned anyway. The error is reflected in the parser state,
11522 normally resulting in a single exception at the top level of parsing
11523 which covers all the compilation errors that occurred. Some compilation
11524 errors, however, will throw an exception immediately.
11526 The I<flags> parameter is reserved for future use, and must always
11533 Perl_parse_stmtseq(pTHX_ U32 flags)
11538 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11539 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11540 c = lex_peek_unichar(0);
11541 if (c != -1 && c != /*{*/'}')
11542 qerror(Perl_mess(aTHX_ "Parse error"));
11547 Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
11549 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
11550 deprecate("qw(...) as parentheses");
11551 force_next((4<<24)|')');
11552 if (qwlist->op_type == OP_STUB) {
11556 start_force(PL_curforce);
11557 NEXTVAL_NEXTTOKE.opval = qwlist;
11560 force_next((2<<24)|'(');
11565 * c-indentation-style: bsd
11566 * c-basic-offset: 4
11567 * indent-tabs-mode: t
11570 * ex: set ts=8 sts=4 sw=4 noet: