3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_inline.h"
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
96 static const char* const ident_too_long = "Identifier too long";
98 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100 #define XENUMMASK 0x3f
101 #define XFAKEEOF 0x40
102 #define XFAKEBRACK 0x80
104 #ifdef USE_UTF8_SCRIPTS
105 # define UTF cBOOL(!IN_BYTES)
107 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
110 /* The maximum number of characters preceding the unrecognized one to display */
111 #define UNRECOGNIZED_PRECEDE_COUNT 10
113 /* In variables named $^X, these are the legal values for X.
114 * 1999-02-27 mjd-perl-patch@plover.com */
115 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
117 #define SPACE_OR_TAB(c) isBLANK_A(c)
119 #define HEXFP_PEEK(s) \
121 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
122 isALPHA_FOLD_EQ(s[0], 'p'))
124 /* LEX_* are values for PL_lex_state, the state of the lexer.
125 * They are arranged oddly so that the guard on the switch statement
126 * can get by with a single comparison (if the compiler is smart enough).
128 * These values refer to the various states within a sublex parse,
129 * i.e. within a double quotish string
132 /* #define LEX_NOTPARSING 11 is done in perl.h. */
134 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
135 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
136 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
137 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
138 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
140 /* at end of code, eg "$x" followed by: */
141 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
142 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
144 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
145 string or after \E, $foo, etc */
146 #define LEX_INTERPCONST 2 /* NOT USED */
147 #define LEX_FORMLINE 1 /* expecting a format line */
151 static const char* const lex_state_names[] = {
166 #include "keywords.h"
168 /* CLINE is a macro that ensures PL_copline has a sane value */
170 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
173 * Convenience functions to return different tokens and prime the
174 * lexer for the next token. They all take an argument.
176 * TOKEN : generic token (used for '(', DOLSHARP, etc)
177 * OPERATOR : generic operator
178 * AOPERATOR : assignment operator
179 * PREBLOCK : beginning the block after an if, while, foreach, ...
180 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
181 * PREREF : *EXPR where EXPR is not a simple identifier
182 * TERM : expression term
183 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
187 * FUN0OP : zero-argument function, with its op created in this file
188 * FUN1 : not used, except for not, which isn't a UNIOP
189 * BOop : bitwise or or xor
191 * BCop : bitwise complement
192 * SHop : shift operator
193 * PWop : power operator
194 * PMop : pattern-matching operator
195 * Aop : addition-level operator
196 * AopNOASSIGN : addition-level operator that is never part of .=
197 * Mop : multiplication-level operator
198 * Eop : equality-testing operator
199 * Rop : relational operator <= != gt
201 * Also see LOP and lop() below.
204 #ifdef DEBUGGING /* Serve -DT. */
205 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
207 # define REPORT(retval) (retval)
210 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
211 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
212 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
213 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
214 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
215 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
216 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
217 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
218 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
220 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
222 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
223 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
224 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
225 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
226 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
227 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
228 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
230 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
231 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
232 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
233 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
234 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
235 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
236 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
237 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
239 /* This bit of chicanery makes a unary function followed by
240 * a parenthesis into a function with one argument, highest precedence.
241 * The UNIDOR macro is for unary functions that can be followed by the //
242 * operator (such as C<shift // 0>).
244 #define UNI3(f,x,have_x) { \
245 pl_yylval.ival = f; \
246 if (have_x) PL_expect = x; \
248 PL_last_uni = PL_oldbufptr; \
249 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
251 return REPORT( (int)FUNC1 ); \
253 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
255 #define UNI(f) UNI3(f,XTERM,1)
256 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
257 #define UNIPROTO(f,optional) { \
258 if (optional) PL_last_uni = PL_oldbufptr; \
262 #define UNIBRACK(f) UNI3(f,0,0)
264 /* grandfather return to old style */
267 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
268 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
269 pl_yylval.ival = (f); \
275 #define COPLINE_INC_WITH_HERELINES \
277 CopLINE_inc(PL_curcop); \
278 if (PL_parser->herelines) \
279 CopLINE(PL_curcop) += PL_parser->herelines, \
280 PL_parser->herelines = 0; \
282 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
283 * is no sublex_push to follow. */
284 #define COPLINE_SET_FROM_MULTI_END \
286 CopLINE_set(PL_curcop, PL_multi_end); \
287 if (PL_multi_end != PL_multi_start) \
288 PL_parser->herelines = 0; \
294 /* how to interpret the pl_yylval associated with the token */
298 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
303 static struct debug_tokens {
305 enum token_type type;
307 } const debug_tokens[] =
309 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
310 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
311 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
312 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
313 { ARROW, TOKENTYPE_NONE, "ARROW" },
314 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
315 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
316 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
317 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
318 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
319 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
320 { DO, TOKENTYPE_NONE, "DO" },
321 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
322 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
323 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
324 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
325 { ELSE, TOKENTYPE_NONE, "ELSE" },
326 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
327 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
328 { FOR, TOKENTYPE_IVAL, "FOR" },
329 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
330 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
331 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
332 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
333 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
334 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
335 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
336 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
337 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
338 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
339 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
340 { IF, TOKENTYPE_IVAL, "IF" },
341 { LABEL, TOKENTYPE_PVAL, "LABEL" },
342 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
343 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
344 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
345 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
346 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
347 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
348 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
349 { MY, TOKENTYPE_IVAL, "MY" },
350 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
351 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
352 { OROP, TOKENTYPE_IVAL, "OROP" },
353 { OROR, TOKENTYPE_NONE, "OROR" },
354 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
355 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
356 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
357 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
358 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
359 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
360 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
361 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
362 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
363 { PREINC, TOKENTYPE_NONE, "PREINC" },
364 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
365 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
366 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
367 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
368 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
369 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
370 { SUB, TOKENTYPE_NONE, "SUB" },
371 { THING, TOKENTYPE_OPVAL, "THING" },
372 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
373 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
374 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
375 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
376 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
377 { USE, TOKENTYPE_IVAL, "USE" },
378 { WHEN, TOKENTYPE_IVAL, "WHEN" },
379 { WHILE, TOKENTYPE_IVAL, "WHILE" },
380 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
381 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
382 { 0, TOKENTYPE_NONE, NULL }
385 /* dump the returned token in rv, plus any optional arg in pl_yylval */
388 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
390 PERL_ARGS_ASSERT_TOKEREPORT;
393 const char *name = NULL;
394 enum token_type type = TOKENTYPE_NONE;
395 const struct debug_tokens *p;
396 SV* const report = newSVpvs("<== ");
398 for (p = debug_tokens; p->token; p++) {
399 if (p->token == (int)rv) {
406 Perl_sv_catpv(aTHX_ report, name);
407 else if (isGRAPH(rv))
409 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
411 sv_catpvs(report, " (pending identifier)");
414 sv_catpvs(report, "EOF");
416 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
421 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
423 case TOKENTYPE_OPNUM:
424 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
425 PL_op_name[lvalp->ival]);
428 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
430 case TOKENTYPE_OPVAL:
432 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
433 PL_op_name[lvalp->opval->op_type]);
434 if (lvalp->opval->op_type == OP_CONST) {
435 Perl_sv_catpvf(aTHX_ report, " %s",
436 SvPEEK(cSVOPx_sv(lvalp->opval)));
441 sv_catpvs(report, "(opval=null)");
444 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
450 /* print the buffer with suitable escapes */
453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
455 SV* const tmp = newSVpvs("");
457 PERL_ARGS_ASSERT_PRINTBUF;
459 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
460 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
468 S_deprecate_commaless_var_list(pTHX) {
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
477 * This subroutine looks for an '=' next to the operator that has just been
478 * parsed and turns it into an ASSIGNOP if it finds one.
482 S_ao(pTHX_ int toketype)
484 if (*PL_bufptr == '=') {
486 if (toketype == ANDAND)
487 pl_yylval.ival = OP_ANDASSIGN;
488 else if (toketype == OROR)
489 pl_yylval.ival = OP_ORASSIGN;
490 else if (toketype == DORDOR)
491 pl_yylval.ival = OP_DORASSIGN;
494 return REPORT(toketype);
499 * When Perl expects an operator and finds something else, no_op
500 * prints the warning. It always prints "<something> found where
501 * operator expected. It prints "Missing semicolon on previous line?"
502 * if the surprise occurs at the start of the line. "do you need to
503 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
504 * where the compiler doesn't know if foo is a method call or a function.
505 * It prints "Missing operator before end of line" if there's nothing
506 * after the missing operator, or "... before <...>" if there is something
507 * after the missing operator.
509 * PL_bufptr is expected to point to the start of the thing that was found,
510 * and s after the next token or partial token.
514 S_no_op(pTHX_ const char *const what, char *s)
516 char * const oldbp = PL_bufptr;
517 const bool is_first = (PL_oldbufptr == PL_linestart);
519 PERL_ARGS_ASSERT_NO_OP;
525 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
526 if (ckWARN_d(WARN_SYNTAX)) {
528 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
529 "\t(Missing semicolon on previous line?)\n");
530 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
532 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
533 t += UTF ? UTF8SKIP(t) : 1)
535 if (t < PL_bufptr && isSPACE(*t))
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Do you need to predeclare %" UTF8f "?)\n",
538 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
542 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
543 "\t(Missing operator before %" UTF8f "?)\n",
544 UTF8fARG(UTF, s - oldbp, oldbp));
552 * Complain about missing quote/regexp/heredoc terminator.
553 * If it's called with NULL then it cauterizes the line buffer.
554 * If we're in a delimited string and the delimiter is a control
555 * character, it's reformatted into a two-char sequence like ^C.
560 S_missingterm(pTHX_ char *s)
562 char tmpbuf[UTF8_MAXBYTES + 1];
567 char * const nl = strrchr(s,'\n');
572 else if (PL_multi_close < 32) {
574 tmpbuf[1] = (char)toCTRL(PL_multi_close);
579 if (LIKELY(PL_multi_close < 256)) {
580 *tmpbuf = (char)PL_multi_close;
585 *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
589 q = strchr(s,'"') ? '\'' : '"';
590 sv = sv_2mortal(newSVpv(s,0));
593 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
594 "%c anywhere before EOF",q,SVfARG(sv),q);
600 * Check whether the named feature is enabled.
603 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
605 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
607 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
609 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
611 if (namelen > MAX_FEATURE_LEN)
613 memcpy(&he_name[8], name, namelen);
615 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
616 REFCOUNTED_HE_EXISTS));
620 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
621 * utf16-to-utf8-reversed.
624 #ifdef PERL_CR_FILTER
628 const char *s = SvPVX_const(sv);
629 const char * const e = s + SvCUR(sv);
631 PERL_ARGS_ASSERT_STRIP_RETURN;
633 /* outer loop optimized to do nothing if there are no CR-LFs */
635 if (*s++ == '\r' && *s == '\n') {
636 /* hit a CR-LF, need to copy the rest */
640 if (*s == '\r' && s[1] == '\n')
651 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
653 const I32 count = FILTER_READ(idx+1, sv, maxlen);
654 if (count > 0 && !maxlen)
661 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
663 Creates and initialises a new lexer/parser state object, supplying
664 a context in which to lex and parse from a new source of Perl code.
665 A pointer to the new state object is placed in L</PL_parser>. An entry
666 is made on the save stack so that upon unwinding the new state object
667 will be destroyed and the former value of L</PL_parser> will be restored.
668 Nothing else need be done to clean up the parsing context.
670 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
671 non-null, provides a string (in SV form) containing code to be parsed.
672 A copy of the string is made, so subsequent modification of C<line>
673 does not affect parsing. C<rsfp>, if non-null, provides an input stream
674 from which code will be read to be parsed. If both are non-null, the
675 code in C<line> comes first and must consist of complete lines of input,
676 and C<rsfp> supplies the remainder of the source.
678 The C<flags> parameter is reserved for future use. Currently it is only
679 used by perl internally, so extensions should always pass zero.
684 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
685 can share filters with the current parser.
686 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
687 caller, hence isn't owned by the parser, so shouldn't be closed on parser
688 destruction. This is used to handle the case of defaulting to reading the
689 script from the standard input because no filename was given on the command
690 line (without getting confused by situation where STDIN has been closed, so
691 the script handle is opened on fd 0) */
694 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
696 const char *s = NULL;
697 yy_parser *parser, *oparser;
698 if (flags && flags & ~LEX_START_FLAGS)
699 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
701 /* create and initialise a parser */
703 Newxz(parser, 1, yy_parser);
704 parser->old_parser = oparser = PL_parser;
707 parser->stack = NULL;
708 parser->stack_max1 = NULL;
711 /* on scope exit, free this parser and restore any outer one */
713 parser->saved_curcop = PL_curcop;
715 /* initialise lexer state */
717 parser->nexttoke = 0;
718 parser->error_count = oparser ? oparser->error_count : 0;
719 parser->copline = parser->preambling = NOLINE;
720 parser->lex_state = LEX_NORMAL;
721 parser->expect = XSTATE;
723 parser->rsfp_filters =
724 !(flags & LEX_START_SAME_FILTER) || !oparser
726 : MUTABLE_AV(SvREFCNT_inc(
727 oparser->rsfp_filters
728 ? oparser->rsfp_filters
729 : (oparser->rsfp_filters = newAV())
732 Newx(parser->lex_brackstack, 120, char);
733 Newx(parser->lex_casestack, 12, char);
734 *parser->lex_casestack = '\0';
735 Newxz(parser->lex_shared, 1, LEXSHARED);
739 s = SvPV_const(line, len);
740 parser->linestr = flags & LEX_START_COPIED
741 ? SvREFCNT_inc_simple_NN(line)
742 : newSVpvn_flags(s, len, SvUTF8(line));
744 sv_catpvs(parser->linestr, "\n;");
746 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
748 parser->oldoldbufptr =
751 parser->linestart = SvPVX(parser->linestr);
752 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
753 parser->last_lop = parser->last_uni = NULL;
755 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
756 |LEX_DONT_CLOSE_RSFP));
757 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
758 |LEX_DONT_CLOSE_RSFP));
760 parser->in_pod = parser->filtered = 0;
764 /* delete a parser object */
767 Perl_parser_free(pTHX_ const yy_parser *parser)
769 PERL_ARGS_ASSERT_PARSER_FREE;
771 PL_curcop = parser->saved_curcop;
772 SvREFCNT_dec(parser->linestr);
774 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
775 PerlIO_clearerr(parser->rsfp);
776 else if (parser->rsfp && (!parser->old_parser
777 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
778 PerlIO_close(parser->rsfp);
779 SvREFCNT_dec(parser->rsfp_filters);
780 SvREFCNT_dec(parser->lex_stuff);
781 SvREFCNT_dec(parser->lex_sub_repl);
783 Safefree(parser->lex_brackstack);
784 Safefree(parser->lex_casestack);
785 Safefree(parser->lex_shared);
786 PL_parser = parser->old_parser;
791 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
793 I32 nexttoke = parser->nexttoke;
794 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
796 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
797 && parser->nextval[nexttoke].opval
798 && parser->nextval[nexttoke].opval->op_slabbed
799 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
800 op_free(parser->nextval[nexttoke].opval);
801 parser->nextval[nexttoke].opval = NULL;
808 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
810 Buffer scalar containing the chunk currently under consideration of the
811 text currently being lexed. This is always a plain string scalar (for
812 which C<SvPOK> is true). It is not intended to be used as a scalar by
813 normal scalar means; instead refer to the buffer directly by the pointer
814 variables described below.
816 The lexer maintains various C<char*> pointers to things in the
817 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
818 reallocated, all of these pointers must be updated. Don't attempt to
819 do this manually, but rather use L</lex_grow_linestr> if you need to
820 reallocate the buffer.
822 The content of the text chunk in the buffer is commonly exactly one
823 complete line of input, up to and including a newline terminator,
824 but there are situations where it is otherwise. The octets of the
825 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
826 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
827 flag on this scalar, which may disagree with it.
829 For direct examination of the buffer, the variable
830 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
831 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
832 of these pointers is usually preferable to examination of the scalar
833 through normal scalar means.
835 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
837 Direct pointer to the end of the chunk of text currently being lexed, the
838 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
839 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
840 always located at the end of the buffer, and does not count as part of
841 the buffer's contents.
843 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
845 Points to the current position of lexing inside the lexer buffer.
846 Characters around this point may be freely examined, within
847 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
848 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
849 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
851 Lexing code (whether in the Perl core or not) moves this pointer past
852 the characters that it consumes. It is also expected to perform some
853 bookkeeping whenever a newline character is consumed. This movement
854 can be more conveniently performed by the function L</lex_read_to>,
855 which handles newlines appropriately.
857 Interpretation of the buffer's octets can be abstracted out by
858 using the slightly higher-level functions L</lex_peek_unichar> and
859 L</lex_read_unichar>.
861 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
863 Points to the start of the current line inside the lexer buffer.
864 This is useful for indicating at which column an error occurred, and
865 not much else. This must be updated by any lexing code that consumes
866 a newline; the function L</lex_read_to> handles this detail.
872 =for apidoc Amx|bool|lex_bufutf8
874 Indicates whether the octets in the lexer buffer
875 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
876 of Unicode characters. If not, they should be interpreted as Latin-1
877 characters. This is analogous to the C<SvUTF8> flag for scalars.
879 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
880 contains valid UTF-8. Lexing code must be robust in the face of invalid
883 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
884 is significant, but not the whole story regarding the input character
885 encoding. Normally, when a file is being read, the scalar contains octets
886 and its C<SvUTF8> flag is off, but the octets should be interpreted as
887 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
888 however, the scalar may have the C<SvUTF8> flag on, and in this case its
889 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
890 is in effect. This logic may change in the future; use this function
891 instead of implementing the logic yourself.
897 Perl_lex_bufutf8(pTHX)
903 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
905 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
906 at least C<len> octets (including terminating C<NUL>). Returns a
907 pointer to the reallocated buffer. This is necessary before making
908 any direct modification of the buffer that would increase its length.
909 L</lex_stuff_pvn> provides a more convenient way to insert text into
912 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
913 this function updates all of the lexer's variables that point directly
920 Perl_lex_grow_linestr(pTHX_ STRLEN len)
924 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
925 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
928 linestr = PL_parser->linestr;
929 buf = SvPVX(linestr);
930 if (len <= SvLEN(linestr))
933 /* Is the lex_shared linestr SV the same as the current linestr SV?
934 * Only in this case does re_eval_start need adjusting, since it
935 * points within lex_shared->ls_linestr's buffer */
936 current = (linestr == PL_parser->lex_shared->ls_linestr);
938 bufend_pos = PL_parser->bufend - buf;
939 bufptr_pos = PL_parser->bufptr - buf;
940 oldbufptr_pos = PL_parser->oldbufptr - buf;
941 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
942 linestart_pos = PL_parser->linestart - buf;
943 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
944 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
945 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
946 PL_parser->lex_shared->re_eval_start - buf : 0;
948 buf = sv_grow(linestr, len);
950 PL_parser->bufend = buf + bufend_pos;
951 PL_parser->bufptr = buf + bufptr_pos;
952 PL_parser->oldbufptr = buf + oldbufptr_pos;
953 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
954 PL_parser->linestart = buf + linestart_pos;
955 if (PL_parser->last_uni)
956 PL_parser->last_uni = buf + last_uni_pos;
957 if (PL_parser->last_lop)
958 PL_parser->last_lop = buf + last_lop_pos;
959 if (current && PL_parser->lex_shared->re_eval_start)
960 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
965 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
967 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
968 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
969 reallocating the buffer if necessary. This means that lexing code that
970 runs later will see the characters as if they had appeared in the input.
971 It is not recommended to do this as part of normal parsing, and most
972 uses of this facility run the risk of the inserted characters being
973 interpreted in an unintended manner.
975 The string to be inserted is represented by C<len> octets starting
976 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
977 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
978 The characters are recoded for the lexer buffer, according to how the
979 buffer is currently being interpreted (L</lex_bufutf8>). If a string
980 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
981 function is more convenient.
987 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
991 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
992 if (flags & ~(LEX_STUFF_UTF8))
993 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
995 if (flags & LEX_STUFF_UTF8) {
998 STRLEN highhalf = 0; /* Count of variants */
999 const char *p, *e = pv+len;
1000 for (p = pv; p != e; p++) {
1001 if (! UTF8_IS_INVARIANT(*p)) {
1007 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1008 bufptr = PL_parser->bufptr;
1009 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1010 SvCUR_set(PL_parser->linestr,
1011 SvCUR(PL_parser->linestr) + len+highhalf);
1012 PL_parser->bufend += len+highhalf;
1013 for (p = pv; p != e; p++) {
1015 if (! UTF8_IS_INVARIANT(c)) {
1016 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1017 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1019 *bufptr++ = (char)c;
1024 if (flags & LEX_STUFF_UTF8) {
1025 STRLEN highhalf = 0;
1026 const char *p, *e = pv+len;
1027 for (p = pv; p != e; p++) {
1029 if (UTF8_IS_ABOVE_LATIN1(c)) {
1030 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1031 "non-Latin-1 character into Latin-1 input");
1032 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1035 } else if (! UTF8_IS_INVARIANT(c)) {
1036 /* malformed UTF-8 */
1038 SAVESPTR(PL_warnhook);
1039 PL_warnhook = PERL_WARNHOOK_FATAL;
1040 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1046 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1047 bufptr = PL_parser->bufptr;
1048 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1049 SvCUR_set(PL_parser->linestr,
1050 SvCUR(PL_parser->linestr) + len-highhalf);
1051 PL_parser->bufend += len-highhalf;
1054 if (UTF8_IS_INVARIANT(*p)) {
1060 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1066 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1067 bufptr = PL_parser->bufptr;
1068 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1069 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1070 PL_parser->bufend += len;
1071 Copy(pv, bufptr, len, char);
1077 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1079 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1080 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1081 reallocating the buffer if necessary. This means that lexing code that
1082 runs later will see the characters as if they had appeared in the input.
1083 It is not recommended to do this as part of normal parsing, and most
1084 uses of this facility run the risk of the inserted characters being
1085 interpreted in an unintended manner.
1087 The string to be inserted is represented by octets starting at C<pv>
1088 and continuing to the first nul. These octets are interpreted as either
1089 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1090 in C<flags>. The characters are recoded for the lexer buffer, according
1091 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1092 If it is not convenient to nul-terminate a string to be inserted, the
1093 L</lex_stuff_pvn> function is more appropriate.
1099 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1101 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1102 lex_stuff_pvn(pv, strlen(pv), flags);
1106 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1108 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1109 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1110 reallocating the buffer if necessary. This means that lexing code that
1111 runs later will see the characters as if they had appeared in the input.
1112 It is not recommended to do this as part of normal parsing, and most
1113 uses of this facility run the risk of the inserted characters being
1114 interpreted in an unintended manner.
1116 The string to be inserted is the string value of C<sv>. The characters
1117 are recoded for the lexer buffer, according to how the buffer is currently
1118 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1119 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1120 need to construct a scalar.
1126 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1130 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1134 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1138 =for apidoc Amx|void|lex_unstuff|char *ptr
1140 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1141 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1142 This hides the discarded text from any lexing code that runs later,
1143 as if the text had never appeared.
1145 This is not the normal way to consume lexed text. For that, use
1152 Perl_lex_unstuff(pTHX_ char *ptr)
1156 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1157 buf = PL_parser->bufptr;
1159 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1162 bufend = PL_parser->bufend;
1164 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1165 unstuff_len = ptr - buf;
1166 Move(ptr, buf, bufend+1-ptr, char);
1167 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1168 PL_parser->bufend = bufend - unstuff_len;
1172 =for apidoc Amx|void|lex_read_to|char *ptr
1174 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1175 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1176 performing the correct bookkeeping whenever a newline character is passed.
1177 This is the normal way to consume lexed text.
1179 Interpretation of the buffer's octets can be abstracted out by
1180 using the slightly higher-level functions L</lex_peek_unichar> and
1181 L</lex_read_unichar>.
1187 Perl_lex_read_to(pTHX_ char *ptr)
1190 PERL_ARGS_ASSERT_LEX_READ_TO;
1191 s = PL_parser->bufptr;
1192 if (ptr < s || ptr > PL_parser->bufend)
1193 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1194 for (; s != ptr; s++)
1196 COPLINE_INC_WITH_HERELINES;
1197 PL_parser->linestart = s+1;
1199 PL_parser->bufptr = ptr;
1203 =for apidoc Amx|void|lex_discard_to|char *ptr
1205 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1206 up to C<ptr>. The remaining content of the buffer will be moved, and
1207 all pointers into the buffer updated appropriately. C<ptr> must not
1208 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1209 it is not permitted to discard text that has yet to be lexed.
1211 Normally it is not necessarily to do this directly, because it suffices to
1212 use the implicit discarding behaviour of L</lex_next_chunk> and things
1213 based on it. However, if a token stretches across multiple lines,
1214 and the lexing code has kept multiple lines of text in the buffer for
1215 that purpose, then after completion of the token it would be wise to
1216 explicitly discard the now-unneeded earlier lines, to avoid future
1217 multi-line tokens growing the buffer without bound.
1223 Perl_lex_discard_to(pTHX_ char *ptr)
1227 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1228 buf = SvPVX(PL_parser->linestr);
1230 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1233 if (ptr > PL_parser->bufptr)
1234 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1235 discard_len = ptr - buf;
1236 if (PL_parser->oldbufptr < ptr)
1237 PL_parser->oldbufptr = ptr;
1238 if (PL_parser->oldoldbufptr < ptr)
1239 PL_parser->oldoldbufptr = ptr;
1240 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1241 PL_parser->last_uni = NULL;
1242 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1243 PL_parser->last_lop = NULL;
1244 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1245 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1246 PL_parser->bufend -= discard_len;
1247 PL_parser->bufptr -= discard_len;
1248 PL_parser->oldbufptr -= discard_len;
1249 PL_parser->oldoldbufptr -= discard_len;
1250 if (PL_parser->last_uni)
1251 PL_parser->last_uni -= discard_len;
1252 if (PL_parser->last_lop)
1253 PL_parser->last_lop -= discard_len;
1257 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1259 Reads in the next chunk of text to be lexed, appending it to
1260 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1261 looked to the end of the current chunk and wants to know more. It is
1262 usual, but not necessary, for lexing to have consumed the entirety of
1263 the current chunk at this time.
1265 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1266 chunk (i.e., the current chunk has been entirely consumed), normally the
1267 current chunk will be discarded at the same time that the new chunk is
1268 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1269 will not be discarded. If the current chunk has not been entirely
1270 consumed, then it will not be discarded regardless of the flag.
1272 Returns true if some new text was added to the buffer, or false if the
1273 buffer has reached the end of the input text.
1278 #define LEX_FAKE_EOF 0x80000000
1279 #define LEX_NO_TERM 0x40000000 /* here-doc */
1282 Perl_lex_next_chunk(pTHX_ U32 flags)
1286 STRLEN old_bufend_pos, new_bufend_pos;
1287 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1288 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1289 bool got_some_for_debugger = 0;
1291 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1292 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1293 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1295 linestr = PL_parser->linestr;
1296 buf = SvPVX(linestr);
1297 if (!(flags & LEX_KEEP_PREVIOUS)
1298 && PL_parser->bufptr == PL_parser->bufend)
1300 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1302 if (PL_parser->last_uni != PL_parser->bufend)
1303 PL_parser->last_uni = NULL;
1304 if (PL_parser->last_lop != PL_parser->bufend)
1305 PL_parser->last_lop = NULL;
1306 last_uni_pos = last_lop_pos = 0;
1310 old_bufend_pos = PL_parser->bufend - buf;
1311 bufptr_pos = PL_parser->bufptr - buf;
1312 oldbufptr_pos = PL_parser->oldbufptr - buf;
1313 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1314 linestart_pos = PL_parser->linestart - buf;
1315 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1316 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1318 if (flags & LEX_FAKE_EOF) {
1320 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1322 } else if (filter_gets(linestr, old_bufend_pos)) {
1324 got_some_for_debugger = 1;
1325 } else if (flags & LEX_NO_TERM) {
1328 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1331 /* End of real input. Close filehandle (unless it was STDIN),
1332 * then add implicit termination.
1334 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1335 PerlIO_clearerr(PL_parser->rsfp);
1336 else if (PL_parser->rsfp)
1337 (void)PerlIO_close(PL_parser->rsfp);
1338 PL_parser->rsfp = NULL;
1339 PL_parser->in_pod = PL_parser->filtered = 0;
1340 if (!PL_in_eval && PL_minus_p) {
1342 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1343 PL_minus_n = PL_minus_p = 0;
1344 } else if (!PL_in_eval && PL_minus_n) {
1345 sv_catpvs(linestr, /*{*/";}");
1348 sv_catpvs(linestr, ";");
1351 buf = SvPVX(linestr);
1352 new_bufend_pos = SvCUR(linestr);
1353 PL_parser->bufend = buf + new_bufend_pos;
1354 PL_parser->bufptr = buf + bufptr_pos;
1355 PL_parser->oldbufptr = buf + oldbufptr_pos;
1356 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1357 PL_parser->linestart = buf + linestart_pos;
1358 if (PL_parser->last_uni)
1359 PL_parser->last_uni = buf + last_uni_pos;
1360 if (PL_parser->last_lop)
1361 PL_parser->last_lop = buf + last_lop_pos;
1362 if (PL_parser->preambling != NOLINE) {
1363 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1364 PL_parser->preambling = NOLINE;
1366 if ( got_some_for_debugger
1367 && PERLDB_LINE_OR_SAVESRC
1368 && PL_curstash != PL_debstash)
1370 /* debugger active and we're not compiling the debugger code,
1371 * so store the line into the debugger's array of lines
1373 update_debugger_info(NULL, buf+old_bufend_pos,
1374 new_bufend_pos-old_bufend_pos);
1380 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1382 Looks ahead one (Unicode) character in the text currently being lexed.
1383 Returns the codepoint (unsigned integer value) of the next character,
1384 or -1 if lexing has reached the end of the input text. To consume the
1385 peeked character, use L</lex_read_unichar>.
1387 If the next character is in (or extends into) the next chunk of input
1388 text, the next chunk will be read in. Normally the current chunk will be
1389 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1390 bit set, then the current chunk will not be discarded.
1392 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1393 is encountered, an exception is generated.
1399 Perl_lex_peek_unichar(pTHX_ U32 flags)
1403 if (flags & ~(LEX_KEEP_PREVIOUS))
1404 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1405 s = PL_parser->bufptr;
1406 bufend = PL_parser->bufend;
1412 if (!lex_next_chunk(flags))
1414 s = PL_parser->bufptr;
1415 bufend = PL_parser->bufend;
1418 if (UTF8_IS_INVARIANT(head))
1420 if (UTF8_IS_START(head)) {
1421 len = UTF8SKIP(&head);
1422 while ((STRLEN)(bufend-s) < len) {
1423 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1425 s = PL_parser->bufptr;
1426 bufend = PL_parser->bufend;
1429 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1430 if (retlen == (STRLEN)-1) {
1431 /* malformed UTF-8 */
1433 SAVESPTR(PL_warnhook);
1434 PL_warnhook = PERL_WARNHOOK_FATAL;
1435 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1441 if (!lex_next_chunk(flags))
1443 s = PL_parser->bufptr;
1450 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1452 Reads the next (Unicode) character in the text currently being lexed.
1453 Returns the codepoint (unsigned integer value) of the character read,
1454 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1455 if lexing has reached the end of the input text. To non-destructively
1456 examine the next character, use L</lex_peek_unichar> instead.
1458 If the next character is in (or extends into) the next chunk of input
1459 text, the next chunk will be read in. Normally the current chunk will be
1460 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1461 bit set, then the current chunk will not be discarded.
1463 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1464 is encountered, an exception is generated.
1470 Perl_lex_read_unichar(pTHX_ U32 flags)
1473 if (flags & ~(LEX_KEEP_PREVIOUS))
1474 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1475 c = lex_peek_unichar(flags);
1478 COPLINE_INC_WITH_HERELINES;
1480 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1482 ++(PL_parser->bufptr);
1488 =for apidoc Amx|void|lex_read_space|U32 flags
1490 Reads optional spaces, in Perl style, in the text currently being
1491 lexed. The spaces may include ordinary whitespace characters and
1492 Perl-style comments. C<#line> directives are processed if encountered.
1493 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1494 at a non-space character (or the end of the input text).
1496 If spaces extend into the next chunk of input text, the next chunk will
1497 be read in. Normally the current chunk will be discarded at the same
1498 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1499 chunk will not be discarded.
1504 #define LEX_NO_INCLINE 0x40000000
1505 #define LEX_NO_NEXT_CHUNK 0x80000000
1508 Perl_lex_read_space(pTHX_ U32 flags)
1511 const bool can_incline = !(flags & LEX_NO_INCLINE);
1512 bool need_incline = 0;
1513 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1514 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1515 s = PL_parser->bufptr;
1516 bufend = PL_parser->bufend;
1522 } while (!(c == '\n' || (c == 0 && s == bufend)));
1523 } else if (c == '\n') {
1526 PL_parser->linestart = s;
1532 } else if (isSPACE(c)) {
1534 } else if (c == 0 && s == bufend) {
1537 if (flags & LEX_NO_NEXT_CHUNK)
1539 PL_parser->bufptr = s;
1540 l = CopLINE(PL_curcop);
1541 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1542 got_more = lex_next_chunk(flags);
1543 CopLINE_set(PL_curcop, l);
1544 s = PL_parser->bufptr;
1545 bufend = PL_parser->bufend;
1548 if (can_incline && need_incline && PL_parser->rsfp) {
1558 PL_parser->bufptr = s;
1563 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1565 This function performs syntax checking on a prototype, C<proto>.
1566 If C<warn> is true, any illegal characters or mismatched brackets
1567 will trigger illegalproto warnings, declaring that they were
1568 detected in the prototype for C<name>.
1570 The return value is C<true> if this is a valid prototype, and
1571 C<false> if it is not, regardless of whether C<warn> was C<true> or
1574 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1581 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1583 STRLEN len, origlen;
1585 bool bad_proto = FALSE;
1586 bool in_brackets = FALSE;
1587 bool after_slash = FALSE;
1588 char greedy_proto = ' ';
1589 bool proto_after_greedy_proto = FALSE;
1590 bool must_be_last = FALSE;
1591 bool underscore = FALSE;
1592 bool bad_proto_after_underscore = FALSE;
1594 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1599 p = SvPV(proto, len);
1601 for (; len--; p++) {
1604 proto_after_greedy_proto = TRUE;
1606 if (!strchr(";@%", *p))
1607 bad_proto_after_underscore = TRUE;
1610 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1617 in_brackets = FALSE;
1618 else if ((*p == '@' || *p == '%')
1622 must_be_last = TRUE;
1631 after_slash = FALSE;
1636 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1639 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1640 origlen, UNI_DISPLAY_ISPRINT)
1641 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1643 if (proto_after_greedy_proto)
1644 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1645 "Prototype after '%c' for %" SVf " : %s",
1646 greedy_proto, SVfARG(name), p);
1648 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1649 "Missing ']' in prototype for %" SVf " : %s",
1652 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1653 "Illegal character in prototype for %" SVf " : %s",
1655 if (bad_proto_after_underscore)
1656 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1657 "Illegal character after '_' in prototype for %" SVf " : %s",
1661 return (! (proto_after_greedy_proto || bad_proto) );
1666 * This subroutine has nothing to do with tilting, whether at windmills
1667 * or pinball tables. Its name is short for "increment line". It
1668 * increments the current line number in CopLINE(PL_curcop) and checks
1669 * to see whether the line starts with a comment of the form
1670 * # line 500 "foo.pm"
1671 * If so, it sets the current line number and file to the values in the comment.
1675 S_incline(pTHX_ const char *s)
1683 PERL_ARGS_ASSERT_INCLINE;
1685 COPLINE_INC_WITH_HERELINES;
1686 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1687 && s+1 == PL_bufend && *s == ';') {
1688 /* fake newline in string eval */
1689 CopLINE_dec(PL_curcop);
1694 while (SPACE_OR_TAB(*s))
1696 if (strEQs(s, "line"))
1700 if (SPACE_OR_TAB(*s))
1704 while (SPACE_OR_TAB(*s))
1712 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1714 while (SPACE_OR_TAB(*s))
1716 if (*s == '"' && (t = strchr(s+1, '"'))) {
1722 while (*t && !isSPACE(*t))
1726 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1728 if (*e != '\n' && *e != '\0')
1729 return; /* false alarm */
1731 if (!grok_atoUV(n, &uv, &e))
1733 line_num = ((line_t)uv) - 1;
1736 const STRLEN len = t - s;
1738 if (!PL_rsfp && !PL_parser->filtered) {
1739 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1740 * to *{"::_<newfilename"} */
1741 /* However, the long form of evals is only turned on by the
1742 debugger - usually they're "(eval %lu)" */
1743 GV * const cfgv = CopFILEGV(PL_curcop);
1746 STRLEN tmplen2 = len;
1750 if (tmplen2 + 2 <= sizeof smallbuf)
1753 Newx(tmpbuf2, tmplen2 + 2, char);
1758 memcpy(tmpbuf2 + 2, s, tmplen2);
1761 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1763 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1764 /* adjust ${"::_<newfilename"} to store the new file name */
1765 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1766 /* The line number may differ. If that is the case,
1767 alias the saved lines that are in the array.
1768 Otherwise alias the whole array. */
1769 if (CopLINE(PL_curcop) == line_num) {
1770 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1771 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1773 else if (GvAV(cfgv)) {
1774 AV * const av = GvAV(cfgv);
1775 const I32 start = CopLINE(PL_curcop)+1;
1776 I32 items = AvFILLp(av) - start;
1778 AV * const av2 = GvAVn(gv2);
1779 SV **svp = AvARRAY(av) + start;
1780 I32 l = (I32)line_num+1;
1782 av_store(av2, l++, SvREFCNT_inc(*svp++));
1787 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1790 CopFILE_free(PL_curcop);
1791 CopFILE_setn(PL_curcop, s, len);
1793 CopLINE_set(PL_curcop, line_num);
1797 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1799 AV *av = CopFILEAVx(PL_curcop);
1802 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1804 sv = *av_fetch(av, 0, 1);
1805 SvUPGRADE(sv, SVt_PVMG);
1807 if (!SvPOK(sv)) SvPVCLEAR(sv);
1809 sv_catsv(sv, orig_sv);
1811 sv_catpvn(sv, buf, len);
1816 if (PL_parser->preambling == NOLINE)
1817 av_store(av, CopLINE(PL_curcop), sv);
1823 * Called to gobble the appropriate amount and type of whitespace.
1824 * Skips comments as well.
1825 * Returns the next character after the whitespace that is skipped.
1828 * Same thing, but look ahead without incrementing line numbers or
1829 * adjusting PL_linestart.
1832 #define skipspace(s) skipspace_flags(s, 0)
1833 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1836 S_skipspace_flags(pTHX_ char *s, U32 flags)
1838 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1839 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1840 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1843 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1845 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1846 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1847 LEX_NO_NEXT_CHUNK : 0));
1849 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1850 if (PL_linestart > PL_bufptr)
1851 PL_bufptr = PL_linestart;
1859 * Check the unary operators to ensure there's no ambiguity in how they're
1860 * used. An ambiguous piece of code would be:
1862 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1863 * the +5 is its argument.
1872 if (PL_oldoldbufptr != PL_last_uni)
1874 while (isSPACE(*PL_last_uni))
1877 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1878 s += UTF ? UTF8SKIP(s) : 1;
1879 if ((t = strchr(s, '(')) && t < PL_bufptr)
1882 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1883 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1884 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1888 * LOP : macro to build a list operator. Its behaviour has been replaced
1889 * with a subroutine, S_lop() for which LOP is just another name.
1892 #define LOP(f,x) return lop(f,x,s)
1896 * Build a list operator (or something that might be one). The rules:
1897 * - if we have a next token, then it's a list operator (no parens) for
1898 * which the next token has already been parsed; e.g.,
1901 * - if the next thing is an opening paren, then it's a function
1902 * - else it's a list operator
1906 S_lop(pTHX_ I32 f, U8 x, char *s)
1908 PERL_ARGS_ASSERT_LOP;
1913 PL_last_lop = PL_oldbufptr;
1914 PL_last_lop_op = (OPCODE)f;
1919 return REPORT(FUNC);
1922 return REPORT(FUNC);
1925 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1926 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1927 return REPORT(LSTOP);
1933 * When the lexer realizes it knows the next token (for instance,
1934 * it is reordering tokens for the parser) then it can call S_force_next
1935 * to know what token to return the next time the lexer is called. Caller
1936 * will need to set PL_nextval[] and possibly PL_expect to ensure
1937 * the lexer handles the token correctly.
1941 S_force_next(pTHX_ I32 type)
1945 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1946 tokereport(type, &NEXTVAL_NEXTTOKE);
1949 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1950 PL_nexttype[PL_nexttoke] = type;
1957 * This subroutine handles postfix deref syntax after the arrow has already
1958 * been emitted. @* $* etc. are emitted as two separate token right here.
1959 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1960 * only the first, leaving yylex to find the next.
1964 S_postderef(pTHX_ int const funny, char const next)
1966 assert(funny == DOLSHARP || strchr("$@%&*", funny));
1968 PL_expect = XOPERATOR;
1969 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1970 assert('@' == funny || '$' == funny || DOLSHARP == funny);
1971 PL_lex_state = LEX_INTERPEND;
1973 force_next(POSTJOIN);
1979 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1980 && !PL_lex_brackets)
1982 PL_expect = XOPERATOR;
1991 int yyc = PL_parser->yychar;
1992 if (yyc != YYEMPTY) {
1994 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1995 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1996 PL_lex_allbrackets--;
1998 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1999 } else if (yyc == '('/*)*/) {
2000 PL_lex_allbrackets--;
2005 PL_parser->yychar = YYEMPTY;
2010 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2012 SV * const sv = newSVpvn_utf8(start, len,
2015 && !is_utf8_invariant_string((const U8*)start, len)
2016 && is_utf8_string((const U8*)start, len));
2022 * When the lexer knows the next thing is a word (for instance, it has
2023 * just seen -> and it knows that the next char is a word char, then
2024 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2028 * char *start : buffer position (must be within PL_linestr)
2029 * int token : PL_next* will be this type of bare word
2030 * (e.g., METHOD,BAREWORD)
2031 * int check_keyword : if true, Perl checks to make sure the word isn't
2032 * a keyword (do this if the word is a label, e.g. goto FOO)
2033 * int allow_pack : if true, : characters will also be allowed (require,
2034 * use, etc. do this)
2038 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2043 PERL_ARGS_ASSERT_FORCE_WORD;
2045 start = skipspace(start);
2047 if (isIDFIRST_lazy_if(s,UTF)
2048 || (allow_pack && *s == ':' && s[1] == ':') )
2050 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2051 if (check_keyword) {
2052 char *s2 = PL_tokenbuf;
2054 if (allow_pack && len > 6 && strEQs(s2, "CORE::"))
2056 if (keyword(s2, len2, 0))
2059 if (token == METHOD) {
2064 PL_expect = XOPERATOR;
2067 NEXTVAL_NEXTTOKE.opval
2068 = newSVOP(OP_CONST,0,
2069 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2070 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2078 * Called when the lexer wants $foo *foo &foo etc, but the program
2079 * text only contains the "foo" portion. The first argument is a pointer
2080 * to the "foo", and the second argument is the type symbol to prefix.
2081 * Forces the next token to be a "BAREWORD".
2082 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2086 S_force_ident(pTHX_ const char *s, int kind)
2088 PERL_ARGS_ASSERT_FORCE_IDENT;
2091 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2092 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2093 UTF ? SVf_UTF8 : 0));
2094 NEXTVAL_NEXTTOKE.opval = o;
2095 force_next(BAREWORD);
2097 o->op_private = OPpCONST_ENTERED;
2098 /* XXX see note in pp_entereval() for why we forgo typo
2099 warnings if the symbol must be introduced in an eval.
2101 gv_fetchpvn_flags(s, len,
2102 (PL_in_eval ? GV_ADDMULTI
2103 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2104 kind == '$' ? SVt_PV :
2105 kind == '@' ? SVt_PVAV :
2106 kind == '%' ? SVt_PVHV :
2114 S_force_ident_maybe_lex(pTHX_ char pit)
2116 NEXTVAL_NEXTTOKE.ival = pit;
2121 Perl_str_to_version(pTHX_ SV *sv)
2126 const char *start = SvPV_const(sv,len);
2127 const char * const end = start + len;
2128 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2130 PERL_ARGS_ASSERT_STR_TO_VERSION;
2132 while (start < end) {
2136 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2141 retval += ((NV)n)/nshift;
2150 * Forces the next token to be a version number.
2151 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2152 * and if "guessing" is TRUE, then no new token is created (and the caller
2153 * must use an alternative parsing method).
2157 S_force_version(pTHX_ char *s, int guessing)
2162 PERL_ARGS_ASSERT_FORCE_VERSION;
2170 while (isDIGIT(*d) || *d == '_' || *d == '.')
2172 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2174 s = scan_num(s, &pl_yylval);
2175 version = pl_yylval.opval;
2176 ver = cSVOPx(version)->op_sv;
2177 if (SvPOK(ver) && !SvNIOK(ver)) {
2178 SvUPGRADE(ver, SVt_PVNV);
2179 SvNV_set(ver, str_to_version(ver));
2180 SvNOK_on(ver); /* hint that it is a version */
2183 else if (guessing) {
2188 /* NOTE: The parser sees the package name and the VERSION swapped */
2189 NEXTVAL_NEXTTOKE.opval = version;
2190 force_next(BAREWORD);
2196 * S_force_strict_version
2197 * Forces the next token to be a version number using strict syntax rules.
2201 S_force_strict_version(pTHX_ char *s)
2204 const char *errstr = NULL;
2206 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2208 while (isSPACE(*s)) /* leading whitespace */
2211 if (is_STRICT_VERSION(s,&errstr)) {
2213 s = (char *)scan_version(s, ver, 0);
2214 version = newSVOP(OP_CONST, 0, ver);
2216 else if ((*s != ';' && *s != '{' && *s != '}' )
2217 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2221 yyerror(errstr); /* version required */
2225 /* NOTE: The parser sees the package name and the VERSION swapped */
2226 NEXTVAL_NEXTTOKE.opval = version;
2227 force_next(BAREWORD);
2234 * Tokenize a quoted string passed in as an SV. It finds the next
2235 * chunk, up to end of string or a backslash. It may make a new
2236 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2241 S_tokeq(pTHX_ SV *sv)
2248 PERL_ARGS_ASSERT_TOKEQ;
2252 assert (!SvIsCOW(sv));
2253 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2257 /* This is relying on the SV being "well formed" with a trailing '\0' */
2258 while (s < send && !(*s == '\\' && s[1] == '\\'))
2263 if ( PL_hints & HINT_NEW_STRING ) {
2264 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2265 SVs_TEMP | SvUTF8(sv));
2269 if (s + 1 < send && (s[1] == '\\'))
2270 s++; /* all that, just for this */
2275 SvCUR_set(sv, d - SvPVX_const(sv));
2277 if ( PL_hints & HINT_NEW_STRING )
2278 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2283 * Now come three functions related to double-quote context,
2284 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2285 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2286 * interact with PL_lex_state, and create fake ( ... ) argument lists
2287 * to handle functions and concatenation.
2291 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2296 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2298 * Pattern matching will set PL_lex_op to the pattern-matching op to
2299 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2301 * OP_CONST is easy--just make the new op and return.
2303 * Everything else becomes a FUNC.
2305 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2306 * had an OP_CONST. This just sets us up for a
2307 * call to S_sublex_push().
2311 S_sublex_start(pTHX)
2313 const I32 op_type = pl_yylval.ival;
2315 if (op_type == OP_NULL) {
2316 pl_yylval.opval = PL_lex_op;
2320 if (op_type == OP_CONST) {
2321 SV *sv = PL_lex_stuff;
2322 PL_lex_stuff = NULL;
2325 if (SvTYPE(sv) == SVt_PVIV) {
2326 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2328 const char * const p = SvPV_const(sv, len);
2329 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2333 pl_yylval.opval = newSVOP(op_type, 0, sv);
2337 PL_parser->lex_super_state = PL_lex_state;
2338 PL_parser->lex_sub_inwhat = (U16)op_type;
2339 PL_parser->lex_sub_op = PL_lex_op;
2340 PL_lex_state = LEX_INTERPPUSH;
2344 pl_yylval.opval = PL_lex_op;
2354 * Create a new scope to save the lexing state. The scope will be
2355 * ended in S_sublex_done. Returns a '(', starting the function arguments
2356 * to the uc, lc, etc. found before.
2357 * Sets PL_lex_state to LEX_INTERPCONCAT.
2364 const bool is_heredoc = PL_multi_close == '<';
2367 PL_lex_state = PL_parser->lex_super_state;
2368 SAVEI8(PL_lex_dojoin);
2369 SAVEI32(PL_lex_brackets);
2370 SAVEI32(PL_lex_allbrackets);
2371 SAVEI32(PL_lex_formbrack);
2372 SAVEI8(PL_lex_fakeeof);
2373 SAVEI32(PL_lex_casemods);
2374 SAVEI32(PL_lex_starts);
2375 SAVEI8(PL_lex_state);
2376 SAVESPTR(PL_lex_repl);
2377 SAVEVPTR(PL_lex_inpat);
2378 SAVEI16(PL_lex_inwhat);
2381 SAVECOPLINE(PL_curcop);
2382 SAVEI32(PL_multi_end);
2383 SAVEI32(PL_parser->herelines);
2384 PL_parser->herelines = 0;
2386 SAVEIV(PL_multi_close);
2387 SAVEPPTR(PL_bufptr);
2388 SAVEPPTR(PL_bufend);
2389 SAVEPPTR(PL_oldbufptr);
2390 SAVEPPTR(PL_oldoldbufptr);
2391 SAVEPPTR(PL_last_lop);
2392 SAVEPPTR(PL_last_uni);
2393 SAVEPPTR(PL_linestart);
2394 SAVESPTR(PL_linestr);
2395 SAVEGENERICPV(PL_lex_brackstack);
2396 SAVEGENERICPV(PL_lex_casestack);
2397 SAVEGENERICPV(PL_parser->lex_shared);
2398 SAVEBOOL(PL_parser->lex_re_reparsing);
2399 SAVEI32(PL_copline);
2401 /* The here-doc parser needs to be able to peek into outer lexing
2402 scopes to find the body of the here-doc. So we put PL_linestr and
2403 PL_bufptr into lex_shared, to ‘share’ those values.
2405 PL_parser->lex_shared->ls_linestr = PL_linestr;
2406 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2408 PL_linestr = PL_lex_stuff;
2409 PL_lex_repl = PL_parser->lex_sub_repl;
2410 PL_lex_stuff = NULL;
2411 PL_parser->lex_sub_repl = NULL;
2413 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2414 set for an inner quote-like operator and then an error causes scope-
2415 popping. We must not have a PL_lex_stuff value left dangling, as
2416 that breaks assumptions elsewhere. See bug #123617. */
2417 SAVEGENERICSV(PL_lex_stuff);
2418 SAVEGENERICSV(PL_parser->lex_sub_repl);
2420 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2421 = SvPVX(PL_linestr);
2422 PL_bufend += SvCUR(PL_linestr);
2423 PL_last_lop = PL_last_uni = NULL;
2424 SAVEFREESV(PL_linestr);
2425 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2427 PL_lex_dojoin = FALSE;
2428 PL_lex_brackets = PL_lex_formbrack = 0;
2429 PL_lex_allbrackets = 0;
2430 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2431 Newx(PL_lex_brackstack, 120, char);
2432 Newx(PL_lex_casestack, 12, char);
2433 PL_lex_casemods = 0;
2434 *PL_lex_casestack = '\0';
2436 PL_lex_state = LEX_INTERPCONCAT;
2438 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2439 PL_copline = NOLINE;
2441 Newxz(shared, 1, LEXSHARED);
2442 shared->ls_prev = PL_parser->lex_shared;
2443 PL_parser->lex_shared = shared;
2445 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2446 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2447 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2448 PL_lex_inpat = PL_parser->lex_sub_op;
2450 PL_lex_inpat = NULL;
2452 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2453 PL_in_eval &= ~EVAL_RE_REPARSING;
2460 * Restores lexer state after a S_sublex_push.
2466 if (!PL_lex_starts++) {
2467 SV * const sv = newSVpvs("");
2468 if (SvUTF8(PL_linestr))
2470 PL_expect = XOPERATOR;
2471 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2475 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2476 PL_lex_state = LEX_INTERPCASEMOD;
2480 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2481 assert(PL_lex_inwhat != OP_TRANSR);
2483 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2484 PL_linestr = PL_lex_repl;
2486 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2487 PL_bufend += SvCUR(PL_linestr);
2488 PL_last_lop = PL_last_uni = NULL;
2489 PL_lex_dojoin = FALSE;
2490 PL_lex_brackets = 0;
2491 PL_lex_allbrackets = 0;
2492 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2493 PL_lex_casemods = 0;
2494 *PL_lex_casestack = '\0';
2496 if (SvEVALED(PL_lex_repl)) {
2497 PL_lex_state = LEX_INTERPNORMAL;
2499 /* we don't clear PL_lex_repl here, so that we can check later
2500 whether this is an evalled subst; that means we rely on the
2501 logic to ensure sublex_done() is called again only via the
2502 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2505 PL_lex_state = LEX_INTERPCONCAT;
2508 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2509 CopLINE(PL_curcop) +=
2510 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2511 + PL_parser->herelines;
2512 PL_parser->herelines = 0;
2517 const line_t l = CopLINE(PL_curcop);
2519 if (PL_multi_close == '<')
2520 PL_parser->herelines += l - PL_multi_end;
2521 PL_bufend = SvPVX(PL_linestr);
2522 PL_bufend += SvCUR(PL_linestr);
2523 PL_expect = XOPERATOR;
2528 PERL_STATIC_INLINE SV*
2529 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2531 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2532 * interior, hence to the "}". Finds what the name resolves to, returning
2533 * an SV* containing it; NULL if no valid one found */
2535 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2542 const U8* first_bad_char_loc;
2543 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2545 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2548 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2549 "Unknown charname '' is deprecated");
2553 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2555 &first_bad_char_loc))
2557 /* If warnings are on, this will print a more detailed analysis of what
2558 * is wrong than the error message below */
2559 utf8n_to_uvchr(first_bad_char_loc,
2560 e - ((char *) first_bad_char_loc),
2563 /* We deliberately don't try to print the malformed character, which
2564 * might not print very well; it also may be just the first of many
2565 * malformations, so don't print what comes after it */
2566 yyerror_pv(Perl_form(aTHX_
2567 "Malformed UTF-8 character immediately after '%.*s'",
2568 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2573 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2574 /* include the <}> */
2575 e - backslash_ptr + 1);
2577 SvREFCNT_dec_NN(res);
2581 /* See if the charnames handler is the Perl core's, and if so, we can skip
2582 * the validation needed for a user-supplied one, as Perl's does its own
2584 table = GvHV(PL_hintgv); /* ^H */
2585 cvp = hv_fetchs(table, "charnames", FALSE);
2586 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2587 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2589 const char * const name = HvNAME(stash);
2590 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2591 && strEQ(name, "_charnames")) {
2596 /* Here, it isn't Perl's charname handler. We can't rely on a
2597 * user-supplied handler to validate the input name. For non-ut8 input,
2598 * look to see that the first character is legal. Then loop through the
2599 * rest checking that each is a continuation */
2601 /* This code makes the reasonable assumption that the only Latin1-range
2602 * characters that begin a character name alias are alphabetic, otherwise
2603 * would have to create a isCHARNAME_BEGIN macro */
2606 if (! isALPHAU(*s)) {
2611 if (! isCHARNAME_CONT(*s)) {
2614 if (*s == ' ' && *(s-1) == ' ') {
2621 /* Similarly for utf8. For invariants can check directly; for other
2622 * Latin1, can calculate their code point and check; otherwise use a
2624 if (UTF8_IS_INVARIANT(*s)) {
2625 if (! isALPHAU(*s)) {
2629 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2630 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2636 if (! PL_utf8_charname_begin) {
2637 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2638 PL_utf8_charname_begin = _core_swash_init("utf8",
2639 "_Perl_Charname_Begin",
2641 1, 0, NULL, &flags);
2643 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2650 if (UTF8_IS_INVARIANT(*s)) {
2651 if (! isCHARNAME_CONT(*s)) {
2654 if (*s == ' ' && *(s-1) == ' ') {
2659 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2660 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2667 if (! PL_utf8_charname_continue) {
2668 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2669 PL_utf8_charname_continue = _core_swash_init("utf8",
2670 "_Perl_Charname_Continue",
2672 1, 0, NULL, &flags);
2674 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2681 if (*(s-1) == ' ') {
2684 "charnames alias definitions may not contain trailing "
2685 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2686 (int)(s - backslash_ptr + 1), backslash_ptr,
2687 (int)(e - s + 1), s + 1
2689 UTF ? SVf_UTF8 : 0);
2693 if (SvUTF8(res)) { /* Don't accept malformed input */
2694 const U8* first_bad_char_loc;
2696 const char* const str = SvPV_const(res, len);
2697 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2698 /* If warnings are on, this will print a more detailed analysis of
2699 * what is wrong than the error message below */
2700 utf8n_to_uvchr(first_bad_char_loc,
2701 (char *) first_bad_char_loc - str,
2704 /* We deliberately don't try to print the malformed character,
2705 * which might not print very well; it also may be just the first
2706 * of many malformations, so don't print what comes after it */
2709 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2710 (int) (e - backslash_ptr + 1), backslash_ptr,
2711 (int) ((char *) first_bad_char_loc - str), str
2722 /* The final %.*s makes sure that should the trailing NUL be missing
2723 * that this print won't run off the end of the string */
2726 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2727 (int)(s - backslash_ptr + 1), backslash_ptr,
2728 (int)(e - s + 1), s + 1
2730 UTF ? SVf_UTF8 : 0);
2737 "charnames alias definitions may not contain a sequence of "
2738 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2739 (int)(s - backslash_ptr + 1), backslash_ptr,
2740 (int)(e - s + 1), s + 1
2742 UTF ? SVf_UTF8 : 0);
2749 Extracts the next constant part of a pattern, double-quoted string,
2750 or transliteration. This is terrifying code.
2752 For example, in parsing the double-quoted string "ab\x63$d", it would
2753 stop at the '$' and return an OP_CONST containing 'abc'.
2755 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2756 processing a pattern (PL_lex_inpat is true), a transliteration
2757 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2759 Returns a pointer to the character scanned up to. If this is
2760 advanced from the start pointer supplied (i.e. if anything was
2761 successfully parsed), will leave an OP_CONST for the substring scanned
2762 in pl_yylval. Caller must intuit reason for not parsing further
2763 by looking at the next characters herself.
2767 \N{FOO} => \N{U+hex_for_character_FOO}
2768 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2771 all other \-char, including \N and \N{ apart from \N{ABC}
2774 @ and $ where it appears to be a var, but not for $ as tail anchor
2778 In transliterations:
2779 characters are VERY literal, except for - not at the start or end
2780 of the string, which indicates a range. If the range is in bytes,
2781 scan_const expands the range to the full set of intermediate
2782 characters. If the range is in utf8, the hyphen is replaced with
2783 a certain range mark which will be handled by pmtrans() in op.c.
2785 In double-quoted strings:
2787 double-quoted style: \r and \n
2788 constants: \x31, etc.
2789 deprecated backrefs: \1 (in substitution replacements)
2790 case and quoting: \U \Q \E
2793 scan_const does *not* construct ops to handle interpolated strings.
2794 It stops processing as soon as it finds an embedded $ or @ variable
2795 and leaves it to the caller to work out what's going on.
2797 embedded arrays (whether in pattern or not) could be:
2798 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2800 $ in double-quoted strings must be the symbol of an embedded scalar.
2802 $ in pattern could be $foo or could be tail anchor. Assumption:
2803 it's a tail anchor if $ is the last thing in the string, or if it's
2804 followed by one of "()| \r\n\t"
2806 \1 (backreferences) are turned into $1 in substitutions
2808 The structure of the code is
2809 while (there's a character to process) {
2810 handle transliteration ranges
2811 skip regexp comments /(?#comment)/ and codes /(?{code})/
2812 skip #-initiated comments in //x patterns
2813 check for embedded arrays
2814 check for embedded scalars
2816 deprecate \1 in substitution replacements
2817 handle string-changing backslashes \l \U \Q \E, etc.
2818 switch (what was escaped) {
2819 handle \- in a transliteration (becomes a literal -)
2820 if a pattern and not \N{, go treat as regular character
2821 handle \132 (octal characters)
2822 handle \x15 and \x{1234} (hex characters)
2823 handle \N{name} (named characters, also \N{3,5} in a pattern)
2824 handle \cV (control characters)
2825 handle printf-style backslashes (\f, \r, \n, etc)
2828 } (end if backslash)
2829 handle regular character
2830 } (end while character to read)
2835 S_scan_const(pTHX_ char *start)
2837 char *send = PL_bufend; /* end of the constant */
2838 SV *sv = newSV(send - start); /* sv for the constant. See note below
2840 char *s = start; /* start of the constant */
2841 char *d = SvPVX(sv); /* destination for copies */
2842 bool dorange = FALSE; /* are we in a translit range? */
2843 bool didrange = FALSE; /* did we just finish a range? */
2844 bool in_charclass = FALSE; /* within /[...]/ */
2845 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2846 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2847 UTF8? But, this can show as true
2848 when the source isn't utf8, as for
2849 example when it is entirely composed
2851 SV *res; /* result from charnames */
2852 STRLEN offset_to_max; /* The offset in the output to where the range
2853 high-end character is temporarily placed */
2855 /* Note on sizing: The scanned constant is placed into sv, which is
2856 * initialized by newSV() assuming one byte of output for every byte of
2857 * input. This routine expects newSV() to allocate an extra byte for a
2858 * trailing NUL, which this routine will append if it gets to the end of
2859 * the input. There may be more bytes of input than output (eg., \N{LATIN
2860 * CAPITAL LETTER A}), or more output than input if the constant ends up
2861 * recoded to utf8, but each time a construct is found that might increase
2862 * the needed size, SvGROW() is called. Its size parameter each time is
2863 * based on the best guess estimate at the time, namely the length used so
2864 * far, plus the length the current construct will occupy, plus room for
2865 * the trailing NUL, plus one byte for every input byte still unscanned */
2867 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2870 int backslash_N = 0; /* ? was the character from \N{} */
2871 int non_portable_endpoint = 0; /* ? In a range is an endpoint
2872 platform-specific like \x65 */
2875 PERL_ARGS_ASSERT_SCAN_CONST;
2877 assert(PL_lex_inwhat != OP_TRANSR);
2878 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2879 /* If we are doing a trans and we know we want UTF8 set expectation */
2880 has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2881 this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2884 /* Protect sv from errors and fatal warnings. */
2885 ENTER_with_name("scan_const");
2889 || dorange /* Handle tr/// range at right edge of input */
2892 /* get transliterations out of the way (they're most literal) */
2893 if (PL_lex_inwhat == OP_TRANS) {
2895 /* But there isn't any special handling necessary unless there is a
2896 * range, so for most cases we just drop down and handle the value
2897 * as any other. There are two exceptions.
2899 * 1. A minus sign indicates that we are actually going to have
2900 * a range. In this case, skip the '-', set a flag, then drop
2901 * down to handle what should be the end range value.
2902 * 2. After we've handled that value, the next time through, that
2903 * flag is set and we fix up the range.
2905 * Ranges entirely within Latin1 are expanded out entirely, in
2906 * order to avoid the significant overhead of making a swash.
2907 * Ranges that extend above Latin1 have to have a swash, so there
2908 * is no advantage to abbreviating them here, so they are stored
2909 * here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies
2910 * a hyphen without any possible ambiguity. On EBCDIC machines, if
2911 * the range is expressed as Unicode, the Latin1 portion is
2912 * expanded out even if the entire range extends above Latin1.
2913 * This is because each code point in it has to be processed here
2914 * individually to get its native translation */
2918 /* Here, we don't think we're in a range. If we've processed
2919 * at least one character, then see if this next one is a '-',
2920 * indicating the previous one was the start of a range. But
2921 * don't bother if we're too close to the end for the minus to
2923 if (*s != '-' || s >= send - 1 || s == start) {
2925 /* A regular character. Process like any other, but first
2926 * clear any flags */
2930 non_portable_endpoint = 0;
2933 /* Drops down to generic code to process current byte */
2936 if (didrange) { /* Something like y/A-C-Z// */
2937 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2942 s++; /* Skip past the minus */
2944 /* d now points to where the end-range character will be
2945 * placed. Save it so won't have to go finding it later,
2946 * and drop down to get that character. (Actually we
2947 * instead save the offset, to handle the case where a
2948 * realloc in the meantime could change the actual
2949 * pointer). We'll finish processing the range the next
2950 * time through the loop */
2951 offset_to_max = d - SvPVX_const(sv);
2953 } /* End of not a range */
2955 /* Here we have parsed a range. Now must handle it. At this
2957 * 'sv' is a SV* that contains the output string we are
2958 * constructing. The final two characters in that string
2959 * are the range start and range end, in order.
2960 * 'd' points to just beyond the range end in the 'sv' string,
2961 * where we would next place something
2962 * 'offset_to_max' is the offset in 'sv' at which the character
2963 * before 'd' begins.
2965 const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2966 const char * min_ptr;
2968 IV range_max; /* last character in range */
2972 bool convert_unicode;
2973 IV real_range_max = 0;
2976 /* Get the range-ends code point values. */
2978 /* We know the utf8 is valid, because we just constructed
2979 * it ourselves in previous loop iterations */
2980 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
2981 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
2982 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
2985 min_ptr = max_ptr - 1;
2986 range_min = * (U8*) min_ptr;
2987 range_max = * (U8*) max_ptr;
2991 /* On EBCDIC platforms, we may have to deal with portable
2992 * ranges. These happen if at least one range endpoint is a
2993 * Unicode value (\N{...}), or if the range is a subset of
2994 * [A-Z] or [a-z], and both ends are literal characters,
2995 * like 'A', and not like \x{C1} */
2996 if ((convert_unicode
2997 = cBOOL(backslash_N) /* \N{} forces Unicode, hence
2999 || ( ! non_portable_endpoint
3000 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3001 || (isUPPER_A(range_min) && isUPPER_A(range_max))))
3004 /* Special handling is needed for these portable ranges.
3005 * They are defined to all be in Unicode terms, which
3006 * include all Unicode code points between the end points.
3007 * Convert to Unicode to get the Unicode range. Later we
3008 * will convert each code point in the range back to
3010 range_min = NATIVE_TO_UNI(range_min);
3011 range_max = NATIVE_TO_UNI(range_max);
3015 if (range_min > range_max) {
3017 if (convert_unicode) {
3018 /* Need to convert back to native for meaningful
3019 * messages for this platform */
3020 range_min = UNI_TO_NATIVE(range_min);
3021 range_max = UNI_TO_NATIVE(range_max);
3025 /* Use the characters themselves for the error message if
3026 * ASCII printables; otherwise some visible representation
3028 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3030 "Invalid range \"%c-%c\" in transliteration operator",
3031 (char)range_min, (char)range_max);
3034 else if (convert_unicode) {
3035 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3037 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
3038 " in transliteration operator",
3039 range_min, range_max);
3043 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3045 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3046 " in transliteration operator",
3047 range_min, range_max);
3053 /* We try to avoid creating a swash. If the upper end of
3054 * this range is below 256, this range won't force a swash;
3055 * otherwise it does force a swash, and as long as we have
3056 * to have one, we might as well not expand things out.
3057 * But if it's EBCDIC, we may have to look at each
3058 * character below 256 if we have to convert to/from
3062 && (range_min > 255 || ! convert_unicode)
3065 /* Move the high character one byte to the right; then
3066 * insert between it and the range begin, an illegal
3067 * byte which serves to indicate this is a range (using
3068 * a '-' could be ambiguous). */
3070 while (e-- > max_ptr) {
3073 *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3077 /* Here, we're going to expand out the range. For EBCDIC
3078 * the range can extend above 255 (not so in ASCII), so
3079 * for EBCDIC, split it into the parts above and below
3082 if (range_max > 255) {
3083 real_range_max = range_max;
3089 /* Here we need to expand out the string to contain each
3090 * character in the range. Grow the output to handle this */
3092 save_offset = min_ptr - SvPVX_const(sv);
3094 /* The base growth is the number of code points in the range */
3095 grow = range_max - range_min + 1;
3098 /* But if the output is UTF-8, some of those characters may
3099 * need two bytes (since the maximum range value here is
3100 * 255, the max bytes per character is two). On ASCII
3101 * platforms, it's not much trouble to get an accurate
3102 * count of what's needed. But on EBCDIC, the ones that
3103 * need 2 bytes are scattered around, so just use a worst
3104 * case value instead of calculating for that platform. */
3108 /* Only those above 127 require 2 bytes. This may be
3109 * everything in the range, or not */
3110 if (range_min > 127) {
3113 else if (range_max > 127) {
3114 grow += range_max - 127;
3119 /* Subtract 3 for the bytes that were already accounted for
3120 * (min, max, and the hyphen) */
3121 d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3);
3124 /* Here, we expand out the range. */
3125 if (convert_unicode) {
3128 /* Recall that the min and max are now in Unicode terms, so
3129 * we have to convert each character to its native
3132 for (i = range_min; i <= range_max; i++) {
3133 append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3138 for (i = range_min; i <= range_max; i++) {
3139 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3145 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3149 /* Here, no conversions are necessary, which means that the
3150 * first character in the range is already in 'd' and
3151 * valid, so we can skip overwriting it */
3154 for (i = range_min + 1; i <= range_max; i++) {
3155 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3160 for (i = range_min + 1; i <= range_max; i++) {
3167 /* If the original range extended above 255, add in that portion. */
3168 if (real_range_max) {
3169 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3170 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3171 if (real_range_max > 0x101)
3172 *d++ = (char) ILLEGAL_UTF8_BYTE;
3173 if (real_range_max > 0x100)
3174 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3179 /* mark the range as done, and continue */
3183 non_portable_endpoint = 0;
3187 } /* End of is a range */
3188 } /* End of transliteration. Joins main code after these else's */
3189 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3192 while (s1 >= start && *s1-- == '\\')
3195 in_charclass = TRUE;
3198 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3201 while (s1 >= start && *s1-- == '\\')
3204 in_charclass = FALSE;
3207 /* skip for regexp comments /(?#comment)/, except for the last
3208 * char, which will be done separately.
3209 * Stop on (?{..}) and friends */
3211 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3213 while (s+1 < send && *s != ')')
3216 else if (!PL_lex_casemods
3217 && ( s[2] == '{' /* This should match regcomp.c */
3218 || (s[2] == '?' && s[3] == '{')))
3224 /* likewise skip #-initiated comments in //x patterns */
3228 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3230 while (s+1 < send && *s != '\n')
3234 /* no further processing of single-quoted regex */
3235 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3236 goto default_action;
3238 /* check for embedded arrays
3239 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3241 else if (*s == '@' && s[1]) {
3242 if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
3244 if (strchr(":'{$", s[1]))
3246 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3247 break; /* in regexp, neither @+ nor @- are interpolated */
3250 /* check for embedded scalars. only stop if we're sure it's a
3253 else if (*s == '$') {
3254 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3256 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3258 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3259 "Possible unintended interpolation of $\\ in regex");
3261 break; /* in regexp, $ might be tail anchor */
3265 /* End of else if chain - OP_TRANS rejoin rest */
3268 if (*s == '\\' && s+1 < send) {
3269 char* e; /* Can be used for ending '}', etc. */
3273 /* warn on \1 - \9 in substitution replacements, but note that \11
3274 * is an octal; and \19 is \1 followed by '9' */
3275 if (PL_lex_inwhat == OP_SUBST
3281 /* diag_listed_as: \%d better written as $%d */
3282 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3287 /* string-change backslash escapes */
3288 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3292 /* In a pattern, process \N, but skip any other backslash escapes.
3293 * This is because we don't want to translate an escape sequence
3294 * into a meta symbol and have the regex compiler use the meta
3295 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3296 * in spite of this, we do have to process \N here while the proper
3297 * charnames handler is in scope. See bugs #56444 and #62056.
3299 * There is a complication because \N in a pattern may also stand
3300 * for 'match a non-nl', and not mean a charname, in which case its
3301 * processing should be deferred to the regex compiler. To be a
3302 * charname it must be followed immediately by a '{', and not look
3303 * like \N followed by a curly quantifier, i.e., not something like
3304 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3306 else if (PL_lex_inpat
3309 || regcurly(s + 1)))
3312 goto default_action;
3318 if ((isALPHANUMERIC(*s)))
3319 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3320 "Unrecognized escape \\%c passed through",
3322 /* default action is to copy the quoted character */
3323 goto default_action;
3326 /* eg. \132 indicates the octal constant 0132 */
3327 case '0': case '1': case '2': case '3':
3328 case '4': case '5': case '6': case '7':
3330 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3332 uv = grok_oct(s, &len, &flags, NULL);
3334 if (len < 3 && s < send && isDIGIT(*s)
3335 && ckWARN(WARN_MISC))
3337 Perl_warner(aTHX_ packWARN(WARN_MISC),
3338 "%s", form_short_octal_warning(s, len));
3341 goto NUM_ESCAPE_INSERT;
3343 /* eg. \o{24} indicates the octal constant \024 */
3348 bool valid = grok_bslash_o(&s, &uv, &error,
3349 TRUE, /* Output warning */
3350 FALSE, /* Not strict */
3351 TRUE, /* Output warnings for
3358 goto NUM_ESCAPE_INSERT;
3361 /* eg. \x24 indicates the hex constant 0x24 */
3366 bool valid = grok_bslash_x(&s, &uv, &error,
3367 TRUE, /* Output warning */
3368 FALSE, /* Not strict */
3369 TRUE, /* Output warnings for
3379 /* Insert oct or hex escaped character. */
3381 /* Here uv is the ordinal of the next character being added */
3382 if (UVCHR_IS_INVARIANT(uv)) {
3386 if (!has_utf8 && uv > 255) {
3387 /* Might need to recode whatever we have accumulated so
3388 * far if it contains any chars variant in utf8 or
3391 SvCUR_set(sv, d - SvPVX_const(sv));
3394 /* See Note on sizing above. */
3395 sv_utf8_upgrade_flags_grow(
3397 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3398 /* Above-latin1 in string
3399 * implies no encoding */
3400 |SV_UTF8_NO_ENCODING,
3401 UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
3402 d = SvPVX(sv) + SvCUR(sv);
3407 /* Usually, there will already be enough room in 'sv'
3408 * since such escapes are likely longer than any UTF-8
3409 * sequence they can end up as. This isn't the case on
3410 * EBCDIC where \x{40000000} contains 12 bytes, and the
3411 * UTF-8 for it contains 14. And, we have to allow for
3412 * a trailing NUL. It probably can't happen on ASCII
3413 * platforms, but be safe */
3414 const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
3416 if (UNLIKELY(needed > SvLEN(sv))) {
3417 SvCUR_set(sv, d - SvPVX_const(sv));
3418 d = sv_grow(sv, needed) + SvCUR(sv);
3421 d = (char*)uvchr_to_utf8((U8*)d, uv);
3422 if (PL_lex_inwhat == OP_TRANS
3423 && PL_parser->lex_sub_op)
3425 PL_parser->lex_sub_op->op_private |=
3426 (PL_lex_repl ? OPpTRANS_FROM_UTF
3435 non_portable_endpoint++;
3440 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3441 * named character, like \N{LATIN SMALL LETTER A}, or a named
3442 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3443 * GRAVE} (except y/// can't handle the latter, croaking). For
3444 * convenience all three forms are referred to as "named
3445 * characters" below.
3447 * For patterns, \N also can mean to match a non-newline. Code
3448 * before this 'switch' statement should already have handled
3449 * this situation, and hence this code only has to deal with
3450 * the named character cases.
3452 * For non-patterns, the named characters are converted to
3453 * their string equivalents. In patterns, named characters are
3454 * not converted to their ultimate forms for the same reasons
3455 * that other escapes aren't. Instead, they are converted to
3456 * the \N{U+...} form to get the value from the charnames that
3457 * is in effect right now, while preserving the fact that it
3458 * was a named character, so that the regex compiler knows
3461 * The structure of this section of code (besides checking for
3462 * errors and upgrading to utf8) is:
3463 * If the named character is of the form \N{U+...}, pass it
3464 * through if a pattern; otherwise convert the code point
3466 * Otherwise must be some \N{NAME}: convert to
3467 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3469 * Transliteration is an exception. The conversion to utf8 is
3470 * only done if the code point requires it to be representable.
3472 * Here, 's' points to the 'N'; the test below is guaranteed to
3473 * succeed if we are being called on a pattern, as we already
3474 * know from a test above that the next character is a '{'. A
3475 * non-pattern \N must mean 'named character', which requires
3479 yyerror("Missing braces on \\N{}");
3484 /* If there is no matching '}', it is an error. */
3485 if (! (e = strchr(s, '}'))) {
3486 if (! PL_lex_inpat) {
3487 yyerror("Missing right brace on \\N{}");
3489 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3494 /* Here it looks like a named character */
3496 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3497 s += 2; /* Skip to next char after the 'U+' */
3500 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3501 /* Check the syntax. */
3504 if (!isXDIGIT(*s)) {
3507 "Invalid hexadecimal number in \\N{U+...}"
3515 else if ((*s == '.' || *s == '_')
3521 /* Pass everything through unchanged.
3522 * +1 is for the '}' */
3523 Copy(orig_s, d, e - orig_s + 1, char);
3524 d += e - orig_s + 1;
3526 else { /* Not a pattern: convert the hex to string */
3527 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3528 | PERL_SCAN_SILENT_ILLDIGIT
3529 | PERL_SCAN_DISALLOW_PREFIX;
3531 uv = grok_hex(s, &len, &flags, NULL);
3532 if (len == 0 || (len != (STRLEN)(e - s)))
3535 /* For non-tr///, if the destination is not in utf8,
3536 * unconditionally recode it to be so. This is
3537 * because \N{} implies Unicode semantics, and scalars
3538 * have to be in utf8 to guarantee those semantics.
3539 * tr/// doesn't care about Unicode rules, so no need
3540 * there to upgrade to UTF-8 for small enough code
3542 if (! has_utf8 && ( uv > 0xFF
3543 || PL_lex_inwhat != OP_TRANS))
3545 SvCUR_set(sv, d - SvPVX_const(sv));
3548 /* See Note on sizing above. */
3549 sv_utf8_upgrade_flags_grow(
3551 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3552 OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
3553 d = SvPVX(sv) + SvCUR(sv);
3557 /* Add the (Unicode) code point to the output. */
3558 if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3559 *d++ = (char) LATIN1_TO_NATIVE(uv);
3562 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3566 else /* Here is \N{NAME} but not \N{U+...}. */
3567 if ((res = get_and_check_backslash_N_name(s, e)))
3570 const char *str = SvPV_const(res, len);
3573 if (! len) { /* The name resolved to an empty string */
3574 Copy("\\N{}", d, 4, char);
3578 /* In order to not lose information for the regex
3579 * compiler, pass the result in the specially made
3580 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3581 * the code points in hex of each character
3582 * returned by charnames */
3584 const char *str_end = str + len;
3585 const STRLEN off = d - SvPVX_const(sv);
3587 if (! SvUTF8(res)) {
3588 /* For the non-UTF-8 case, we can determine the
3589 * exact length needed without having to parse
3590 * through the string. Each character takes up
3591 * 2 hex digits plus either a trailing dot or
3593 const char initial_text[] = "\\N{U+";
3594 const STRLEN initial_len = sizeof(initial_text)
3596 d = off + SvGROW(sv, off
3599 /* +1 for trailing NUL */
3602 + (STRLEN)(send - e));
3603 Copy(initial_text, d, initial_len, char);
3605 while (str < str_end) {
3608 my_snprintf(hex_string,
3612 /* The regex compiler is
3613 * expecting Unicode, not
3615 NATIVE_TO_LATIN1(*str));
3616 PERL_MY_SNPRINTF_POST_GUARD(len,
3617 sizeof(hex_string));
3618 Copy(hex_string, d, 3, char);
3622 d--; /* Below, we will overwrite the final
3623 dot with a right brace */
3626 STRLEN char_length; /* cur char's byte length */
3628 /* and the number of bytes after this is
3629 * translated into hex digits */
3630 STRLEN output_length;
3632 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3633 * for max('U+', '.'); and 1 for NUL */
3634 char hex_string[2 * UTF8_MAXBYTES + 5];
3636 /* Get the first character of the result. */
3637 U32 uv = utf8n_to_uvchr((U8 *) str,
3641 /* Convert first code point to Unicode hex,
3642 * including the boiler plate before it. */
3644 my_snprintf(hex_string, sizeof(hex_string),
3646 (unsigned int) NATIVE_TO_UNI(uv));
3648 /* Make sure there is enough space to hold it */
3649 d = off + SvGROW(sv, off
3651 + (STRLEN)(send - e)
3652 + 2); /* '}' + NUL */
3654 Copy(hex_string, d, output_length, char);
3657 /* For each subsequent character, append dot and
3658 * its Unicode code point in hex */
3659 while ((str += char_length) < str_end) {
3660 const STRLEN off = d - SvPVX_const(sv);
3661 U32 uv = utf8n_to_uvchr((U8 *) str,
3666 my_snprintf(hex_string,
3669 (unsigned int) NATIVE_TO_UNI(uv));
3671 d = off + SvGROW(sv, off
3673 + (STRLEN)(send - e)
3674 + 2); /* '}' + NUL */
3675 Copy(hex_string, d, output_length, char);
3680 *d++ = '}'; /* Done. Add the trailing brace */
3683 else { /* Here, not in a pattern. Convert the name to a
3686 if (PL_lex_inwhat == OP_TRANS) {
3687 str = SvPV_const(res, len);
3688 if (len > ((SvUTF8(res))
3692 yyerror(Perl_form(aTHX_
3693 "%.*s must not be a named sequence"
3694 " in transliteration operator",
3695 /* +1 to include the "}" */
3696 (int) (e + 1 - start), start));
3697 goto end_backslash_N;
3700 else if (! SvUTF8(res)) {
3701 /* Make sure \N{} return is UTF-8. This is because
3702 * \N{} implies Unicode semantics, and scalars have
3703 * to be in utf8 to guarantee those semantics; but
3704 * not needed in tr/// */
3705 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3706 str = SvPV_const(res, len);
3709 /* Upgrade destination to be utf8 if this new
3711 if (! has_utf8 && SvUTF8(res)) {
3712 SvCUR_set(sv, d - SvPVX_const(sv));
3715 /* See Note on sizing above. */
3716 sv_utf8_upgrade_flags_grow(sv,
3717 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3718 len + (STRLEN)(send - s) + 1);
3719 d = SvPVX(sv) + SvCUR(sv);
3721 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3723 /* See Note on sizing above. (NOTE: SvCUR() is not
3724 * set correctly here). */
3725 const STRLEN off = d - SvPVX_const(sv);
3726 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3728 Copy(str, d, len, char);
3734 } /* End \N{NAME} */
3738 backslash_N++; /* \N{} is defined to be Unicode */
3740 s = e + 1; /* Point to just after the '}' */
3743 /* \c is a control character */
3747 *d++ = grok_bslash_c(*s++, 1);
3750 yyerror("Missing control char name in \\c");
3753 non_portable_endpoint++;
3757 /* printf-style backslashes, formfeeds, newlines, etc */
3783 } /* end if (backslash) */
3786 /* Just copy the input to the output, though we may have to convert
3789 * If the input has the same representation in UTF-8 as not, it will be
3790 * a single byte, and we don't care about UTF8ness; or if neither
3791 * source nor output is UTF-8, just copy the byte */
3792 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s)) || (! this_utf8 && ! has_utf8))
3799 /* One might think that it is wasted effort in the case of the
3800 * source being utf8 (this_utf8 == TRUE) to take the next character
3801 * in the source, convert it to an unsigned value, and then convert
3802 * it back again. But the source has not been validated here. The
3803 * routine that does the conversion checks for errors like
3806 const UV nextuv = (this_utf8)
3807 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3809 const STRLEN need = UVCHR_SKIP(nextuv);
3811 SvCUR_set(sv, d - SvPVX_const(sv));
3814 /* See Note on sizing above. */
3815 sv_utf8_upgrade_flags_grow(sv,
3816 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3817 need + (STRLEN)(send - s) + 1);
3818 d = SvPVX(sv) + SvCUR(sv);
3820 } else if (need > len) {
3821 /* encoded value larger than old, may need extra space (NOTE:
3822 * SvCUR() is not set correctly here). See Note on sizing
3824 const STRLEN off = d - SvPVX_const(sv);
3825 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3829 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3831 } /* while loop to process each character */
3833 /* terminate the string and set up the sv */
3835 SvCUR_set(sv, d - SvPVX_const(sv));
3836 if (SvCUR(sv) >= SvLEN(sv))
3837 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
3838 " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3843 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
3844 PL_parser->lex_sub_op->op_private |=
3845 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3849 /* shrink the sv if we allocated more than we used */
3850 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3851 SvPV_shrink_to_cur(sv);
3854 /* return the substring (via pl_yylval) only if we parsed anything */
3857 for (; s2 < s; s2++) {
3859 COPLINE_INC_WITH_HERELINES;
3861 SvREFCNT_inc_simple_void_NN(sv);
3862 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3863 && ! PL_parser->lex_re_reparsing)
3865 const char *const key = PL_lex_inpat ? "qr" : "q";
3866 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3870 if (PL_lex_inwhat == OP_TRANS) {
3873 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3876 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3884 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3887 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
3889 LEAVE_with_name("scan_const");
3894 * Returns TRUE if there's more to the expression (e.g., a subscript),
3897 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3899 * ->[ and ->{ return TRUE
3900 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3901 * { and [ outside a pattern are always subscripts, so return TRUE
3902 * if we're outside a pattern and it's not { or [, then return FALSE
3903 * if we're in a pattern and the first char is a {
3904 * {4,5} (any digits around the comma) returns FALSE
3905 * if we're in a pattern and the first char is a [
3907 * [SOMETHING] has a funky algorithm to decide whether it's a
3908 * character class or not. It has to deal with things like
3909 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3910 * anything else returns TRUE
3913 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3916 S_intuit_more(pTHX_ char *s)
3918 PERL_ARGS_ASSERT_INTUIT_MORE;
3920 if (PL_lex_brackets)
3922 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3924 if (*s == '-' && s[1] == '>'
3925 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3926 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3927 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3929 if (*s != '{' && *s != '[')
3934 /* In a pattern, so maybe we have {n,m}. */
3942 /* On the other hand, maybe we have a character class */
3945 if (*s == ']' || *s == '^')
3948 /* this is terrifying, and it works */
3951 const char * const send = strchr(s,']');
3952 unsigned char un_char, last_un_char;
3953 char tmpbuf[sizeof PL_tokenbuf * 4];
3955 if (!send) /* has to be an expression */
3957 weight = 2; /* let's weigh the evidence */
3961 else if (isDIGIT(*s)) {
3963 if (isDIGIT(s[1]) && s[2] == ']')
3969 Zero(seen,256,char);
3971 for (; s < send; s++) {
3972 last_un_char = un_char;
3973 un_char = (unsigned char)*s;
3978 weight -= seen[un_char] * 10;
3979 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3981 char *tmp = PL_bufend;
3982 PL_bufend = (char*)send;
3983 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3985 len = (int)strlen(tmpbuf);
3986 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3987 UTF ? SVf_UTF8 : 0, SVt_PV))
3994 && strchr("[#!%*<>()-=",s[1]))
3996 if (/*{*/ strchr("])} =",s[2]))
4005 if (strchr("wds]",s[1]))
4007 else if (seen[(U8)'\''] || seen[(U8)'"'])
4009 else if (strchr("rnftbxcav",s[1]))
4011 else if (isDIGIT(s[1])) {
4013 while (s[1] && isDIGIT(s[1]))
4023 if (strchr("aA01! ",last_un_char))
4025 if (strchr("zZ79~",s[1]))
4027 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4028 weight -= 5; /* cope with negative subscript */
4031 if (!isWORDCHAR(last_un_char)
4032 && !(last_un_char == '$' || last_un_char == '@'
4033 || last_un_char == '&')
4034 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4038 if (keyword(d, s - d, 0))
4041 if (un_char == last_un_char + 1)
4043 weight -= seen[un_char];
4048 if (weight >= 0) /* probably a character class */
4058 * Does all the checking to disambiguate
4060 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4061 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4063 * First argument is the stuff after the first token, e.g. "bar".
4065 * Not a method if foo is a filehandle.
4066 * Not a method if foo is a subroutine prototyped to take a filehandle.
4067 * Not a method if it's really "Foo $bar"
4068 * Method if it's "foo $bar"
4069 * Not a method if it's really "print foo $bar"
4070 * Method if it's really "foo package::" (interpreted as package->foo)
4071 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4072 * Not a method if bar is a filehandle or package, but is quoted with
4077 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4079 char *s = start + (*start == '$');
4080 char tmpbuf[sizeof PL_tokenbuf];
4083 /* Mustn't actually add anything to a symbol table.
4084 But also don't want to "initialise" any placeholder
4085 constants that might already be there into full
4086 blown PVGVs with attached PVCV. */
4088 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4090 PERL_ARGS_ASSERT_INTUIT_METHOD;
4092 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4094 if (cv && SvPOK(cv)) {
4095 const char *proto = CvPROTO(cv);
4097 while (*proto && (isSPACE(*proto) || *proto == ';'))
4104 if (*start == '$') {
4105 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4106 || isUPPER(*PL_tokenbuf))
4111 return *s == '(' ? FUNCMETH : METHOD;
4114 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4115 /* start is the beginning of the possible filehandle/object,
4116 * and s is the end of it
4117 * tmpbuf is a copy of it (but with single quotes as double colons)
4120 if (!keyword(tmpbuf, len, 0)) {
4121 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4126 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4127 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4129 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4130 && (!isGV(indirgv) || GvCVu(indirgv)))
4132 /* filehandle or package name makes it a method */
4133 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4135 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4136 return 0; /* no assumptions -- "=>" quotes bareword */
4138 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4139 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4140 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4142 force_next(BAREWORD);
4144 return *s == '(' ? FUNCMETH : METHOD;
4150 /* Encoded script support. filter_add() effectively inserts a
4151 * 'pre-processing' function into the current source input stream.
4152 * Note that the filter function only applies to the current source file
4153 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4155 * The datasv parameter (which may be NULL) can be used to pass
4156 * private data to this instance of the filter. The filter function
4157 * can recover the SV using the FILTER_DATA macro and use it to
4158 * store private buffers and state information.
4160 * The supplied datasv parameter is upgraded to a PVIO type
4161 * and the IoDIRP/IoANY field is used to store the function pointer,
4162 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4163 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4164 * private use must be set using malloc'd pointers.
4168 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4176 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4177 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4179 if (!PL_rsfp_filters)
4180 PL_rsfp_filters = newAV();
4183 SvUPGRADE(datasv, SVt_PVIO);
4184 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4185 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4186 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4187 FPTR2DPTR(void *, IoANY(datasv)),
4188 SvPV_nolen(datasv)));
4189 av_unshift(PL_rsfp_filters, 1);
4190 av_store(PL_rsfp_filters, 0, datasv) ;
4192 !PL_parser->filtered
4193 && PL_parser->lex_flags & LEX_EVALBYTES
4194 && PL_bufptr < PL_bufend
4196 const char *s = PL_bufptr;
4197 while (s < PL_bufend) {
4199 SV *linestr = PL_parser->linestr;
4200 char *buf = SvPVX(linestr);
4201 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4202 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4203 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4204 STRLEN const linestart_pos = PL_parser->linestart - buf;
4205 STRLEN const last_uni_pos =
4206 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4207 STRLEN const last_lop_pos =
4208 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4209 av_push(PL_rsfp_filters, linestr);
4210 PL_parser->linestr =
4211 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4212 buf = SvPVX(PL_parser->linestr);
4213 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4214 PL_parser->bufptr = buf + bufptr_pos;
4215 PL_parser->oldbufptr = buf + oldbufptr_pos;
4216 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4217 PL_parser->linestart = buf + linestart_pos;
4218 if (PL_parser->last_uni)
4219 PL_parser->last_uni = buf + last_uni_pos;
4220 if (PL_parser->last_lop)
4221 PL_parser->last_lop = buf + last_lop_pos;
4222 SvLEN(linestr) = SvCUR(linestr);
4223 SvCUR(linestr) = s-SvPVX(linestr);
4224 PL_parser->filtered = 1;
4234 /* Delete most recently added instance of this filter function. */
4236 Perl_filter_del(pTHX_ filter_t funcp)
4240 PERL_ARGS_ASSERT_FILTER_DEL;
4243 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4244 FPTR2DPTR(void*, funcp)));
4246 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4248 /* if filter is on top of stack (usual case) just pop it off */
4249 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4250 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4251 sv_free(av_pop(PL_rsfp_filters));
4255 /* we need to search for the correct entry and clear it */
4256 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4260 /* Invoke the idxth filter function for the current rsfp. */
4261 /* maxlen 0 = read one text line */
4263 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4267 /* This API is bad. It should have been using unsigned int for maxlen.
4268 Not sure if we want to change the API, but if not we should sanity
4269 check the value here. */
4270 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4272 PERL_ARGS_ASSERT_FILTER_READ;
4274 if (!PL_parser || !PL_rsfp_filters)
4276 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4277 /* Provide a default input filter to make life easy. */
4278 /* Note that we append to the line. This is handy. */
4279 DEBUG_P(PerlIO_printf(Perl_debug_log,
4280 "filter_read %d: from rsfp\n", idx));
4281 if (correct_length) {
4284 const int old_len = SvCUR(buf_sv);
4286 /* ensure buf_sv is large enough */
4287 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4288 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4289 correct_length)) <= 0) {
4290 if (PerlIO_error(PL_rsfp))
4291 return -1; /* error */
4293 return 0 ; /* end of file */
4295 SvCUR_set(buf_sv, old_len + len) ;
4296 SvPVX(buf_sv)[old_len + len] = '\0';
4299 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4300 if (PerlIO_error(PL_rsfp))
4301 return -1; /* error */
4303 return 0 ; /* end of file */
4306 return SvCUR(buf_sv);
4308 /* Skip this filter slot if filter has been deleted */
4309 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4310 DEBUG_P(PerlIO_printf(Perl_debug_log,
4311 "filter_read %d: skipped (filter deleted)\n",
4313 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4315 if (SvTYPE(datasv) != SVt_PVIO) {
4316 if (correct_length) {
4318 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4319 if (!remainder) return 0; /* eof */
4320 if (correct_length > remainder) correct_length = remainder;
4321 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4322 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4325 const char *s = SvEND(datasv);
4326 const char *send = SvPVX(datasv) + SvLEN(datasv);
4334 if (s == send) return 0; /* eof */
4335 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4336 SvCUR_set(datasv, s-SvPVX(datasv));
4338 return SvCUR(buf_sv);
4340 /* Get function pointer hidden within datasv */
4341 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4342 DEBUG_P(PerlIO_printf(Perl_debug_log,
4343 "filter_read %d: via function %p (%s)\n",
4344 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4345 /* Call function. The function is expected to */
4346 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4347 /* Return: <0:error, =0:eof, >0:not eof */
4348 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4352 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4354 PERL_ARGS_ASSERT_FILTER_GETS;
4356 #ifdef PERL_CR_FILTER
4357 if (!PL_rsfp_filters) {
4358 filter_add(S_cr_textfilter,NULL);
4361 if (PL_rsfp_filters) {
4363 SvCUR_set(sv, 0); /* start with empty line */
4364 if (FILTER_READ(0, sv, 0) > 0)
4365 return ( SvPVX(sv) ) ;
4370 return (sv_gets(sv, PL_rsfp, append));
4374 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4378 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4380 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4384 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4385 && (gv = gv_fetchpvn_flags(pkgname,
4387 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4389 return GvHV(gv); /* Foo:: */
4392 /* use constant CLASS => 'MyClass' */
4393 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4394 if (gv && GvCV(gv)) {
4395 SV * const sv = cv_const_sv(GvCV(gv));
4397 return gv_stashsv(sv, 0);
4400 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4405 S_tokenize_use(pTHX_ int is_use, char *s) {
4406 PERL_ARGS_ASSERT_TOKENIZE_USE;
4408 if (PL_expect != XSTATE)
4409 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4410 is_use ? "use" : "no"));
4413 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4414 s = force_version(s, TRUE);
4415 if (*s == ';' || *s == '}'
4416 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4417 NEXTVAL_NEXTTOKE.opval = NULL;
4418 force_next(BAREWORD);
4420 else if (*s == 'v') {
4421 s = force_word(s,BAREWORD,FALSE,TRUE);
4422 s = force_version(s, FALSE);
4426 s = force_word(s,BAREWORD,FALSE,TRUE);
4427 s = force_version(s, FALSE);
4429 pl_yylval.ival = is_use;
4433 static const char* const exp_name[] =
4434 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4435 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4436 "SIGVAR", "TERMORDORDOR"
4440 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4442 S_word_takes_any_delimiter(char *p, STRLEN len)
4444 return (len == 1 && strchr("msyq", p[0]))
4446 && ((p[0] == 't' && p[1] == 'r')
4447 || (p[0] == 'q' && strchr("qwxr", p[1]))));
4451 S_check_scalar_slice(pTHX_ char *s)
4454 while (*s == ' ' || *s == '\t') s++;
4455 if (*s == 'q' && s[1] == 'w'
4456 && !isWORDCHAR_lazy_if(s+2,UTF))
4458 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4459 s += UTF ? UTF8SKIP(s) : 1;
4460 if (*s == '}' || *s == ']')
4461 pl_yylval.ival = OPpSLICEWARNING;
4464 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4466 S_lex_token_boundary(pTHX)
4468 PL_oldoldbufptr = PL_oldbufptr;
4469 PL_oldbufptr = PL_bufptr;
4472 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4474 S_vcs_conflict_marker(pTHX_ char *s)
4476 lex_token_boundary();
4478 yyerror("Version control conflict marker");
4479 while (s < PL_bufend && *s != '\n')
4487 Works out what to call the token just pulled out of the input
4488 stream. The yacc parser takes care of taking the ops we return and
4489 stitching them into a tree.
4492 The type of the next token
4495 Check if we have already built the token; if so, use it.
4496 Switch based on the current state:
4497 - if we have a case modifier in a string, deal with that
4498 - handle other cases of interpolation inside a string
4499 - scan the next line if we are inside a format
4500 In the normal state, switch on the next character:
4502 if alphabetic, go to key lookup
4503 unrecognized character - croak
4504 - 0/4/26: handle end-of-line or EOF
4505 - cases for whitespace
4506 - \n and #: handle comments and line numbers
4507 - various operators, brackets and sigils
4510 - 'v': vstrings (or go to key lookup)
4511 - 'x' repetition operator (or go to key lookup)
4512 - other ASCII alphanumerics (key lookup begins here):
4515 scan built-in keyword (but do nothing with it yet)
4516 check for statement label
4517 check for lexical subs
4518 goto just_a_word if there is one
4519 see whether built-in keyword is overridden
4520 switch on keyword number:
4521 - default: just_a_word:
4522 not a built-in keyword; handle bareword lookup
4523 disambiguate between method and sub call
4524 fall back to bareword
4525 - cases for built-in keywords
4533 char *s = PL_bufptr;
4537 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4541 /* orig_keyword, gvp, and gv are initialized here because
4542 * jump to the label just_a_word_zero can bypass their
4543 * initialization later. */
4544 I32 orig_keyword = 0;
4549 SV* tmp = newSVpvs("");
4550 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4551 (IV)CopLINE(PL_curcop),
4552 lex_state_names[PL_lex_state],
4553 exp_name[PL_expect],
4554 pv_display(tmp, s, strlen(s), 0, 60));
4558 /* when we've already built the next token, just pull it out of the queue */
4561 pl_yylval = PL_nextval[PL_nexttoke];
4564 next_type = PL_nexttype[PL_nexttoke];
4565 if (next_type & (7<<24)) {
4566 if (next_type & (1<<24)) {
4567 if (PL_lex_brackets > 100)
4568 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4569 PL_lex_brackstack[PL_lex_brackets++] =
4570 (char) ((next_type >> 16) & 0xff);
4572 if (next_type & (2<<24))
4573 PL_lex_allbrackets++;
4574 if (next_type & (4<<24))
4575 PL_lex_allbrackets--;
4576 next_type &= 0xffff;
4578 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4582 switch (PL_lex_state) {
4584 case LEX_INTERPNORMAL:
4587 /* interpolated case modifiers like \L \U, including \Q and \E.
4588 when we get here, PL_bufptr is at the \
4590 case LEX_INTERPCASEMOD:
4592 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4594 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4595 PL_bufptr, PL_bufend, *PL_bufptr);
4597 /* handle \E or end of string */
4598 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4600 if (PL_lex_casemods) {
4601 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4602 PL_lex_casestack[PL_lex_casemods] = '\0';
4604 if (PL_bufptr != PL_bufend
4605 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4606 || oldmod == 'F')) {
4608 PL_lex_state = LEX_INTERPCONCAT;
4610 PL_lex_allbrackets--;
4613 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4614 /* Got an unpaired \E */
4615 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4616 "Useless use of \\E");
4618 if (PL_bufptr != PL_bufend)
4620 PL_lex_state = LEX_INTERPCONCAT;
4624 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4625 "### Saw case modifier\n"); });
4627 if (s[1] == '\\' && s[2] == 'E') {
4629 PL_lex_state = LEX_INTERPCONCAT;
4634 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4635 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4636 if ((*s == 'L' || *s == 'U' || *s == 'F')
4637 && (strpbrk(PL_lex_casestack, "LUF")))
4639 PL_lex_casestack[--PL_lex_casemods] = '\0';
4640 PL_lex_allbrackets--;
4643 if (PL_lex_casemods > 10)
4644 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4645 PL_lex_casestack[PL_lex_casemods++] = *s;
4646 PL_lex_casestack[PL_lex_casemods] = '\0';
4647 PL_lex_state = LEX_INTERPCONCAT;
4648 NEXTVAL_NEXTTOKE.ival = 0;
4649 force_next((2<<24)|'(');
4651 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4653 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4655 NEXTVAL_NEXTTOKE.ival = OP_LC;
4657 NEXTVAL_NEXTTOKE.ival = OP_UC;
4659 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4661 NEXTVAL_NEXTTOKE.ival = OP_FC;
4663 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4667 if (PL_lex_starts) {
4670 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4671 if (PL_lex_casemods == 1 && PL_lex_inpat)
4674 AopNOASSIGN(OP_CONCAT);
4680 case LEX_INTERPPUSH:
4681 return REPORT(sublex_push());
4683 case LEX_INTERPSTART:
4684 if (PL_bufptr == PL_bufend)
4685 return REPORT(sublex_done());
4686 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4687 "### Interpolated variable\n"); });
4689 /* for /@a/, we leave the joining for the regex engine to do
4690 * (unless we're within \Q etc) */
4691 PL_lex_dojoin = (*PL_bufptr == '@'
4692 && (!PL_lex_inpat || PL_lex_casemods));
4693 PL_lex_state = LEX_INTERPNORMAL;
4694 if (PL_lex_dojoin) {
4695 NEXTVAL_NEXTTOKE.ival = 0;
4697 force_ident("\"", '$');
4698 NEXTVAL_NEXTTOKE.ival = 0;
4700 NEXTVAL_NEXTTOKE.ival = 0;
4701 force_next((2<<24)|'(');
4702 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4705 /* Convert (?{...}) and friends to 'do {...}' */
4706 if (PL_lex_inpat && *PL_bufptr == '(') {
4707 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4709 if (*PL_bufptr != '{')
4711 PL_expect = XTERMBLOCK;
4715 if (PL_lex_starts++) {
4717 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4718 if (!PL_lex_casemods && PL_lex_inpat)
4721 AopNOASSIGN(OP_CONCAT);
4725 case LEX_INTERPENDMAYBE:
4726 if (intuit_more(PL_bufptr)) {
4727 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4733 if (PL_lex_dojoin) {
4734 const U8 dojoin_was = PL_lex_dojoin;
4735 PL_lex_dojoin = FALSE;
4736 PL_lex_state = LEX_INTERPCONCAT;
4737 PL_lex_allbrackets--;
4738 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4740 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4741 && SvEVALED(PL_lex_repl))
4743 if (PL_bufptr != PL_bufend)
4744 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4747 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4748 re_eval_str. If the here-doc body’s length equals the previous
4749 value of re_eval_start, re_eval_start will now be null. So
4750 check re_eval_str as well. */
4751 if (PL_parser->lex_shared->re_eval_start
4752 || PL_parser->lex_shared->re_eval_str) {
4754 if (*PL_bufptr != ')')
4755 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4757 /* having compiled a (?{..}) expression, return the original
4758 * text too, as a const */
4759 if (PL_parser->lex_shared->re_eval_str) {
4760 sv = PL_parser->lex_shared->re_eval_str;
4761 PL_parser->lex_shared->re_eval_str = NULL;
4763 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4764 SvPV_shrink_to_cur(sv);
4766 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4767 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4768 NEXTVAL_NEXTTOKE.opval =
4769 newSVOP(OP_CONST, 0,
4772 PL_parser->lex_shared->re_eval_start = NULL;
4778 case LEX_INTERPCONCAT:
4780 if (PL_lex_brackets)
4781 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4782 (long) PL_lex_brackets);
4784 if (PL_bufptr == PL_bufend)
4785 return REPORT(sublex_done());
4787 /* m'foo' still needs to be parsed for possible (?{...}) */
4788 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4789 SV *sv = newSVsv(PL_linestr);
4791 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4795 s = scan_const(PL_bufptr);
4797 PL_lex_state = LEX_INTERPCASEMOD;
4799 PL_lex_state = LEX_INTERPSTART;
4802 if (s != PL_bufptr) {
4803 NEXTVAL_NEXTTOKE = pl_yylval;
4806 if (PL_lex_starts++) {
4807 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4808 if (!PL_lex_casemods && PL_lex_inpat)
4811 AopNOASSIGN(OP_CONCAT);
4821 s = scan_formline(PL_bufptr);
4822 if (!PL_lex_formbrack)
4831 /* We really do *not* want PL_linestr ever becoming a COW. */
4832 assert (!SvIsCOW(PL_linestr));
4834 PL_oldoldbufptr = PL_oldbufptr;
4836 PL_parser->saw_infix_sigil = 0;
4838 if (PL_in_my == KEY_sigvar) {
4839 /* we expect the sigil and optional var name part of a
4840 * signature element here. Since a '$' is not necessarily
4841 * followed by a var name, handle it specially here; the general
4842 * yylex code would otherwise try to interpret whatever follows
4843 * as a var; e.g. ($, ...) would be seen as the var '$,'
4850 PL_bufptr = s; /* for error reporting */
4855 /* spot stuff that looks like an prototype */
4856 if (strchr("$:@%&*;\\[]", *s)) {
4857 yyerror("Illegal character following sigil in a subroutine signature");
4860 /* '$#' is banned, while '$ # comment' isn't */
4862 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4866 if (isIDFIRST_lazy_if(s, UTF)) {
4867 char *dest = PL_tokenbuf + 1;
4868 /* read var name, including sigil, into PL_tokenbuf */
4869 PL_tokenbuf[0] = sigil;
4870 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4871 0, cBOOL(UTF), FALSE);
4873 assert(PL_tokenbuf[1]); /* we have a variable name */
4874 NEXTVAL_NEXTTOKE.ival = sigil;
4875 force_next('p'); /* force a signature pending identifier */
4879 PL_expect = XOPERATOR;
4885 case ',': /* handle ($a,,$b) */
4890 yyerror("A signature parameter must start with '$', '@' or '%'");
4891 /* very crude error recovery: skip to likely next signature
4893 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4904 if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
4906 SAVESPTR(PL_warnhook);
4907 PL_warnhook = PERL_WARNHOOK_FATAL;
4908 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
4911 if (isIDFIRST_utf8((U8*)s)) {
4915 else if (isALNUMC(*s)) {
4919 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4922 STRLEN skiplen = UTF8SKIP(s);
4923 STRLEN stravail = PL_bufend - s;
4924 c = sv_uni_display(dsv, newSVpvn_flags(s,
4925 skiplen > stravail ? stravail : skiplen,
4926 SVs_TEMP | SVf_UTF8),
4927 10, UNI_DISPLAY_ISPRINT);
4930 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4932 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4933 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4934 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
4938 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
4939 UTF8fARG(UTF, (s - d), d),
4944 goto fake_eof; /* emulate EOF on ^D or ^Z */
4946 if ((!PL_rsfp || PL_lex_inwhat)
4947 && (!PL_parser->filtered || s+1 < PL_bufend)) {
4951 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
4953 yyerror((const char *)
4955 ? "Format not terminated"
4956 : "Missing right curly or square bracket"));
4958 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4959 "### Tokener got EOF\n");
4963 if (s++ < PL_bufend)
4964 goto retry; /* ignore stray nulls */
4967 if (!PL_in_eval && !PL_preambled) {
4968 PL_preambled = TRUE;
4970 /* Generate a string of Perl code to load the debugger.
4971 * If PERL5DB is set, it will return the contents of that,
4972 * otherwise a compile-time require of perl5db.pl. */
4974 const char * const pdb = PerlEnv_getenv("PERL5DB");
4977 sv_setpv(PL_linestr, pdb);
4978 sv_catpvs(PL_linestr,";");
4980 SETERRNO(0,SS_NORMAL);
4981 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4983 PL_parser->preambling = CopLINE(PL_curcop);
4985 SvPVCLEAR(PL_linestr);
4986 if (PL_preambleav) {
4987 SV **svp = AvARRAY(PL_preambleav);
4988 SV **const end = svp + AvFILLp(PL_preambleav);
4990 sv_catsv(PL_linestr, *svp);
4992 sv_catpvs(PL_linestr, ";");
4994 sv_free(MUTABLE_SV(PL_preambleav));
4995 PL_preambleav = NULL;
4998 sv_catpvs(PL_linestr,
4999 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5000 if (PL_minus_n || PL_minus_p) {
5001 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5003 sv_catpvs(PL_linestr,"chomp;");
5006 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5007 || *PL_splitstr == '"')
5008 && strchr(PL_splitstr + 1, *PL_splitstr))
5009 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5011 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5012 bytes can be used as quoting characters. :-) */
5013 const char *splits = PL_splitstr;
5014 sv_catpvs(PL_linestr, "our @F=split(q\0");
5017 if (*splits == '\\')
5018 sv_catpvn(PL_linestr, splits, 1);
5019 sv_catpvn(PL_linestr, splits, 1);
5020 } while (*splits++);
5021 /* This loop will embed the trailing NUL of
5022 PL_linestr as the last thing it does before
5024 sv_catpvs(PL_linestr, ");");
5028 sv_catpvs(PL_linestr,"our @F=split(' ');");
5031 sv_catpvs(PL_linestr, "\n");
5032 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5033 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5034 PL_last_lop = PL_last_uni = NULL;
5035 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5036 update_debugger_info(PL_linestr, NULL, 0);
5041 bof = PL_rsfp ? TRUE : FALSE;
5044 fake_eof = LEX_FAKE_EOF;
5046 PL_bufptr = PL_bufend;
5047 COPLINE_INC_WITH_HERELINES;
5048 if (!lex_next_chunk(fake_eof)) {
5049 CopLINE_dec(PL_curcop);
5051 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5053 CopLINE_dec(PL_curcop);
5055 /* If it looks like the start of a BOM or raw UTF-16,
5056 * check if it in fact is. */
5059 || *(U8*)s == BOM_UTF8_FIRST_BYTE
5063 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5064 bof = (offset == (Off_t)SvCUR(PL_linestr));
5065 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5066 /* offset may include swallowed CR */
5068 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5071 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5072 s = swallow_bom((U8*)s);
5075 if (PL_parser->in_pod) {
5076 /* Incest with pod. */
5077 if (*s == '=' && strEQs(s, "=cut") && !isALPHA(s[4])) {
5078 SvPVCLEAR(PL_linestr);
5079 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5080 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5081 PL_last_lop = PL_last_uni = NULL;
5082 PL_parser->in_pod = 0;
5085 if (PL_rsfp || PL_parser->filtered)
5087 } while (PL_parser->in_pod);
5088 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5089 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5090 PL_last_lop = PL_last_uni = NULL;
5091 if (CopLINE(PL_curcop) == 1) {
5092 while (s < PL_bufend && isSPACE(*s))
5094 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5098 if (*s == '#' && *(s+1) == '!')
5100 #ifdef ALTERNATE_SHEBANG
5102 static char const as[] = ALTERNATE_SHEBANG;
5103 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5104 d = s + (sizeof(as) - 1);
5106 #endif /* ALTERNATE_SHEBANG */
5115 while (*d && !isSPACE(*d))
5119 #ifdef ARG_ZERO_IS_SCRIPT
5120 if (ipathend > ipath) {
5122 * HP-UX (at least) sets argv[0] to the script name,
5123 * which makes $^X incorrect. And Digital UNIX and Linux,
5124 * at least, set argv[0] to the basename of the Perl
5125 * interpreter. So, having found "#!", we'll set it right.
5127 SV* copfilesv = CopFILESV(PL_curcop);
5130 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5132 assert(SvPOK(x) || SvGMAGICAL(x));
5133 if (sv_eq(x, copfilesv)) {
5134 sv_setpvn(x, ipath, ipathend - ipath);
5140 const char *bstart = SvPV_const(copfilesv, blen);
5141 const char * const lstart = SvPV_const(x, llen);
5143 bstart += blen - llen;
5144 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5145 sv_setpvn(x, ipath, ipathend - ipath);
5152 /* Anything to do if no copfilesv? */
5154 TAINT_NOT; /* $^X is always tainted, but that's OK */
5156 #endif /* ARG_ZERO_IS_SCRIPT */
5161 d = instr(s,"perl -");
5163 d = instr(s,"perl");
5164 if (d && d[4] == '6')
5167 /* avoid getting into infinite loops when shebang
5168 * line contains "Perl" rather than "perl" */
5170 for (d = ipathend-4; d >= ipath; --d) {
5171 if (isALPHA_FOLD_EQ(*d, 'p')
5172 && !ibcmp(d, "perl", 4))
5182 #ifdef ALTERNATE_SHEBANG
5184 * If the ALTERNATE_SHEBANG on this system starts with a
5185 * character that can be part of a Perl expression, then if
5186 * we see it but not "perl", we're probably looking at the
5187 * start of Perl code, not a request to hand off to some
5188 * other interpreter. Similarly, if "perl" is there, but
5189 * not in the first 'word' of the line, we assume the line
5190 * contains the start of the Perl program.
5192 if (d && *s != '#') {
5193 const char *c = ipath;
5194 while (*c && !strchr("; \t\r\n\f\v#", *c))
5197 d = NULL; /* "perl" not in first word; ignore */
5199 *s = '#'; /* Don't try to parse shebang line */
5201 #endif /* ALTERNATE_SHEBANG */
5206 && !instr(s,"indir")
5207 && instr(PL_origargv[0],"perl"))
5214 while (s < PL_bufend && isSPACE(*s))
5216 if (s < PL_bufend) {
5217 Newx(newargv,PL_origargc+3,char*);
5219 while (s < PL_bufend && !isSPACE(*s))
5222 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5225 newargv = PL_origargv;
5228 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5230 Perl_croak(aTHX_ "Can't exec %s", ipath);
5233 while (*d && !isSPACE(*d))
5235 while (SPACE_OR_TAB(*d))
5239 const bool switches_done = PL_doswitches;
5240 const U32 oldpdb = PL_perldb;
5241 const bool oldn = PL_minus_n;
5242 const bool oldp = PL_minus_p;
5246 bool baduni = FALSE;
5248 const char *d2 = d1 + 1;
5249 if (parse_unicode_opts((const char **)&d2)
5253 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5254 const char * const m = d1;
5255 while (*d1 && !isSPACE(*d1))
5257 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5260 d1 = moreswitches(d1);
5262 if (PL_doswitches && !switches_done) {
5263 int argc = PL_origargc;
5264 char **argv = PL_origargv;
5267 } while (argc && argv[0][0] == '-' && argv[0][1]);
5268 init_argv_symbols(argc,argv);
5270 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5271 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5272 /* if we have already added "LINE: while (<>) {",
5273 we must not do it again */
5275 SvPVCLEAR(PL_linestr);
5276 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5277 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5278 PL_last_lop = PL_last_uni = NULL;
5279 PL_preambled = FALSE;
5280 if (PERLDB_LINE_OR_SAVESRC)
5281 (void)gv_fetchfile(PL_origfilename);
5288 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5289 PL_lex_state = LEX_FORMLINE;
5290 force_next(FORMRBRACK);
5295 #ifdef PERL_STRICT_CR
5296 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5298 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5300 case ' ': case '\t': case '\f': case '\v':
5305 if (PL_lex_state != LEX_NORMAL
5306 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5308 const bool in_comment = *s == '#';
5309 if (*s == '#' && s == PL_linestart && PL_in_eval
5310 && !PL_rsfp && !PL_parser->filtered) {
5311 /* handle eval qq[#line 1 "foo"\n ...] */
5312 CopLINE_dec(PL_curcop);
5316 while (d < PL_bufend && *d != '\n')
5320 else if (d > PL_bufend)
5321 /* Found by Ilya: feed random input to Perl. */
5322 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5325 if (in_comment && d == PL_bufend
5326 && PL_lex_state == LEX_INTERPNORMAL
5327 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5328 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5331 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5332 PL_lex_state = LEX_FORMLINE;
5333 force_next(FORMRBRACK);
5338 while (s < PL_bufend && *s != '\n')
5346 else if (s > PL_bufend)
5347 /* Found by Ilya: feed random input to Perl. */
5348 Perl_croak(aTHX_ "panic: input overflow");
5352 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5360 while (s < PL_bufend && SPACE_OR_TAB(*s))
5363 if (strEQs(s,"=>")) {
5364 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5365 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5366 OPERATOR('-'); /* unary minus */
5369 case 'r': ftst = OP_FTEREAD; break;
5370 case 'w': ftst = OP_FTEWRITE; break;
5371 case 'x': ftst = OP_FTEEXEC; break;
5372 case 'o': ftst = OP_FTEOWNED; break;
5373 case 'R': ftst = OP_FTRREAD; break;
5374 case 'W': ftst = OP_FTRWRITE; break;
5375 case 'X': ftst = OP_FTREXEC; break;
5376 case 'O': ftst = OP_FTROWNED; break;
5377 case 'e': ftst = OP_FTIS; break;
5378 case 'z': ftst = OP_FTZERO; break;
5379 case 's': ftst = OP_FTSIZE; break;
5380 case 'f': ftst = OP_FTFILE; break;
5381 case 'd': ftst = OP_FTDIR; break;
5382 case 'l': ftst = OP_FTLINK; break;
5383 case 'p': ftst = OP_FTPIPE; break;
5384 case 'S': ftst = OP_FTSOCK; break;
5385 case 'u': ftst = OP_FTSUID; break;
5386 case 'g': ftst = OP_FTSGID; break;
5387 case 'k': ftst = OP_FTSVTX; break;
5388 case 'b': ftst = OP_FTBLK; break;
5389 case 'c': ftst = OP_FTCHR; break;
5390 case 't': ftst = OP_FTTTY; break;
5391 case 'T': ftst = OP_FTTEXT; break;
5392 case 'B': ftst = OP_FTBINARY; break;
5393 case 'M': case 'A': case 'C':
5394 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5396 case 'M': ftst = OP_FTMTIME; break;
5397 case 'A': ftst = OP_FTATIME; break;
5398 case 'C': ftst = OP_FTCTIME; break;
5406 PL_last_uni = PL_oldbufptr;
5407 PL_last_lop_op = (OPCODE)ftst;
5408 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5409 "### Saw file test %c\n", (int)tmp);
5414 /* Assume it was a minus followed by a one-letter named
5415 * subroutine call (or a -bareword), then. */
5416 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5417 "### '-%c' looked like a file test but was not\n",
5424 const char tmp = *s++;
5427 if (PL_expect == XOPERATOR)
5432 else if (*s == '>') {
5435 if (((*s == '$' || *s == '&') && s[1] == '*')
5436 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5437 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5438 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5441 PL_expect = XPOSTDEREF;
5444 if (isIDFIRST_lazy_if(s,UTF)) {
5445 s = force_word(s,METHOD,FALSE,TRUE);
5453 if (PL_expect == XOPERATOR) {
5455 && !PL_lex_allbrackets
5456 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5464 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5466 OPERATOR('-'); /* unary minus */
5472 const char tmp = *s++;
5475 if (PL_expect == XOPERATOR)
5480 if (PL_expect == XOPERATOR) {
5482 && !PL_lex_allbrackets
5483 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5491 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5498 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5499 if (PL_expect != XOPERATOR) {
5500 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5501 PL_expect = XOPERATOR;
5502 force_ident(PL_tokenbuf, '*');
5510 if (*s == '=' && !PL_lex_allbrackets
5511 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5519 && !PL_lex_allbrackets
5520 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5525 PL_parser->saw_infix_sigil = 1;
5530 if (PL_expect == XOPERATOR) {
5532 && !PL_lex_allbrackets
5533 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5538 PL_parser->saw_infix_sigil = 1;
5541 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5542 PL_tokenbuf[0] = '%';
5543 s = scan_ident(s, PL_tokenbuf + 1,
5544 sizeof PL_tokenbuf - 1, FALSE);
5546 if (!PL_tokenbuf[1]) {
5549 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5551 PL_tokenbuf[0] = '@';
5553 PL_expect = XOPERATOR;
5554 force_ident_maybe_lex('%');
5559 bof = FEATURE_BITWISE_IS_ENABLED;
5560 if (bof && s[1] == '.')
5562 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5563 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5569 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5571 if (PL_lex_brackets > 100)
5572 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5573 PL_lex_brackstack[PL_lex_brackets++] = 0;
5574 PL_lex_allbrackets++;
5576 const char tmp = *s++;
5581 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5583 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5586 Perl_ck_warner_d(aTHX_
5587 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5588 "Smartmatch is experimental");
5592 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5594 BCop(OP_SCOMPLEMENT);
5596 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5598 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5605 goto just_a_word_zero_gv;
5611 switch (PL_expect) {
5613 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5615 PL_bufptr = s; /* update in case we back off */
5618 "Use of := for an empty attribute list is not allowed");
5625 PL_expect = XTERMBLOCK;
5629 while (isIDFIRST_lazy_if(s,UTF)) {
5632 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5633 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5634 if (tmp < 0) tmp = -tmp;
5649 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5651 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5656 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5658 COPLINE_SET_FROM_MULTI_END;
5661 sv_catsv(sv, PL_lex_stuff);
5662 attrs = op_append_elem(OP_LIST, attrs,
5663 newSVOP(OP_CONST, 0, sv));
5664 SvREFCNT_dec_NN(PL_lex_stuff);
5665 PL_lex_stuff = NULL;
5668 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5670 if (PL_in_my == KEY_our) {
5671 deprecate(":unique");
5674 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5677 /* NOTE: any CV attrs applied here need to be part of
5678 the CVf_BUILTIN_ATTRS define in cv.h! */
5679 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5681 CvLVALUE_on(PL_compcv);
5683 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5685 deprecate(":locked");
5687 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5689 CvMETHOD_on(PL_compcv);
5691 else if (!PL_in_my && len == 5
5692 && strnEQ(SvPVX(sv), "const", len))
5695 Perl_ck_warner_d(aTHX_
5696 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5697 ":const is experimental"
5699 CvANONCONST_on(PL_compcv);
5700 if (!CvANON(PL_compcv))
5701 yyerror(":const is not permitted on named "
5704 /* After we've set the flags, it could be argued that
5705 we don't need to do the attributes.pm-based setting
5706 process, and shouldn't bother appending recognized
5707 flags. To experiment with that, uncomment the
5708 following "else". (Note that's already been
5709 uncommented. That keeps the above-applied built-in
5710 attributes from being intercepted (and possibly
5711 rejected) by a package's attribute routines, but is
5712 justified by the performance win for the common case
5713 of applying only built-in attributes.) */
5715 attrs = op_append_elem(OP_LIST, attrs,
5716 newSVOP(OP_CONST, 0,
5720 if (*s == ':' && s[1] != ':')
5723 break; /* require real whitespace or :'s */
5724 /* XXX losing whitespace on sequential attributes here */
5729 && !(PL_expect == XOPERATOR
5730 ? (*s == '=' || *s == ')')
5731 : (*s == '{' || *s == '(')))
5733 const char q = ((*s == '\'') ? '"' : '\'');
5734 /* If here for an expression, and parsed no attrs, back
5736 if (PL_expect == XOPERATOR && !attrs) {
5740 /* MUST advance bufptr here to avoid bogus "at end of line"
5741 context messages from yyerror().
5744 yyerror( (const char *)
5746 ? Perl_form(aTHX_ "Invalid separator character "
5747 "%c%c%c in attribute list", q, *s, q)
5748 : "Unterminated attribute list" ) );
5756 NEXTVAL_NEXTTOKE.opval = attrs;
5762 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5766 PL_lex_allbrackets--;
5770 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5771 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5775 PL_lex_allbrackets++;
5778 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5785 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5788 PL_lex_allbrackets--;
5794 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5797 if (PL_lex_brackets <= 0)
5798 /* diag_listed_as: Unmatched right %s bracket */
5799 yyerror("Unmatched right square bracket");
5802 PL_lex_allbrackets--;
5803 if (PL_lex_state == LEX_INTERPNORMAL) {
5804 if (PL_lex_brackets == 0) {
5805 if (*s == '-' && s[1] == '>')
5806 PL_lex_state = LEX_INTERPENDMAYBE;
5807 else if (*s != '[' && *s != '{')
5808 PL_lex_state = LEX_INTERPEND;
5815 if (PL_lex_brackets > 100) {
5816 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5818 switch (PL_expect) {
5821 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5822 PL_lex_allbrackets++;
5823 OPERATOR(HASHBRACK);
5825 while (s < PL_bufend && SPACE_OR_TAB(*s))
5828 PL_tokenbuf[0] = '\0';
5829 if (d < PL_bufend && *d == '-') {
5830 PL_tokenbuf[0] = '-';
5832 while (d < PL_bufend && SPACE_OR_TAB(*d))
5835 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5836 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5838 while (d < PL_bufend && SPACE_OR_TAB(*d))
5841 const char minus = (PL_tokenbuf[0] == '-');
5842 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
5850 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5851 PL_lex_allbrackets++;
5856 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5857 PL_lex_allbrackets++;
5861 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5862 PL_lex_allbrackets++;
5867 if (PL_oldoldbufptr == PL_last_lop)
5868 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5870 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5871 PL_lex_allbrackets++;
5874 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5876 /* This hack is to get the ${} in the message. */
5878 yyerror("syntax error");
5881 OPERATOR(HASHBRACK);
5883 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5884 /* ${...} or @{...} etc., but not print {...}
5885 * Skip the disambiguation and treat this as a block.
5887 goto block_expectation;
5889 /* This hack serves to disambiguate a pair of curlies
5890 * as being a block or an anon hash. Normally, expectation
5891 * determines that, but in cases where we're not in a
5892 * position to expect anything in particular (like inside
5893 * eval"") we have to resolve the ambiguity. This code
5894 * covers the case where the first term in the curlies is a
5895 * quoted string. Most other cases need to be explicitly
5896 * disambiguated by prepending a "+" before the opening
5897 * curly in order to force resolution as an anon hash.
5899 * XXX should probably propagate the outer expectation
5900 * into eval"" to rely less on this hack, but that could
5901 * potentially break current behavior of eval"".
5905 if (*s == '\'' || *s == '"' || *s == '`') {
5906 /* common case: get past first string, handling escapes */
5907 for (t++; t < PL_bufend && *t != *s;)
5912 else if (*s == 'q') {
5915 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5916 && !isWORDCHAR(*t))))
5918 /* skip q//-like construct */
5920 char open, close, term;
5923 while (t < PL_bufend && isSPACE(*t))
5925 /* check for q => */
5926 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5927 OPERATOR(HASHBRACK);
5931 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5935 for (t++; t < PL_bufend; t++) {
5936 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5938 else if (*t == open)
5942 for (t++; t < PL_bufend; t++) {
5943 if (*t == '\\' && t+1 < PL_bufend)
5945 else if (*t == close && --brackets <= 0)
5947 else if (*t == open)
5954 /* skip plain q word */
5955 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5956 t += UTF ? UTF8SKIP(t) : 1;
5958 else if (isWORDCHAR_lazy_if(t,UTF)) {
5959 t += UTF ? UTF8SKIP(t) : 1;
5960 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5961 t += UTF ? UTF8SKIP(t) : 1;
5963 while (t < PL_bufend && isSPACE(*t))
5965 /* if comma follows first term, call it an anon hash */
5966 /* XXX it could be a comma expression with loop modifiers */
5967 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5968 || (*t == '=' && t[1] == '>')))
5969 OPERATOR(HASHBRACK);
5970 if (PL_expect == XREF)
5973 /* If there is an opening brace or 'sub:', treat it
5974 as a term to make ${{...}}{k} and &{sub:attr...}
5975 dwim. Otherwise, treat it as a statement, so
5976 map {no strict; ...} works.
5983 if (strEQs(s, "sub")) {
5994 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6000 pl_yylval.ival = CopLINE(PL_curcop);
6001 PL_copline = NOLINE; /* invalidate current command line number */
6002 TOKEN(formbrack ? '=' : '{');
6004 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6008 if (PL_lex_brackets <= 0)
6009 /* diag_listed_as: Unmatched right %s bracket */
6010 yyerror("Unmatched right curly bracket");
6012 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6013 PL_lex_allbrackets--;
6014 if (PL_lex_state == LEX_INTERPNORMAL) {
6015 if (PL_lex_brackets == 0) {
6016 if (PL_expect & XFAKEBRACK) {
6017 PL_expect &= XENUMMASK;
6018 PL_lex_state = LEX_INTERPEND;
6020 return yylex(); /* ignore fake brackets */
6022 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6023 && SvEVALED(PL_lex_repl))
6024 PL_lex_state = LEX_INTERPEND;
6025 else if (*s == '-' && s[1] == '>')
6026 PL_lex_state = LEX_INTERPENDMAYBE;
6027 else if (*s != '[' && *s != '{')
6028 PL_lex_state = LEX_INTERPEND;
6031 if (PL_expect & XFAKEBRACK) {
6032 PL_expect &= XENUMMASK;
6034 return yylex(); /* ignore fake brackets */
6036 force_next(formbrack ? '.' : '}');
6037 if (formbrack) LEAVE;
6038 if (formbrack == 2) { /* means . where arguments were expected */
6044 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6047 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6048 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6055 if (PL_expect == XOPERATOR) {
6056 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6057 && isIDFIRST_lazy_if(s,UTF))
6059 CopLINE_dec(PL_curcop);
6060 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6061 CopLINE_inc(PL_curcop);
6064 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6066 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6067 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6073 PL_parser->saw_infix_sigil = 1;
6074 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6080 PL_tokenbuf[0] = '&';
6081 s = scan_ident(s - 1, PL_tokenbuf + 1,
6082 sizeof PL_tokenbuf - 1, TRUE);
6083 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6084 if (PL_tokenbuf[1]) {
6085 force_ident_maybe_lex('&');
6094 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6095 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6103 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6105 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6106 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6110 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6114 const char tmp = *s++;
6116 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "=====")) {
6117 s = vcs_conflict_marker(s + 5);
6120 if (!PL_lex_allbrackets
6121 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6129 if (!PL_lex_allbrackets
6130 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6139 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6140 && strchr("+-*/%.^&|<",tmp))
6141 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6142 "Reversed %c= operator",(int)tmp);
6144 if (PL_expect == XSTATE
6146 && (s == PL_linestart+1 || s[-2] == '\n') )
6148 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6149 || PL_lex_state != LEX_NORMAL) {
6154 if (strEQs(s,"=cut")) {
6168 PL_parser->in_pod = 1;
6172 if (PL_expect == XBLOCK) {
6174 #ifdef PERL_STRICT_CR
6175 while (SPACE_OR_TAB(*t))
6177 while (SPACE_OR_TAB(*t) || *t == '\r')
6180 if (*t == '\n' || *t == '#') {
6183 SAVEI8(PL_parser->form_lex_state);
6184 SAVEI32(PL_lex_formbrack);
6185 PL_parser->form_lex_state = PL_lex_state;
6186 PL_lex_formbrack = PL_lex_brackets + 1;
6190 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6199 const char tmp = *s++;
6201 /* was this !=~ where !~ was meant?
6202 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6204 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6205 const char *t = s+1;
6207 while (t < PL_bufend && isSPACE(*t))
6210 if (*t == '/' || *t == '?'
6211 || ((*t == 'm' || *t == 's' || *t == 'y')
6212 && !isWORDCHAR(t[1]))
6213 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6214 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6215 "!=~ should be !~");
6217 if (!PL_lex_allbrackets
6218 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6231 if (PL_expect != XOPERATOR) {
6232 if (s[1] != '<' && !strchr(s,'>'))
6234 if (s[1] == '<' && s[2] != '>') {
6235 if ((s == PL_linestart || s[-1] == '\n') && strEQs(s+2, "<<<<<")) {
6236 s = vcs_conflict_marker(s + 7);
6239 s = scan_heredoc(s);
6242 s = scan_inputsymbol(s);
6243 PL_expect = XOPERATOR;
6244 TOKEN(sublex_start());
6250 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "<<<<<")) {
6251 s = vcs_conflict_marker(s + 5);
6254 if (*s == '=' && !PL_lex_allbrackets
6255 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6260 SHop(OP_LEFT_SHIFT);
6265 if (!PL_lex_allbrackets
6266 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6274 if (!PL_lex_allbrackets
6275 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6284 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6292 const char tmp = *s++;
6294 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, ">>>>>")) {
6295 s = vcs_conflict_marker(s + 5);
6298 if (*s == '=' && !PL_lex_allbrackets
6299 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6304 SHop(OP_RIGHT_SHIFT);
6306 else if (tmp == '=') {
6307 if (!PL_lex_allbrackets
6308 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6317 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6326 if (PL_expect == XOPERATOR) {
6327 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6328 return deprecate_commaless_var_list();
6331 else if (PL_expect == XPOSTDEREF) {
6334 POSTDEREF(DOLSHARP);
6339 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6340 PL_tokenbuf[0] = '@';
6341 s = scan_ident(s + 1, PL_tokenbuf + 1,
6342 sizeof PL_tokenbuf - 1, FALSE);
6343 if (PL_expect == XOPERATOR) {
6345 if (PL_bufptr > s) {
6347 PL_bufptr = PL_oldbufptr;
6349 no_op("Array length", d);
6351 if (!PL_tokenbuf[1])
6353 PL_expect = XOPERATOR;
6354 force_ident_maybe_lex('#');
6358 PL_tokenbuf[0] = '$';
6359 s = scan_ident(s, PL_tokenbuf + 1,
6360 sizeof PL_tokenbuf - 1, FALSE);
6361 if (PL_expect == XOPERATOR) {
6363 if (PL_bufptr > s) {
6365 PL_bufptr = PL_oldbufptr;
6369 if (!PL_tokenbuf[1]) {
6371 yyerror("Final $ should be \\$ or $name");
6377 const char tmp = *s;
6378 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6381 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6382 && intuit_more(s)) {
6384 PL_tokenbuf[0] = '@';
6385 if (ckWARN(WARN_SYNTAX)) {
6388 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6389 t += UTF ? UTF8SKIP(t) : 1;
6391 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6392 while (t < PL_bufend && *t != ']')
6394 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6395 "Multidimensional syntax %" UTF8f " not supported",
6396 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6400 else if (*s == '{') {
6402 PL_tokenbuf[0] = '%';
6403 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6404 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6406 char tmpbuf[sizeof PL_tokenbuf];
6409 } while (isSPACE(*t));
6410 if (isIDFIRST_lazy_if(t,UTF)) {
6412 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6417 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6418 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6419 "You need to quote \"%" UTF8f "\"",
6420 UTF8fARG(UTF, len, tmpbuf));
6426 PL_expect = XOPERATOR;
6427 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6428 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6429 if (!islop || PL_last_lop_op == OP_GREPSTART)
6430 PL_expect = XOPERATOR;
6431 else if (strchr("$@\"'`q", *s))
6432 PL_expect = XTERM; /* e.g. print $fh "foo" */
6433 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6434 PL_expect = XTERM; /* e.g. print $fh &sub */
6435 else if (isIDFIRST_lazy_if(s,UTF)) {
6436 char tmpbuf[sizeof PL_tokenbuf];
6438 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6439 if ((t2 = keyword(tmpbuf, len, 0))) {
6440 /* binary operators exclude handle interpretations */
6452 PL_expect = XTERM; /* e.g. print $fh length() */
6457 PL_expect = XTERM; /* e.g. print $fh subr() */
6460 else if (isDIGIT(*s))
6461 PL_expect = XTERM; /* e.g. print $fh 3 */
6462 else if (*s == '.' && isDIGIT(s[1]))
6463 PL_expect = XTERM; /* e.g. print $fh .3 */
6464 else if ((*s == '?' || *s == '-' || *s == '+')
6465 && !isSPACE(s[1]) && s[1] != '=')
6466 PL_expect = XTERM; /* e.g. print $fh -1 */
6467 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6469 PL_expect = XTERM; /* e.g. print $fh /.../
6470 XXX except DORDOR operator
6472 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6474 PL_expect = XTERM; /* print $fh <<"EOF" */
6477 force_ident_maybe_lex('$');
6481 if (PL_expect == XPOSTDEREF)
6483 PL_tokenbuf[0] = '@';
6484 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6485 if (PL_expect == XOPERATOR) {
6487 if (PL_bufptr > s) {
6489 PL_bufptr = PL_oldbufptr;
6494 if (!PL_tokenbuf[1]) {
6497 if (PL_lex_state == LEX_NORMAL)
6499 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6501 PL_tokenbuf[0] = '%';
6503 /* Warn about @ where they meant $. */
6504 if (*s == '[' || *s == '{') {
6505 if (ckWARN(WARN_SYNTAX)) {
6506 S_check_scalar_slice(aTHX_ s);
6510 PL_expect = XOPERATOR;
6511 force_ident_maybe_lex('@');
6514 case '/': /* may be division, defined-or, or pattern */
6515 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6516 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6517 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6522 else if (PL_expect == XOPERATOR) {
6524 if (*s == '=' && !PL_lex_allbrackets
6525 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6533 /* Disable warning on "study /blah/" */
6534 if (PL_oldoldbufptr == PL_last_uni
6535 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6536 || memNE(PL_last_uni, "study", 5)
6537 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6540 s = scan_pat(s,OP_MATCH);
6541 TERM(sublex_start());
6544 case '?': /* conditional */
6546 if (!PL_lex_allbrackets
6547 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6552 PL_lex_allbrackets++;
6556 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6557 #ifdef PERL_STRICT_CR
6560 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6562 && (s == PL_linestart || s[-1] == '\n') )
6565 formbrack = 2; /* dot seen where arguments expected */
6568 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6572 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6575 if (!PL_lex_allbrackets
6576 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6584 pl_yylval.ival = OPf_SPECIAL;
6590 if (*s == '=' && !PL_lex_allbrackets
6591 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6599 case '0': case '1': case '2': case '3': case '4':
6600 case '5': case '6': case '7': case '8': case '9':
6601 s = scan_num(s, &pl_yylval);
6602 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6603 if (PL_expect == XOPERATOR)
6608 if ( PL_expect == XOPERATOR
6609 && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6610 return deprecate_commaless_var_list();
6612 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6615 COPLINE_SET_FROM_MULTI_END;
6616 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6617 if (PL_expect == XOPERATOR) {
6620 pl_yylval.ival = OP_CONST;
6621 TERM(sublex_start());
6624 if ( PL_expect == XOPERATOR
6625 && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6626 return deprecate_commaless_var_list();
6628 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6631 printbuf("### Saw string before %s\n", s);
6633 PerlIO_printf(Perl_debug_log,
6634 "### Saw unterminated string\n");
6636 if (PL_expect == XOPERATOR) {
6641 pl_yylval.ival = OP_CONST;
6642 /* FIXME. I think that this can be const if char *d is replaced by
6643 more localised variables. */
6644 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6645 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6646 pl_yylval.ival = OP_STRINGIFY;
6650 if (pl_yylval.ival == OP_CONST)
6651 COPLINE_SET_FROM_MULTI_END;
6652 TERM(sublex_start());
6655 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6658 printbuf("### Saw backtick string before %s\n", s);
6660 PerlIO_printf(Perl_debug_log,
6661 "### Saw unterminated backtick string\n");
6663 if (PL_expect == XOPERATOR)
6664 no_op("Backticks",s);
6667 pl_yylval.ival = OP_BACKTICK;
6668 TERM(sublex_start());
6672 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6674 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6676 if (PL_expect == XOPERATOR)
6677 no_op("Backslash",s);
6681 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6682 char *start = s + 2;
6683 while (isDIGIT(*start) || *start == '_')
6685 if (*start == '.' && isDIGIT(start[1])) {
6686 s = scan_num(s, &pl_yylval);
6689 else if ((*start == ':' && start[1] == ':')
6690 || (PL_expect == XSTATE && *start == ':'))
6692 else if (PL_expect == XSTATE) {
6694 while (d < PL_bufend && isSPACE(*d)) d++;
6695 if (*d == ':') goto keylookup;
6697 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6698 if (!isALPHA(*start) && (PL_expect == XTERM
6699 || PL_expect == XREF || PL_expect == XSTATE
6700 || PL_expect == XTERMORDORDOR)) {
6701 GV *const gv = gv_fetchpvn_flags(s, start - s,
6702 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6704 s = scan_num(s, &pl_yylval);
6711 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6764 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6766 /* Some keywords can be followed by any delimiter, including ':' */
6767 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
6769 /* x::* is just a word, unless x is "CORE" */
6770 if (!anydelim && *s == ':' && s[1] == ':') {
6771 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6776 while (d < PL_bufend && isSPACE(*d))
6777 d++; /* no comments skipped here, or s### is misparsed */
6779 /* Is this a word before a => operator? */
6780 if (*d == '=' && d[1] == '>') {
6784 = newSVOP(OP_CONST, 0,
6785 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6786 pl_yylval.opval->op_private = OPpCONST_BARE;
6790 /* Check for plugged-in keyword */
6794 char *saved_bufptr = PL_bufptr;
6796 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6798 if (result == KEYWORD_PLUGIN_DECLINE) {
6799 /* not a plugged-in keyword */
6800 PL_bufptr = saved_bufptr;
6801 } else if (result == KEYWORD_PLUGIN_STMT) {
6802 pl_yylval.opval = o;
6804 if (!PL_nexttoke) PL_expect = XSTATE;
6805 return REPORT(PLUGSTMT);
6806 } else if (result == KEYWORD_PLUGIN_EXPR) {
6807 pl_yylval.opval = o;
6809 if (!PL_nexttoke) PL_expect = XOPERATOR;
6810 return REPORT(PLUGEXPR);
6812 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6817 /* Check for built-in keyword */
6818 tmp = keyword(PL_tokenbuf, len, 0);
6820 /* Is this a label? */
6821 if (!anydelim && PL_expect == XSTATE
6822 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6824 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6825 pl_yylval.pval[len] = '\0';
6826 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6831 /* Check for lexical sub */
6832 if (PL_expect != XOPERATOR) {
6833 char tmpbuf[sizeof PL_tokenbuf + 1];
6835 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6836 off = pad_findmy_pvn(tmpbuf, len+1, 0);
6837 if (off != NOT_IN_PAD) {
6838 assert(off); /* we assume this is boolean-true below */
6839 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6840 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6841 HEK * const stashname = HvNAME_HEK(stash);
6842 sv = newSVhek(stashname);
6843 sv_catpvs(sv, "::");
6844 sv_catpvn_flags(sv, PL_tokenbuf, len,
6845 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6846 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6856 rv2cv_op = newOP(OP_PADANY, 0);
6857 rv2cv_op->op_targ = off;
6858 cv = find_lexical_cv(off);
6866 if (tmp < 0) { /* second-class keyword? */
6867 GV *ogv = NULL; /* override (winner) */
6868 GV *hgv = NULL; /* hidden (loser) */
6869 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6871 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6872 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6874 && (cv = GvCVu(gv)))
6876 if (GvIMPORTED_CV(gv))
6878 else if (! CvMETHOD(cv))
6882 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6885 && (isGV_with_GP(gv)
6886 ? GvCVu(gv) && GvIMPORTED_CV(gv)
6887 : SvPCS_IMPORTED(gv)
6888 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6896 tmp = 0; /* overridden by import or by GLOBAL */
6899 && -tmp==KEY_lock /* XXX generalizable kludge */
6902 tmp = 0; /* any sub overrides "weak" keyword */
6904 else { /* no override */
6906 if (tmp == KEY_dump) {
6907 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6908 "dump() better written as CORE::dump()");
6912 if (hgv && tmp != KEY_x) /* never ambiguous */
6913 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6914 "Ambiguous call resolved as CORE::%s(), "
6915 "qualify as such or use &",
6920 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6921 && (!anydelim || *s != '#')) {
6922 /* no override, and not s### either; skipspace is safe here
6923 * check for => on following line */
6925 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6926 STRLEN soff = s - SvPVX(PL_linestr);
6928 arrow = *s == '=' && s[1] == '>';
6929 PL_bufptr = SvPVX(PL_linestr) + bufoff;
6930 s = SvPVX(PL_linestr) + soff;
6938 /* Trade off - by using this evil construction we can pull the
6939 variable gv into the block labelled keylookup. If not, then
6940 we have to give it function scope so that the goto from the
6941 earlier ':' case doesn't bypass the initialisation. */
6942 just_a_word_zero_gv:
6951 default: /* not a keyword */
6954 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6958 /* Get the rest if it looks like a package qualifier */
6960 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6962 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6965 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
6966 UTF8fARG(UTF, len, PL_tokenbuf),
6967 *s == '\'' ? "'" : "::");
6972 if (PL_expect == XOPERATOR) {
6973 if (PL_bufptr == PL_linestart) {
6974 CopLINE_dec(PL_curcop);
6975 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6976 CopLINE_inc(PL_curcop);
6979 no_op("Bareword",s);
6982 /* See if the name is "Foo::",
6983 in which case Foo is a bareword
6984 (and a package name). */
6987 && PL_tokenbuf[len - 2] == ':'
6988 && PL_tokenbuf[len - 1] == ':')
6990 if (ckWARN(WARN_BAREWORD)
6991 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6992 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6993 "Bareword \"%" UTF8f
6994 "\" refers to nonexistent package",
6995 UTF8fARG(UTF, len, PL_tokenbuf));
6997 PL_tokenbuf[len] = '\0';
7006 /* if we saw a global override before, get the right name */
7009 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7012 SV * const tmp_sv = sv;
7013 sv = newSVpvs("CORE::GLOBAL::");
7014 sv_catsv(sv, tmp_sv);
7015 SvREFCNT_dec(tmp_sv);
7019 /* Presume this is going to be a bareword of some sort. */
7021 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7022 pl_yylval.opval->op_private = OPpCONST_BARE;
7024 /* And if "Foo::", then that's what it certainly is. */
7030 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7031 const_op->op_private = OPpCONST_BARE;
7033 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7037 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7040 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7043 /* Use this var to track whether intuit_method has been
7044 called. intuit_method returns 0 or > 255. */
7047 /* See if it's the indirect object for a list operator. */
7050 && PL_oldoldbufptr < PL_bufptr
7051 && (PL_oldoldbufptr == PL_last_lop
7052 || PL_oldoldbufptr == PL_last_uni)
7053 && /* NO SKIPSPACE BEFORE HERE! */
7055 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7058 bool immediate_paren = *s == '(';
7060 /* (Now we can afford to cross potential line boundary.) */
7063 /* Two barewords in a row may indicate method call. */
7065 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
7066 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7071 /* If not a declared subroutine, it's an indirect object. */
7072 /* (But it's an indir obj regardless for sort.) */
7073 /* Also, if "_" follows a filetest operator, it's a bareword */
7076 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7078 && (PL_last_lop_op != OP_MAPSTART
7079 && PL_last_lop_op != OP_GREPSTART))))
7080 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7081 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7085 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7090 PL_expect = XOPERATOR;
7093 /* Is this a word before a => operator? */
7094 if (*s == '=' && s[1] == '>' && !pkgname) {
7097 if (gvp || (lex && !off)) {
7098 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7099 /* This is our own scalar, created a few lines
7100 above, so this is safe. */
7102 sv_setpv(sv, PL_tokenbuf);
7103 if (UTF && !IN_BYTES
7104 && is_utf8_string((U8*)PL_tokenbuf, len))
7111 /* If followed by a paren, it's certainly a subroutine. */
7116 while (SPACE_OR_TAB(*d))
7118 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7123 NEXTVAL_NEXTTOKE.opval =
7124 off ? rv2cv_op : pl_yylval.opval;
7126 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7127 else op_free(rv2cv_op), force_next(BAREWORD);
7132 /* If followed by var or block, call it a method (unless sub) */
7134 if ((*s == '$' || *s == '{') && !cv) {
7136 PL_last_lop = PL_oldbufptr;
7137 PL_last_lop_op = OP_METHOD;
7138 if (!PL_lex_allbrackets
7139 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7141 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7143 PL_expect = XBLOCKTERM;
7145 return REPORT(METHOD);
7148 /* If followed by a bareword, see if it looks like indir obj. */
7150 if (tmp == 1 && !orig_keyword
7151 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7152 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
7155 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7157 sv_setpvn(sv, PL_tokenbuf, len);
7158 if (UTF && !IN_BYTES
7159 && is_utf8_string((U8*)PL_tokenbuf, len))
7161 else SvUTF8_off(sv);
7164 if (tmp == METHOD && !PL_lex_allbrackets
7165 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7167 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7172 /* Not a method, so call it a subroutine (if defined) */
7175 /* Check for a constant sub */
7176 if ((sv = cv_const_sv_or_av(cv))) {
7179 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7180 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7181 if (SvTYPE(sv) == SVt_PVAV)
7182 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7185 pl_yylval.opval->op_private = 0;
7186 pl_yylval.opval->op_folded = 1;
7187 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7192 op_free(pl_yylval.opval);
7194 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7195 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7196 PL_last_lop = PL_oldbufptr;
7197 PL_last_lop_op = OP_ENTERSUB;
7198 /* Is there a prototype? */
7202 STRLEN protolen = CvPROTOLEN(cv);
7203 const char *proto = CvPROTO(cv);
7205 proto = S_strip_spaces(aTHX_ proto, &protolen);
7208 if ((optional = *proto == ';'))
7211 while (*proto == ';');
7215 *proto == '$' || *proto == '_'
7216 || *proto == '*' || *proto == '+'
7221 *proto == '\\' && proto[1] && proto[2] == '\0'
7224 UNIPROTO(UNIOPSUB,optional);
7225 if (*proto == '\\' && proto[1] == '[') {
7226 const char *p = proto + 2;
7227 while(*p && *p != ']')
7229 if(*p == ']' && !p[1])
7230 UNIPROTO(UNIOPSUB,optional);
7232 if (*proto == '&' && *s == '{') {
7234 sv_setpvs(PL_subname, "__ANON__");
7236 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7237 if (!PL_lex_allbrackets
7238 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7240 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7245 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7247 force_next(off ? PRIVATEREF : BAREWORD);
7248 if (!PL_lex_allbrackets
7249 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7251 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7256 /* Call it a bare word */
7258 if (PL_hints & HINT_STRICT_SUBS)
7259 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7262 /* after "print" and similar functions (corresponding to
7263 * "F? L" in opcode.pl), whatever wasn't already parsed as
7264 * a filehandle should be subject to "strict subs".
7265 * Likewise for the optional indirect-object argument to system
7266 * or exec, which can't be a bareword */
7267 if ((PL_last_lop_op == OP_PRINT
7268 || PL_last_lop_op == OP_PRTF
7269 || PL_last_lop_op == OP_SAY
7270 || PL_last_lop_op == OP_SYSTEM
7271 || PL_last_lop_op == OP_EXEC)
7272 && (PL_hints & HINT_STRICT_SUBS))
7273 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7274 if (lastchar != '-') {
7275 if (ckWARN(WARN_RESERVED)) {
7279 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7281 /* PL_warn_reserved is constant */
7282 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7283 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7293 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7294 && saw_infix_sigil) {
7295 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7296 "Operator or semicolon missing before %c%" UTF8f,
7298 UTF8fARG(UTF, strlen(PL_tokenbuf),
7300 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7301 "Ambiguous use of %c resolved as operator %c",
7302 lastchar, lastchar);
7309 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7314 newSVOP(OP_CONST, 0,
7315 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7318 case KEY___PACKAGE__:
7320 newSVOP(OP_CONST, 0,
7322 ? newSVhek(HvNAME_HEK(PL_curstash))
7329 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7330 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7333 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7335 gv_init(gv,stash,"DATA",4,0);
7338 GvIOp(gv) = newIO();
7339 IoIFP(GvIOp(gv)) = PL_rsfp;
7340 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7342 const int fd = PerlIO_fileno(PL_rsfp);
7344 fcntl(fd,F_SETFD, FD_CLOEXEC);
7348 /* Mark this internal pseudo-handle as clean */
7349 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7350 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7351 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7353 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7354 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7355 /* if the script was opened in binmode, we need to revert
7356 * it to text mode for compatibility; but only iff it has CRs
7357 * XXX this is a questionable hack at best. */
7358 if (PL_bufend-PL_bufptr > 2
7359 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7362 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7363 loc = PerlIO_tell(PL_rsfp);
7364 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7367 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7369 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7370 #endif /* NETWARE */
7372 PerlIO_seek(PL_rsfp, loc, 0);
7376 #ifdef PERLIO_LAYERS
7379 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7388 FUN0OP(CvCLONE(PL_compcv)
7389 ? newOP(OP_RUNCV, 0)
7390 : newPVOP(OP_RUNCV,0,NULL));
7399 if (PL_expect == XSTATE) {
7410 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7411 if ((*s == ':' && s[1] == ':')
7412 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7416 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7420 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7421 UTF8fARG(UTF, len, PL_tokenbuf));
7424 else if (tmp == KEY_require || tmp == KEY_do
7426 /* that's a way to remember we saw "CORE::" */
7438 LOP(OP_ACCEPT,XTERM);
7441 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7446 LOP(OP_ATAN2,XTERM);
7452 LOP(OP_BINMODE,XTERM);
7455 LOP(OP_BLESS,XTERM);
7464 /* We have to disambiguate the two senses of
7465 "continue". If the next token is a '{' then
7466 treat it as the start of a continue block;
7467 otherwise treat it as a control operator.
7477 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7487 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7496 if (!PL_cryptseen) {
7497 PL_cryptseen = TRUE;
7501 LOP(OP_CRYPT,XTERM);
7504 LOP(OP_CHMOD,XTERM);
7507 LOP(OP_CHOWN,XTERM);
7510 LOP(OP_CONNECT,XTERM);
7530 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7532 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7533 && !keyword(PL_tokenbuf + 1, len, 0)) {
7534 SSize_t off = s-SvPVX(PL_linestr);
7536 s = SvPVX(PL_linestr)+off;
7538 force_ident_maybe_lex('&');
7543 if (orig_keyword == KEY_do) {
7552 PL_hints |= HINT_BLOCK_SCOPE;
7562 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7563 STR_WITH_LEN("NDBM_File::"),
7564 STR_WITH_LEN("DB_File::"),
7565 STR_WITH_LEN("GDBM_File::"),
7566 STR_WITH_LEN("SDBM_File::"),
7567 STR_WITH_LEN("ODBM_File::"),
7569 LOP(OP_DBMOPEN,XTERM);
7581 pl_yylval.ival = CopLINE(PL_curcop);
7585 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7597 if (*s == '{') { /* block eval */
7598 PL_expect = XTERMBLOCK;
7599 UNIBRACK(OP_ENTERTRY);
7601 else { /* string eval */
7603 UNIBRACK(OP_ENTEREVAL);
7608 UNIBRACK(-OP_ENTEREVAL);
7622 case KEY_endhostent:
7628 case KEY_endservent:
7631 case KEY_endprotoent:
7642 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7644 pl_yylval.ival = CopLINE(PL_curcop);
7646 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7649 if ((PL_bufend - p) >= 3
7650 && strEQs(p, "my") && isSPACE(*(p + 2)))
7654 else if ((PL_bufend - p) >= 4
7655 && strEQs(p, "our") && isSPACE(*(p + 3)))
7658 /* skip optional package name, as in "for my abc $x (..)" */
7659 if (isIDFIRST_lazy_if(p,UTF)) {
7660 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7663 if (*p != '$' && *p != '\\')
7664 Perl_croak(aTHX_ "Missing $ on loop variable");
7669 LOP(OP_FORMLINE,XTERM);
7678 LOP(OP_FCNTL,XTERM);
7684 LOP(OP_FLOCK,XTERM);
7687 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7692 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7697 LOP(OP_GREPSTART, XREF);
7714 case KEY_getpriority:
7715 LOP(OP_GETPRIORITY,XTERM);
7717 case KEY_getprotobyname:
7720 case KEY_getprotobynumber:
7721 LOP(OP_GPBYNUMBER,XTERM);
7723 case KEY_getprotoent:
7735 case KEY_getpeername:
7736 UNI(OP_GETPEERNAME);
7738 case KEY_gethostbyname:
7741 case KEY_gethostbyaddr:
7742 LOP(OP_GHBYADDR,XTERM);
7744 case KEY_gethostent:
7747 case KEY_getnetbyname:
7750 case KEY_getnetbyaddr:
7751 LOP(OP_GNBYADDR,XTERM);
7756 case KEY_getservbyname:
7757 LOP(OP_GSBYNAME,XTERM);
7759 case KEY_getservbyport:
7760 LOP(OP_GSBYPORT,XTERM);
7762 case KEY_getservent:
7765 case KEY_getsockname:
7766 UNI(OP_GETSOCKNAME);
7768 case KEY_getsockopt:
7769 LOP(OP_GSOCKOPT,XTERM);
7784 pl_yylval.ival = CopLINE(PL_curcop);
7785 Perl_ck_warner_d(aTHX_
7786 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7787 "given is experimental");
7792 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7800 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7802 pl_yylval.ival = CopLINE(PL_curcop);
7806 LOP(OP_INDEX,XTERM);
7812 LOP(OP_IOCTL,XTERM);
7839 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7844 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7858 LOP(OP_LISTEN,XTERM);
7867 s = scan_pat(s,OP_MATCH);
7868 TERM(sublex_start());
7871 LOP(OP_MAPSTART, XREF);
7874 LOP(OP_MKDIR,XTERM);
7877 LOP(OP_MSGCTL,XTERM);
7880 LOP(OP_MSGGET,XTERM);
7883 LOP(OP_MSGRCV,XTERM);
7886 LOP(OP_MSGSND,XTERM);
7893 yyerror(Perl_form(aTHX_
7894 "Can't redeclare \"%s\" in \"%s\"",
7895 tmp == KEY_my ? "my" :
7896 tmp == KEY_state ? "state" : "our",
7897 PL_in_my == KEY_my ? "my" :
7898 PL_in_my == KEY_state ? "state" : "our"));
7900 PL_in_my = (U16)tmp;
7902 if (isIDFIRST_lazy_if(s,UTF)) {
7903 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7904 if (len == 3 && strEQs(PL_tokenbuf, "sub"))
7906 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7907 if (!PL_in_my_stash) {
7911 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7912 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7913 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7916 else if (*s == '\\') {
7917 if (!FEATURE_MYREF_IS_ENABLED)
7918 Perl_croak(aTHX_ "The experimental declared_refs "
7919 "feature is not enabled");
7920 Perl_ck_warner_d(aTHX_
7921 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7922 "Declaring references is experimental");
7930 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7935 s = tokenize_use(0, s);
7939 if (*s == '(' || (s = skipspace(s), *s == '('))
7942 if (!PL_lex_allbrackets
7943 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7945 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7952 if (isIDFIRST_lazy_if(s,UTF)) {
7954 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7956 for (t=d; isSPACE(*t);)
7958 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7960 && !(t[0] == '=' && t[1] == '>')
7961 && !(t[0] == ':' && t[1] == ':')
7962 && !keyword(s, d-s, 0)
7964 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7965 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
7966 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7972 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7974 pl_yylval.ival = OP_OR;
7984 LOP(OP_OPEN_DIR,XTERM);
7987 checkcomma(s,PL_tokenbuf,"filehandle");
7991 checkcomma(s,PL_tokenbuf,"filehandle");
8010 s = force_word(s,BAREWORD,FALSE,TRUE);
8012 s = force_strict_version(s);
8016 LOP(OP_PIPE_OP,XTERM);
8019 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8022 COPLINE_SET_FROM_MULTI_END;
8023 pl_yylval.ival = OP_CONST;
8024 TERM(sublex_start());
8031 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8034 COPLINE_SET_FROM_MULTI_END;
8035 PL_expect = XOPERATOR;
8036 if (SvCUR(PL_lex_stuff)) {
8037 int warned_comma = !ckWARN(WARN_QW);
8038 int warned_comment = warned_comma;
8039 d = SvPV_force(PL_lex_stuff, len);
8041 for (; isSPACE(*d) && len; --len, ++d)
8046 if (!warned_comma || !warned_comment) {
8047 for (; !isSPACE(*d) && len; --len, ++d) {
8048 if (!warned_comma && *d == ',') {
8049 Perl_warner(aTHX_ packWARN(WARN_QW),
8050 "Possible attempt to separate words with commas");
8053 else if (!warned_comment && *d == '#') {
8054 Perl_warner(aTHX_ packWARN(WARN_QW),
8055 "Possible attempt to put comments in qw() list");
8061 for (; !isSPACE(*d) && len; --len, ++d)
8064 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8065 words = op_append_elem(OP_LIST, words,
8066 newSVOP(OP_CONST, 0, tokeq(sv)));
8071 words = newNULLLIST();
8072 SvREFCNT_dec_NN(PL_lex_stuff);
8073 PL_lex_stuff = NULL;
8074 PL_expect = XOPERATOR;
8075 pl_yylval.opval = sawparens(words);
8080 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8083 pl_yylval.ival = OP_STRINGIFY;
8084 if (SvIVX(PL_lex_stuff) == '\'')
8085 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8086 TERM(sublex_start());
8089 s = scan_pat(s,OP_QR);
8090 TERM(sublex_start());
8093 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8096 pl_yylval.ival = OP_BACKTICK;
8097 TERM(sublex_start());
8105 s = force_version(s, FALSE);
8107 else if (*s != 'v' || !isDIGIT(s[1])
8108 || (s = force_version(s, TRUE), *s == 'v'))
8110 *PL_tokenbuf = '\0';
8111 s = force_word(s,BAREWORD,TRUE,TRUE);
8112 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8113 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8114 GV_ADD | (UTF ? SVf_UTF8 : 0));
8116 yyerror("<> at require-statement should be quotes");
8118 if (orig_keyword == KEY_require) {
8124 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8126 PL_last_uni = PL_oldbufptr;
8127 PL_last_lop_op = OP_REQUIRE;
8129 return REPORT( (int)REQUIRE );
8138 LOP(OP_RENAME,XTERM);
8147 LOP(OP_RINDEX,XTERM);
8156 UNIDOR(OP_READLINE);
8159 UNIDOR(OP_BACKTICK);
8168 LOP(OP_REVERSE,XTERM);
8171 UNIDOR(OP_READLINK);
8178 if (pl_yylval.opval)
8179 TERM(sublex_start());
8181 TOKEN(1); /* force error */
8184 checkcomma(s,PL_tokenbuf,"filehandle");
8194 LOP(OP_SELECT,XTERM);
8200 LOP(OP_SEMCTL,XTERM);
8203 LOP(OP_SEMGET,XTERM);
8206 LOP(OP_SEMOP,XTERM);
8212 LOP(OP_SETPGRP,XTERM);
8214 case KEY_setpriority:
8215 LOP(OP_SETPRIORITY,XTERM);
8217 case KEY_sethostent:
8223 case KEY_setservent:
8226 case KEY_setprotoent:
8236 LOP(OP_SEEKDIR,XTERM);
8238 case KEY_setsockopt:
8239 LOP(OP_SSOCKOPT,XTERM);
8245 LOP(OP_SHMCTL,XTERM);
8248 LOP(OP_SHMGET,XTERM);
8251 LOP(OP_SHMREAD,XTERM);
8254 LOP(OP_SHMWRITE,XTERM);
8257 LOP(OP_SHUTDOWN,XTERM);
8266 LOP(OP_SOCKET,XTERM);
8268 case KEY_socketpair:
8269 LOP(OP_SOCKPAIR,XTERM);
8272 checkcomma(s,PL_tokenbuf,"subroutine name");
8275 s = force_word(s,BAREWORD,TRUE,TRUE);
8279 LOP(OP_SPLIT,XTERM);
8282 LOP(OP_SPRINTF,XTERM);
8285 LOP(OP_SPLICE,XTERM);
8300 LOP(OP_SUBSTR,XTERM);
8306 char * const tmpbuf = PL_tokenbuf + 1;
8307 expectation attrful;
8308 bool have_name, have_proto;
8309 const int key = tmp;
8310 SV *format_name = NULL;
8312 SSize_t off = s-SvPVX(PL_linestr);
8314 d = SvPVX(PL_linestr)+off;
8316 if (isIDFIRST_lazy_if(s,UTF)
8318 || (*s == ':' && s[1] == ':'))
8322 attrful = XATTRBLOCK;
8323 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8325 if (key == KEY_format)
8326 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8328 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8330 PL_tokenbuf, len + 1, 0
8332 sv_setpvn(PL_subname, tmpbuf, len);
8334 sv_setsv(PL_subname,PL_curstname);
8335 sv_catpvs(PL_subname,"::");
8336 sv_catpvn(PL_subname,tmpbuf,len);
8338 if (SvUTF8(PL_linestr))
8339 SvUTF8_on(PL_subname);
8346 if (key == KEY_my || key == KEY_our || key==KEY_state)
8349 /* diag_listed_as: Missing name in "%s sub" */
8351 "Missing name in \"%s\"", PL_bufptr);
8353 PL_expect = XTERMBLOCK;
8354 attrful = XATTRTERM;
8355 sv_setpvs(PL_subname,"?");
8359 if (key == KEY_format) {
8361 NEXTVAL_NEXTTOKE.opval
8362 = newSVOP(OP_CONST,0, format_name);
8363 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8364 force_next(BAREWORD);
8369 /* Look for a prototype */
8370 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8371 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8372 COPLINE_SET_FROM_MULTI_END;
8374 Perl_croak(aTHX_ "Prototype not terminated");
8375 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8383 if (*s == ':' && s[1] != ':')
8384 PL_expect = attrful;
8385 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8386 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8387 key == KEY_DESTROY || key == KEY_BEGIN ||
8388 key == KEY_UNITCHECK || key == KEY_CHECK ||
8389 key == KEY_INIT || key == KEY_END ||
8390 key == KEY_my || key == KEY_state ||
8393 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8394 else if (*s != ';' && *s != '}')
8395 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8399 NEXTVAL_NEXTTOKE.opval =
8400 newSVOP(OP_CONST, 0, PL_lex_stuff);
8401 PL_lex_stuff = NULL;
8406 sv_setpvs(PL_subname, "__ANON__");
8408 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8411 force_ident_maybe_lex('&');
8416 LOP(OP_SYSTEM,XREF);
8419 LOP(OP_SYMLINK,XTERM);
8422 LOP(OP_SYSCALL,XTERM);
8425 LOP(OP_SYSOPEN,XTERM);
8428 LOP(OP_SYSSEEK,XTERM);
8431 LOP(OP_SYSREAD,XTERM);
8434 LOP(OP_SYSWRITE,XTERM);
8439 TERM(sublex_start());
8460 LOP(OP_TRUNCATE,XTERM);
8472 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8474 pl_yylval.ival = CopLINE(PL_curcop);
8478 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8480 pl_yylval.ival = CopLINE(PL_curcop);
8484 LOP(OP_UNLINK,XTERM);
8490 LOP(OP_UNPACK,XTERM);
8493 LOP(OP_UTIME,XTERM);
8499 LOP(OP_UNSHIFT,XTERM);
8502 s = tokenize_use(1, s);
8512 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8514 pl_yylval.ival = CopLINE(PL_curcop);
8515 Perl_ck_warner_d(aTHX_
8516 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8517 "when is experimental");
8521 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8523 pl_yylval.ival = CopLINE(PL_curcop);
8527 PL_hints |= HINT_BLOCK_SCOPE;
8534 LOP(OP_WAITPID,XTERM);
8540 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8541 * we use the same number on EBCDIC */
8542 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8546 if (PL_expect == XOPERATOR) {
8547 if (*s == '=' && !PL_lex_allbrackets
8548 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8558 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8560 pl_yylval.ival = OP_XOR;
8569 Looks up an identifier in the pad or in a package
8571 is_sig indicates that this is a subroutine signature variable
8572 rather than a plain pad var.
8575 PRIVATEREF if this is a lexical name.
8576 BAREWORD if this belongs to a package.
8579 if we're in a my declaration
8580 croak if they tried to say my($foo::bar)
8581 build the ops for a my() declaration
8582 if it's an access to a my() variable
8583 build ops for access to a my() variable
8584 if in a dq string, and they've said @foo and we can't find @foo
8586 build ops for a bareword
8590 S_pending_ident(pTHX)
8593 const char pit = (char)pl_yylval.ival;
8594 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8595 /* All routes through this function want to know if there is a colon. */
8596 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8598 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8599 "### Pending identifier '%s'\n", PL_tokenbuf); });
8601 /* if we're in a my(), we can't allow dynamics here.
8602 $foo'bar has already been turned into $foo::bar, so
8603 just check for colons.
8605 if it's a legal name, the OP is a PADANY.
8608 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8610 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8611 "variable %s in \"our\"",
8612 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8613 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8618 /* "my" variable %s can't be in a package */
8619 /* PL_no_myglob is constant */
8620 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8621 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8622 PL_in_my == KEY_my ? "my" : "state",
8623 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8625 UTF ? SVf_UTF8 : 0);
8629 if (PL_in_my == KEY_sigvar) {
8630 /* A signature 'padop' needs in addition, an op_first to
8631 * point to a child sigdefelem, and an extra field to hold
8632 * the signature index. We can achieve both by using an
8633 * UNOP_AUX and (ab)using the op_aux field to hold the
8634 * index. If we ever need more fields, use a real malloced
8635 * aux strut instead.
8637 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
8638 INT2PTR(UNOP_AUX_item *,
8639 (PL_parser->sig_elems)));
8640 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
8641 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
8645 o = newOP(OP_PADANY, 0);
8646 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8647 UTF ? SVf_UTF8 : 0);
8648 if (PL_in_my == KEY_sigvar)
8651 pl_yylval.opval = o;
8657 build the ops for accesses to a my() variable.
8662 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8664 if (tmp != NOT_IN_PAD) {
8665 /* might be an "our" variable" */
8666 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8667 /* build ops for a bareword */
8668 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8669 HEK * const stashname = HvNAME_HEK(stash);
8670 SV * const sym = newSVhek(stashname);
8671 sv_catpvs(sym, "::");
8672 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8673 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
8674 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8678 ((PL_tokenbuf[0] == '$') ? SVt_PV
8679 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8684 pl_yylval.opval = newOP(OP_PADANY, 0);
8685 pl_yylval.opval->op_targ = tmp;
8691 Whine if they've said @foo or @foo{key} in a doublequoted string,
8692 and @foo (or %foo) isn't a variable we can find in the symbol
8695 if (ckWARN(WARN_AMBIGUOUS)
8697 && PL_lex_state != LEX_NORMAL
8698 && !PL_lex_brackets)
8700 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8701 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
8703 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8706 /* Downgraded from fatal to warning 20000522 mjd */
8707 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8708 "Possible unintended interpolation of %" UTF8f
8710 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8714 /* build ops for a bareword */
8715 pl_yylval.opval = newSVOP(OP_CONST, 0,
8716 newSVpvn_flags(PL_tokenbuf + 1,
8718 UTF ? SVf_UTF8 : 0 ));
8719 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8721 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8722 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8723 | ( UTF ? SVf_UTF8 : 0 ),
8724 ((PL_tokenbuf[0] == '$') ? SVt_PV
8725 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8731 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8733 PERL_ARGS_ASSERT_CHECKCOMMA;
8735 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8736 if (ckWARN(WARN_SYNTAX)) {
8739 for (w = s+2; *w && level; w++) {
8747 /* the list of chars below is for end of statements or
8748 * block / parens, boolean operators (&&, ||, //) and branch
8749 * constructs (or, and, if, until, unless, while, err, for).
8750 * Not a very solid hack... */
8751 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8752 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8753 "%s (...) interpreted as function",name);
8756 while (s < PL_bufend && isSPACE(*s))
8760 while (s < PL_bufend && isSPACE(*s))
8762 if (isIDFIRST_lazy_if(s,UTF)) {
8763 const char * const w = s;
8764 s += UTF ? UTF8SKIP(s) : 1;
8765 while (isWORDCHAR_lazy_if(s,UTF))
8766 s += UTF ? UTF8SKIP(s) : 1;
8767 while (s < PL_bufend && isSPACE(*s))
8772 if (keyword(w, s - w, 0))
8775 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8776 if (gv && GvCVu(gv))
8780 Copy(w, tmpbuf+1, s - w, char);
8782 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8783 if (off != NOT_IN_PAD) return;
8785 Perl_croak(aTHX_ "No comma allowed after %s", what);
8790 /* S_new_constant(): do any overload::constant lookup.
8792 Either returns sv, or mortalizes/frees sv and returns a new SV*.
8793 Best used as sv=new_constant(..., sv, ...).
8794 If s, pv are NULL, calls subroutine with one argument,
8795 and <type> is used with error messages only.
8796 <type> is assumed to be well formed UTF-8 */
8799 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8800 SV *sv, SV *pv, const char *type, STRLEN typelen)
8803 HV * table = GvHV(PL_hintgv); /* ^H */
8808 const char *why1 = "", *why2 = "", *why3 = "";
8810 PERL_ARGS_ASSERT_NEW_CONSTANT;
8811 /* We assume that this is true: */
8812 if (*key == 'c') { assert (strEQ(key, "charnames")); }
8815 /* charnames doesn't work well if there have been errors found */
8816 if (PL_error_count > 0 && *key == 'c')
8818 SvREFCNT_dec_NN(sv);
8819 return &PL_sv_undef;
8822 sv_2mortal(sv); /* Parent created it permanently */
8824 || ! (PL_hints & HINT_LOCALIZE_HH)
8825 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8830 /* Here haven't found what we're looking for. If it is charnames,
8831 * perhaps it needs to be loaded. Try doing that before giving up */
8833 Perl_load_module(aTHX_
8835 newSVpvs("_charnames"),
8836 /* version parameter; no need to specify it, as if
8837 * we get too early a version, will fail anyway,
8838 * not being able to find '_charnames' */
8843 assert(sp == PL_stack_sp);
8844 table = GvHV(PL_hintgv);
8846 && (PL_hints & HINT_LOCALIZE_HH)
8847 && (cvp = hv_fetch(table, key, keylen, FALSE))
8853 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8854 msg = Perl_form(aTHX_
8855 "Constant(%.*s) unknown",
8856 (int)(type ? typelen : len),
8862 why3 = "} is not defined";
8865 msg = Perl_form(aTHX_
8866 /* The +3 is for '\N{'; -4 for that, plus '}' */
8867 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8871 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8872 (int)(type ? typelen : len),
8873 (type ? type: s), why1, why2, why3);
8876 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8877 return SvREFCNT_inc_simple_NN(sv);
8882 pv = newSVpvn_flags(s, len, SVs_TEMP);
8884 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8886 typesv = &PL_sv_undef;
8888 PUSHSTACKi(PERLSI_OVERLOAD);
8900 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8904 /* Check the eval first */
8905 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8907 const char * errstr;
8908 sv_catpvs(errsv, "Propagated");
8909 errstr = SvPV_const(errsv, errlen);
8910 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8912 res = SvREFCNT_inc_simple_NN(sv);
8916 SvREFCNT_inc_simple_void_NN(res);
8925 why1 = "Call to &{$^H{";
8927 why3 = "}} did not return a defined value";
8929 (void)sv_2mortal(sv);
8936 PERL_STATIC_INLINE void
8937 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
8938 bool is_utf8, bool check_dollar) {
8939 PERL_ARGS_ASSERT_PARSE_IDENT;
8943 Perl_croak(aTHX_ "%s", ident_too_long);
8944 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8945 /* The UTF-8 case must come first, otherwise things
8946 * like c\N{COMBINING TILDE} would start failing, as the
8947 * isWORDCHAR_A case below would gobble the 'c' up.
8950 char *t = *s + UTF8SKIP(*s);
8951 while (isIDCONT_utf8((U8*)t))
8953 if (*d + (t - *s) > e)
8954 Perl_croak(aTHX_ "%s", ident_too_long);
8955 Copy(*s, *d, t - *s, char);
8959 else if ( isWORDCHAR_A(**s) ) {
8962 } while (isWORDCHAR_A(**s) && *d < e);
8964 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8969 else if (allow_package && **s == ':' && (*s)[1] == ':'
8970 /* Disallow things like Foo::$bar. For the curious, this is
8971 * the code path that triggers the "Bad name after" warning
8972 * when looking for barewords.
8974 && !(check_dollar && (*s)[2] == '$')) {
8984 /* Returns a NUL terminated string, with the length of the string written to
8988 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8991 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8992 bool is_utf8 = cBOOL(UTF);
8994 PERL_ARGS_ASSERT_SCAN_WORD;
8996 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
9002 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9003 * iff Unicode semantics are to be used. The legal ones are any of:
9004 * a) all ASCII characters except:
9005 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9007 * The final case currently doesn't get this far in the program, so we
9008 * don't test for it. If that were to change, it would be ok to allow it.
9009 * b) When not under Unicode rules, any upper Latin1 character
9010 * c) Otherwise, when unicode rules are used, all XIDS characters.
9012 * Because all ASCII characters have the same representation whether
9013 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9014 * '{' without knowing if is UTF-8 or not. */
9015 #define VALID_LEN_ONE_IDENT(s, is_utf8) \
9016 (isGRAPH_A(*(s)) || ((is_utf8) \
9017 ? isIDFIRST_utf8((U8*) (s)) \
9019 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9022 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9024 I32 herelines = PL_parser->herelines;
9025 SSize_t bracket = -1;
9028 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9029 bool is_utf8 = cBOOL(UTF);
9030 I32 orig_copline = 0, tmp_copline = 0;
9032 PERL_ARGS_ASSERT_SCAN_IDENT;
9034 if (isSPACE(*s) || !*s)
9037 while (isDIGIT(*s)) {
9039 Perl_croak(aTHX_ "%s", ident_too_long);
9043 else { /* See if it is a "normal" identifier */
9044 parse_ident(&s, &d, e, 1, is_utf8, FALSE);
9049 /* Either a digit variable, or parse_ident() found an identifier
9050 (anything valid as a bareword), so job done and return. */
9051 if (PL_lex_state != LEX_NORMAL)
9052 PL_lex_state = LEX_INTERPENDMAYBE;
9056 /* Here, it is not a run-of-the-mill identifier name */
9058 if (*s == '$' && s[1]
9059 && (isIDFIRST_lazy_if(s+1,is_utf8)
9060 || isDIGIT_A((U8)s[1])
9063 || strEQs(s+1,"::")) )
9065 /* Dereferencing a value in a scalar variable.
9066 The alternatives are different syntaxes for a scalar variable.
9067 Using ' as a leading package separator isn't allowed. :: is. */
9070 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9072 bracket = s - SvPVX(PL_linestr);
9074 orig_copline = CopLINE(PL_curcop);
9075 if (s < PL_bufend && isSPACE(*s)) {
9079 if ((s <= PL_bufend - (is_utf8)
9082 && VALID_LEN_ONE_IDENT(s, is_utf8))
9085 const STRLEN skip = UTF8SKIP(s);
9088 for ( i = 0; i < skip; i++ )
9096 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9097 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9101 /* Warn about ambiguous code after unary operators if {...} notation isn't
9102 used. There's no difference in ambiguity; it's merely a heuristic
9103 about when not to warn. */
9104 else if (ck_uni && bracket == -1)
9106 if (bracket != -1) {
9109 /* If we were processing {...} notation then... */
9110 if (isIDFIRST_lazy_if(d,is_utf8)) {
9111 /* if it starts as a valid identifier, assume that it is one.
9112 (the later check for } being at the expected point will trap
9113 cases where this doesn't pan out.) */
9114 d += is_utf8 ? UTF8SKIP(d) : 1;
9115 parse_ident(&s, &d, e, 1, is_utf8, TRUE);
9117 tmp_copline = CopLINE(PL_curcop);
9118 if (s < PL_bufend && isSPACE(*s)) {
9121 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9122 /* ${foo[0]} and ${foo{bar}} notation. */
9123 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9124 const char * const brack =
9126 ((*s == '[') ? "[...]" : "{...}");
9127 orig_copline = CopLINE(PL_curcop);
9128 CopLINE_set(PL_curcop, tmp_copline);
9129 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9130 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9131 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9132 funny, dest, brack, funny, dest, brack);
9133 CopLINE_set(PL_curcop, orig_copline);
9136 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9137 PL_lex_allbrackets++;
9141 /* Handle extended ${^Foo} variables
9142 * 1999-02-27 mjd-perl-patch@plover.com */
9143 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9147 while (isWORDCHAR(*s) && d < e) {
9151 Perl_croak(aTHX_ "%s", ident_too_long);
9156 tmp_copline = CopLINE(PL_curcop);
9157 if ((skip = s < PL_bufend && isSPACE(*s)))
9158 /* Avoid incrementing line numbers or resetting PL_linestart,
9159 in case we have to back up. */
9164 /* Expect to find a closing } after consuming any trailing whitespace.
9167 /* Now increment line numbers if applicable. */
9171 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9172 PL_lex_state = LEX_INTERPEND;
9175 if (PL_lex_state == LEX_NORMAL) {
9176 if (ckWARN(WARN_AMBIGUOUS)
9177 && (keyword(dest, d - dest, 0)
9178 || get_cvn_flags(dest, d - dest, is_utf8
9182 SV *tmp = newSVpvn_flags( dest, d - dest,
9183 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9186 orig_copline = CopLINE(PL_curcop);
9187 CopLINE_set(PL_curcop, tmp_copline);
9188 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9189 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9190 funny, SVfARG(tmp), funny, SVfARG(tmp));
9191 CopLINE_set(PL_curcop, orig_copline);
9196 /* Didn't find the closing } at the point we expected, so restore
9197 state such that the next thing to process is the opening { and */
9198 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9199 CopLINE_set(PL_curcop, orig_copline);
9200 PL_parser->herelines = herelines;
9204 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9205 PL_lex_state = LEX_INTERPEND;
9210 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9212 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9213 * found in the parse starting at 's', based on the subset that are valid
9214 * in this context input to this routine in 'valid_flags'. Advances s.
9215 * Returns TRUE if the input should be treated as a valid flag, so the next
9216 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9217 * upon first call on the current regex. This routine will set it to any
9218 * charset modifier found. The caller shouldn't change it. This way,
9219 * another charset modifier encountered in the parse can be detected as an
9220 * error, as we have decided to allow only one */
9223 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9225 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9226 if (isWORDCHAR_lazy_if(*s, UTF)) {
9227 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9228 UTF ? SVf_UTF8 : 0);
9230 /* Pretend that it worked, so will continue processing before
9239 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9240 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9241 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9242 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9243 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9244 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9245 case LOCALE_PAT_MOD:
9247 goto multiple_charsets;
9249 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9252 case UNICODE_PAT_MOD:
9254 goto multiple_charsets;
9256 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9259 case ASCII_RESTRICT_PAT_MOD:
9261 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9265 /* Error if previous modifier wasn't an 'a', but if it was, see
9266 * if, and accept, a second occurrence (only) */
9268 || get_regex_charset(*pmfl)
9269 != REGEX_ASCII_RESTRICTED_CHARSET)
9271 goto multiple_charsets;
9273 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9277 case DEPENDS_PAT_MOD:
9279 goto multiple_charsets;
9281 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9290 if (*charset != c) {
9291 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9293 else if (c == 'a') {
9294 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9295 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9298 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9301 /* Pretend that it worked, so will continue processing before dieing */
9307 S_scan_pat(pTHX_ char *start, I32 type)
9311 const char * const valid_flags =
9312 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9313 char charset = '\0'; /* character set modifier */
9314 unsigned int x_mod_count = 0;
9316 PERL_ARGS_ASSERT_SCAN_PAT;
9318 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9320 Perl_croak(aTHX_ "Search pattern not terminated");
9322 pm = (PMOP*)newPMOP(type, 0);
9323 if (PL_multi_open == '?') {
9324 /* This is the only point in the code that sets PMf_ONCE: */
9325 pm->op_pmflags |= PMf_ONCE;
9327 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9328 allows us to restrict the list needed by reset to just the ??
9330 assert(type != OP_TRANS);
9332 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9335 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9338 elements = mg->mg_len / sizeof(PMOP**);
9339 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9340 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9341 mg->mg_len = elements * sizeof(PMOP**);
9342 PmopSTASH_set(pm,PL_curstash);
9346 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9347 * anon CV. False positives like qr/[(?{]/ are harmless */
9349 if (type == OP_QR) {
9351 char *e, *p = SvPV(PL_lex_stuff, len);
9353 for (; p < e; p++) {
9354 if (p[0] == '(' && p[1] == '?'
9355 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9357 pm->op_pmflags |= PMf_HAS_CV;
9361 pm->op_pmflags |= PMf_IS_QR;
9364 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9365 &s, &charset, &x_mod_count))
9367 /* issue a warning if /c is specified,but /g is not */
9368 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9370 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9371 "Use of /c modifier is meaningless without /g" );
9374 if (UNLIKELY((x_mod_count) > 1)) {
9375 yyerror("Only one /x regex modifier is allowed");
9378 PL_lex_op = (OP*)pm;
9379 pl_yylval.ival = OP_MATCH;
9384 S_scan_subst(pTHX_ char *start)
9390 line_t linediff = 0;
9392 char charset = '\0'; /* character set modifier */
9393 unsigned int x_mod_count = 0;
9396 PERL_ARGS_ASSERT_SCAN_SUBST;
9398 pl_yylval.ival = OP_NULL;
9400 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9403 Perl_croak(aTHX_ "Substitution pattern not terminated");
9407 first_start = PL_multi_start;
9408 first_line = CopLINE(PL_curcop);
9409 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9411 SvREFCNT_dec_NN(PL_lex_stuff);
9412 PL_lex_stuff = NULL;
9413 Perl_croak(aTHX_ "Substitution replacement not terminated");
9415 PL_multi_start = first_start; /* so whole substitution is taken together */
9417 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9421 if (*s == EXEC_PAT_MOD) {
9425 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9426 &s, &charset, &x_mod_count))
9432 if (UNLIKELY((x_mod_count) > 1)) {
9433 yyerror("Only one /x regex modifier is allowed");
9436 if ((pm->op_pmflags & PMf_CONTINUE)) {
9437 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9441 SV * const repl = newSVpvs("");
9444 pm->op_pmflags |= PMf_EVAL;
9447 sv_catpvs(repl, "eval ");
9449 sv_catpvs(repl, "do ");
9451 sv_catpvs(repl, "{");
9452 sv_catsv(repl, PL_parser->lex_sub_repl);
9453 sv_catpvs(repl, "}");
9454 SvREFCNT_dec(PL_parser->lex_sub_repl);
9455 PL_parser->lex_sub_repl = repl;
9460 linediff = CopLINE(PL_curcop) - first_line;
9462 CopLINE_set(PL_curcop, first_line);
9464 if (linediff || es) {
9465 /* the IVX field indicates that the replacement string is a s///e;
9466 * the NVX field indicates how many src code lines the replacement
9468 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9469 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
9470 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = es;
9473 PL_lex_op = (OP*)pm;
9474 pl_yylval.ival = OP_SUBST;
9479 S_scan_trans(pTHX_ char *start)
9486 bool nondestruct = 0;
9489 PERL_ARGS_ASSERT_SCAN_TRANS;
9491 pl_yylval.ival = OP_NULL;
9493 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9495 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9499 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9501 SvREFCNT_dec_NN(PL_lex_stuff);
9502 PL_lex_stuff = NULL;
9503 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9506 complement = del = squash = 0;
9510 complement = OPpTRANS_COMPLEMENT;
9513 del = OPpTRANS_DELETE;
9516 squash = OPpTRANS_SQUASH;
9528 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9529 o->op_private &= ~OPpTRANS_ALL;
9530 o->op_private |= del|squash|complement|
9531 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9532 (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF : 0);
9535 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9542 Takes a pointer to the first < in <<FOO.
9543 Returns a pointer to the byte following <<FOO.
9545 This function scans a heredoc, which involves different methods
9546 depending on whether we are in a string eval, quoted construct, etc.
9547 This is because PL_linestr could containing a single line of input, or
9548 a whole string being evalled, or the contents of the current quote-
9551 The two basic methods are:
9552 - Steal lines from the input stream
9553 - Scan the heredoc in PL_linestr and remove it therefrom
9555 In a file scope or filtered eval, the first method is used; in a
9556 string eval, the second.
9558 In a quote-like operator, we have to choose between the two,
9559 depending on where we can find a newline. We peek into outer lex-
9560 ing scopes until we find one with a newline in it. If we reach the
9561 outermost lexing scope and it is a file, we use the stream method.
9562 Otherwise it is treated as an eval.
9566 S_scan_heredoc(pTHX_ char *s)
9568 I32 op_type = OP_SCALAR;
9577 bool indented = FALSE;
9578 const bool infile = PL_rsfp || PL_parser->filtered;
9579 const line_t origline = CopLINE(PL_curcop);
9580 LEXSHARED *shared = PL_parser->lex_shared;
9582 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9585 d = PL_tokenbuf + 1;
9586 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9587 *PL_tokenbuf = '\n';
9593 while (SPACE_OR_TAB(*peek))
9595 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9598 s = delimcpy(d, e, s, PL_bufend, term, &len);
9600 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9606 /* <<\FOO is equivalent to <<'FOO' */
9610 if (!isWORDCHAR_lazy_if(s,UTF))
9611 deprecate("bare << to mean <<\"\"");
9613 while (isWORDCHAR_lazy_if(peek,UTF)) {
9614 peek += UTF ? UTF8SKIP(peek) : 1;
9616 len = (peek - s >= e - d) ? (e - d) : (peek - s);
9617 Copy(s, d, len, char);
9621 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9622 Perl_croak(aTHX_ "Delimiter for here document is too long");
9625 len = d - PL_tokenbuf;
9627 #ifndef PERL_STRICT_CR
9628 d = strchr(s, '\r');
9630 char * const olds = s;
9632 while (s < PL_bufend) {
9638 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9647 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9652 tmpstr = newSV_type(SVt_PVIV);
9656 SvIV_set(tmpstr, -1);
9658 else if (term == '`') {
9659 op_type = OP_BACKTICK;
9660 SvIV_set(tmpstr, '\\');
9663 PL_multi_start = origline + 1 + PL_parser->herelines;
9664 PL_multi_open = PL_multi_close = '<';
9665 /* inside a string eval or quote-like operator */
9666 if (!infile || PL_lex_inwhat) {
9669 char * const olds = s;
9670 PERL_CONTEXT * const cx = CX_CUR();
9671 /* These two fields are not set until an inner lexing scope is
9672 entered. But we need them set here. */
9673 shared->ls_bufptr = s;
9674 shared->ls_linestr = PL_linestr;
9676 /* Look for a newline. If the current buffer does not have one,
9677 peek into the line buffer of the parent lexing scope, going
9678 up as many levels as necessary to find one with a newline
9681 while (!(s = (char *)memchr(
9682 (void *)shared->ls_bufptr, '\n',
9683 SvEND(shared->ls_linestr)-shared->ls_bufptr
9685 shared = shared->ls_prev;
9686 /* shared is only null if we have gone beyond the outermost
9687 lexing scope. In a file, we will have broken out of the
9688 loop in the previous iteration. In an eval, the string buf-
9689 fer ends with "\n;", so the while condition above will have
9690 evaluated to false. So shared can never be null. Or so you
9691 might think. Odd syntax errors like s;@{<<; can gobble up
9692 the implicit semicolon at the end of a flie, causing the
9693 file handle to be closed even when we are not in a string
9694 eval. So shared may be null in that case.
9695 (Closing '}' here to balance the earlier open brace for
9696 editors that look for matched pairs.) */
9697 if (UNLIKELY(!shared))
9699 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9700 most lexing scope. In a file, shared->ls_linestr at that
9701 level is just one line, so there is no body to steal. */
9702 if (infile && !shared->ls_prev) {
9707 else { /* eval or we've already hit EOF */
9708 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9712 linestr = shared->ls_linestr;
9713 bufend = SvEND(linestr);
9718 while (s < bufend - len + 1) {
9720 ++PL_parser->herelines;
9722 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
9726 /* Only valid if it's preceded by whitespace only */
9727 while (backup != myolds && --backup >= myolds) {
9728 if (*backup != ' ' && *backup != '\t') {
9735 /* No whitespace or all! */
9736 if (backup == s || *backup == '\n') {
9737 Newxz(indent, indent_len + 1, char);
9738 memcpy(indent, backup + 1, indent_len);
9739 s--; /* before our delimiter */
9740 PL_parser->herelines--; /* this line doesn't count */
9746 while (s < bufend - len + 1
9747 && memNE(s,PL_tokenbuf,len) )
9750 ++PL_parser->herelines;
9754 if (s >= bufend - len + 1) {
9757 sv_setpvn(tmpstr,d+1,s-d);
9759 /* the preceding stmt passes a newline */
9760 PL_parser->herelines++;
9762 /* s now points to the newline after the heredoc terminator.
9763 d points to the newline before the body of the heredoc.
9766 /* We are going to modify linestr in place here, so set
9767 aside copies of the string if necessary for re-evals or
9769 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9770 check shared->re_eval_str. */
9771 if (shared->re_eval_start || shared->re_eval_str) {
9772 /* Set aside the rest of the regexp */
9773 if (!shared->re_eval_str)
9774 shared->re_eval_str =
9775 newSVpvn(shared->re_eval_start,
9776 bufend - shared->re_eval_start);
9777 shared->re_eval_start -= s-d;
9780 && CxTYPE(cx) == CXt_EVAL
9781 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9782 && cx->blk_eval.cur_text == linestr)
9784 cx->blk_eval.cur_text = newSVsv(linestr);
9785 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
9787 /* Copy everything from s onwards back to d. */
9788 Move(s,d,bufend-s + 1,char);
9789 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9790 /* Setting PL_bufend only applies when we have not dug deeper
9791 into other scopes, because sublex_done sets PL_bufend to
9792 SvEND(PL_linestr). */
9793 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9799 char *oldbufptr_save;
9800 char *oldoldbufptr_save;
9802 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
9803 term = PL_tokenbuf[1];
9805 linestr_save = PL_linestr; /* must restore this afterwards */
9806 d = s; /* and this */
9807 oldbufptr_save = PL_oldbufptr;
9808 oldoldbufptr_save = PL_oldoldbufptr;
9809 PL_linestr = newSVpvs("");
9810 PL_bufend = SvPVX(PL_linestr);
9812 PL_bufptr = PL_bufend;
9813 CopLINE_set(PL_curcop,
9814 origline + 1 + PL_parser->herelines);
9815 if (!lex_next_chunk(LEX_NO_TERM)
9816 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9817 /* Simply freeing linestr_save might seem simpler here, as it
9818 does not matter what PL_linestr points to, since we are
9819 about to croak; but in a quote-like op, linestr_save
9820 will have been prospectively freed already, via
9821 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9822 restore PL_linestr. */
9823 SvREFCNT_dec_NN(PL_linestr);
9824 PL_linestr = linestr_save;
9825 PL_oldbufptr = oldbufptr_save;
9826 PL_oldoldbufptr = oldoldbufptr_save;
9829 CopLINE_set(PL_curcop, origline);
9830 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9831 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9832 /* ^That should be enough to avoid this needing to grow: */
9833 sv_catpvs(PL_linestr, "\n\0");
9834 assert(s == SvPVX(PL_linestr));
9835 PL_bufend = SvEND(PL_linestr);
9838 PL_parser->herelines++;
9839 PL_last_lop = PL_last_uni = NULL;
9840 #ifndef PERL_STRICT_CR
9841 if (PL_bufend - PL_linestart >= 2) {
9842 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9843 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9845 PL_bufend[-2] = '\n';
9847 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9849 else if (PL_bufend[-1] == '\r')
9850 PL_bufend[-1] = '\n';
9852 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9853 PL_bufend[-1] = '\n';
9855 if (indented && (PL_bufend-s) >= len) {
9856 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
9859 char *backup = found;
9862 /* Only valid if it's preceded by whitespace only */
9863 while (backup != s && --backup >= s) {
9864 if (*backup != ' ' && *backup != '\t') {
9870 /* All whitespace or none! */
9871 if (backup == found || *backup == ' ' || *backup == '\t') {
9872 Newxz(indent, indent_len + 1, char);
9873 memcpy(indent, backup, indent_len);
9874 SvREFCNT_dec(PL_linestr);
9875 PL_linestr = linestr_save;
9876 PL_linestart = SvPVX(linestr_save);
9877 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9878 PL_oldbufptr = oldbufptr_save;
9879 PL_oldoldbufptr = oldoldbufptr_save;
9885 /* Didn't find it */
9886 sv_catsv(tmpstr,PL_linestr);
9888 if (*s == term && PL_bufend-s >= len
9889 && memEQ(s,PL_tokenbuf + 1,len))
9891 SvREFCNT_dec(PL_linestr);
9892 PL_linestr = linestr_save;
9893 PL_linestart = SvPVX(linestr_save);
9894 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9895 PL_oldbufptr = oldbufptr_save;
9896 PL_oldoldbufptr = oldoldbufptr_save;
9900 sv_catsv(tmpstr,PL_linestr);
9905 PL_multi_end = origline + PL_parser->herelines;
9906 if (indented && indent) {
9907 STRLEN linecount = 1;
9908 STRLEN herelen = SvCUR(tmpstr);
9909 char *ss = SvPVX(tmpstr);
9910 char *se = ss + herelen;
9911 SV *newstr = newSV(herelen+1);
9914 /* Trim leading whitespace */
9916 /* newline only? Copy and move on */
9918 sv_catpv(newstr,"\n");
9922 /* Found our indentation? Strip it */
9923 } else if (se - ss >= indent_len
9924 && memEQ(ss, indent, indent_len))
9930 while ((ss + le) < se && *(ss + le) != '\n')
9933 sv_catpvn(newstr, ss, le);
9937 /* Line doesn't begin with our indentation? Croak */
9940 "Indentation on line %d of here-doc doesn't match delimiter",
9945 /* avoid sv_setsv() as we dont wan't to COW here */
9946 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
9948 SvREFCNT_dec_NN(newstr);
9950 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9951 SvPV_shrink_to_cur(tmpstr);
9954 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9957 PL_lex_stuff = tmpstr;
9958 pl_yylval.ival = op_type;
9962 SvREFCNT_dec(tmpstr);
9963 CopLINE_set(PL_curcop, origline);
9964 missingterm(PL_tokenbuf + 1);
9968 takes: position of first '<' in input buffer
9969 returns: position of first char following the matching '>' in
9971 side-effects: pl_yylval and lex_op are set.
9976 <<>> read from ARGV without magic open
9977 <FH> read from filehandle
9978 <pkg::FH> read from package qualified filehandle
9979 <pkg'FH> read from package qualified filehandle
9980 <$fh> read from filehandle in $fh
9986 S_scan_inputsymbol(pTHX_ char *start)
9988 char *s = start; /* current position in buffer */
9991 bool nomagicopen = FALSE;
9992 char *d = PL_tokenbuf; /* start of temp holding space */
9993 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9995 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9997 end = strchr(s, '\n');
10000 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10001 nomagicopen = TRUE;
10007 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10009 /* die if we didn't have space for the contents of the <>,
10010 or if it didn't end, or if we see a newline
10013 if (len >= (I32)sizeof PL_tokenbuf)
10014 Perl_croak(aTHX_ "Excessively long <> operator");
10016 Perl_croak(aTHX_ "Unterminated <> operator");
10021 Remember, only scalar variables are interpreted as filehandles by
10022 this code. Anything more complex (e.g., <$fh{$num}>) will be
10023 treated as a glob() call.
10024 This code makes use of the fact that except for the $ at the front,
10025 a scalar variable and a filehandle look the same.
10027 if (*d == '$' && d[1]) d++;
10029 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10030 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10031 d += UTF ? UTF8SKIP(d) : 1;
10033 /* If we've tried to read what we allow filehandles to look like, and
10034 there's still text left, then it must be a glob() and not a getline.
10035 Use scan_str to pull out the stuff between the <> and treat it
10036 as nothing more than a string.
10039 if (d - PL_tokenbuf != len) {
10040 pl_yylval.ival = OP_GLOB;
10041 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10043 Perl_croak(aTHX_ "Glob not terminated");
10047 bool readline_overriden = FALSE;
10049 /* we're in a filehandle read situation */
10052 /* turn <> into <ARGV> */
10054 Copy("ARGV",d,5,char);
10056 /* Check whether readline() is overriden */
10057 if ((gv_readline = gv_override("readline",8)))
10058 readline_overriden = TRUE;
10060 /* if <$fh>, create the ops to turn the variable into a
10064 /* try to find it in the pad for this block, otherwise find
10065 add symbol table ops
10067 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10068 if (tmp != NOT_IN_PAD) {
10069 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10070 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10071 HEK * const stashname = HvNAME_HEK(stash);
10072 SV * const sym = sv_2mortal(newSVhek(stashname));
10073 sv_catpvs(sym, "::");
10074 sv_catpv(sym, d+1);
10079 OP * const o = newOP(OP_PADSV, 0);
10081 PL_lex_op = readline_overriden
10082 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10083 op_append_elem(OP_LIST, o,
10084 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10085 : newUNOP(OP_READLINE, 0, o);
10093 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10095 PL_lex_op = readline_overriden
10096 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10097 op_append_elem(OP_LIST,
10098 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10099 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10100 : newUNOP(OP_READLINE, 0,
10101 newUNOP(OP_RV2SV, 0,
10102 newGVOP(OP_GV, 0, gv)));
10104 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10105 pl_yylval.ival = OP_NULL;
10108 /* If it's none of the above, it must be a literal filehandle
10109 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10111 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10112 PL_lex_op = readline_overriden
10113 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10114 op_append_elem(OP_LIST,
10115 newGVOP(OP_GV, 0, gv),
10116 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10117 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10118 pl_yylval.ival = OP_NULL;
10128 start position in buffer
10129 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
10130 only if they are of the open/close form
10131 keep_delims preserve the delimiters around the string
10132 re_reparse compiling a run-time /(?{})/:
10133 collapse // to /, and skip encoding src
10134 delimp if non-null, this is set to the position of
10135 the closing delimiter, or just after it if
10136 the closing and opening delimiters differ
10137 (i.e., the opening delimiter of a substitu-
10139 returns: position to continue reading from buffer
10140 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10141 updates the read buffer.
10143 This subroutine pulls a string out of the input. It is called for:
10144 q single quotes q(literal text)
10145 ' single quotes 'literal text'
10146 qq double quotes qq(interpolate $here please)
10147 " double quotes "interpolate $here please"
10148 qx backticks qx(/bin/ls -l)
10149 ` backticks `/bin/ls -l`
10150 qw quote words @EXPORT_OK = qw( func() $spam )
10151 m// regexp match m/this/
10152 s/// regexp substitute s/this/that/
10153 tr/// string transliterate tr/this/that/
10154 y/// string transliterate y/this/that/
10155 ($*@) sub prototypes sub foo ($)
10156 (stuff) sub attr parameters sub foo : attr(stuff)
10157 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10159 In most of these cases (all but <>, patterns and transliterate)
10160 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10161 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10162 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10165 It skips whitespace before the string starts, and treats the first
10166 character as the delimiter. If the delimiter is one of ([{< then
10167 the corresponding "close" character )]}> is used as the closing
10168 delimiter. It allows quoting of delimiters, and if the string has
10169 balanced delimiters ([{<>}]) it allows nesting.
10171 On success, the SV with the resulting string is put into lex_stuff or,
10172 if that is already non-NULL, into lex_repl. The second case occurs only
10173 when parsing the RHS of the special constructs s/// and tr/// (y///).
10174 For convenience, the terminating delimiter character is stuffed into
10179 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10183 SV *sv; /* scalar value: string */
10184 const char *tmps; /* temp string, used for delimiter matching */
10185 char *s = start; /* current position in the buffer */
10186 char term; /* terminating character */
10187 char *to; /* current position in the sv's data */
10188 I32 brackets = 1; /* bracket nesting level */
10189 bool has_utf8 = FALSE; /* is there any utf8 content? */
10190 IV termcode; /* terminating char. code */
10191 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10192 STRLEN termlen; /* length of terminating string */
10195 PERL_ARGS_ASSERT_SCAN_STR;
10197 /* skip space before the delimiter */
10202 /* mark where we are, in case we need to report errors */
10205 /* after skipping whitespace, the next character is the terminator */
10208 termcode = termstr[0] = term;
10212 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10213 Copy(s, termstr, termlen, U8);
10214 if (!UTF8_IS_INVARIANT(term))
10218 /* mark where we are */
10219 PL_multi_start = CopLINE(PL_curcop);
10220 PL_multi_open = termcode;
10221 herelines = PL_parser->herelines;
10223 /* find corresponding closing delimiter */
10224 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10225 termcode = termstr[0] = term = tmps[5];
10227 PL_multi_close = termcode;
10229 if (PL_multi_open == PL_multi_close) {
10230 keep_bracketed_quoted = FALSE;
10233 /* create a new SV to hold the contents. 79 is the SV's initial length.
10234 What a random number. */
10235 sv = newSV_type(SVt_PVIV);
10237 SvIV_set(sv, termcode);
10238 (void)SvPOK_only(sv); /* validate pointer */
10240 /* move past delimiter and try to read a complete string */
10242 sv_catpvn(sv, s, termlen);
10245 /* extend sv if need be */
10246 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10247 /* set 'to' to the next character in the sv's string */
10248 to = SvPVX(sv)+SvCUR(sv);
10250 /* if open delimiter is the close delimiter read unbridle */
10251 if (PL_multi_open == PL_multi_close) {
10252 for (; s < PL_bufend; s++,to++) {
10253 /* embedded newlines increment the current line number */
10254 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10255 COPLINE_INC_WITH_HERELINES;
10256 /* handle quoted delimiters */
10257 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10258 if (!keep_bracketed_quoted
10260 || (re_reparse && s[1] == '\\'))
10263 else /* any other quotes are simply copied straight through */
10266 /* terminate when run out of buffer (the for() condition), or
10267 have found the terminator */
10268 else if (*s == term) {
10271 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10274 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10280 /* if the terminator isn't the same as the start character (e.g.,
10281 matched brackets), we have to allow more in the quoting, and
10282 be prepared for nested brackets.
10285 /* read until we run out of string, or we find the terminator */
10286 for (; s < PL_bufend; s++,to++) {
10287 /* embedded newlines increment the line count */
10288 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10289 COPLINE_INC_WITH_HERELINES;
10290 /* backslashes can escape the open or closing characters */
10291 if (*s == '\\' && s+1 < PL_bufend) {
10292 if (!keep_bracketed_quoted
10293 && ( ((UV)s[1] == PL_multi_open)
10294 || ((UV)s[1] == PL_multi_close) ))
10301 /* allow nested opens and closes */
10302 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10304 else if ((UV)*s == PL_multi_open)
10306 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10311 /* terminate the copied string and update the sv's end-of-string */
10313 SvCUR_set(sv, to - SvPVX_const(sv));
10316 * this next chunk reads more into the buffer if we're not done yet
10320 break; /* handle case where we are done yet :-) */
10322 #ifndef PERL_STRICT_CR
10323 if (to - SvPVX_const(sv) >= 2) {
10324 if ( (to[-2] == '\r' && to[-1] == '\n')
10325 || (to[-2] == '\n' && to[-1] == '\r'))
10329 SvCUR_set(sv, to - SvPVX_const(sv));
10331 else if (to[-1] == '\r')
10334 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10338 /* if we're out of file, or a read fails, bail and reset the current
10339 line marker so we can report where the unterminated string began
10341 COPLINE_INC_WITH_HERELINES;
10342 PL_bufptr = PL_bufend;
10343 if (!lex_next_chunk(0)) {
10345 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10351 /* at this point, we have successfully read the delimited string */
10354 sv_catpvn(sv, s, termlen);
10360 PL_multi_end = CopLINE(PL_curcop);
10361 CopLINE_set(PL_curcop, PL_multi_start);
10362 PL_parser->herelines = herelines;
10364 /* if we allocated too much space, give some back */
10365 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10366 SvLEN_set(sv, SvCUR(sv) + 1);
10367 SvPV_renew(sv, SvLEN(sv));
10370 /* decide whether this is the first or second quoted string we've read
10375 PL_parser->lex_sub_repl = sv;
10378 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10384 takes: pointer to position in buffer
10385 returns: pointer to new position in buffer
10386 side-effects: builds ops for the constant in pl_yylval.op
10388 Read a number in any of the formats that Perl accepts:
10390 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10391 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10392 0b[01](_?[01])* binary integers
10393 0[0-7](_?[0-7])* octal integers
10394 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
10395 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
10397 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10400 If it reads a number without a decimal point or an exponent, it will
10401 try converting the number to an integer and see if it can do so
10402 without loss of precision.
10406 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10408 const char *s = start; /* current position in buffer */
10409 char *d; /* destination in temp buffer */
10410 char *e; /* end of temp buffer */
10411 NV nv; /* number read, as a double */
10412 SV *sv = NULL; /* place to put the converted number */
10413 bool floatit; /* boolean: int or float? */
10414 const char *lastub = NULL; /* position of last underbar */
10415 static const char* const number_too_long = "Number too long";
10416 /* Hexadecimal floating point.
10418 * In many places (where we have quads and NV is IEEE 754 double)
10419 * we can fit the mantissa bits of a NV into an unsigned quad.
10420 * (Note that UVs might not be quads even when we have quads.)
10421 * This will not work everywhere, though (either no quads, or
10422 * using long doubles), in which case we have to resort to NV,
10423 * which will probably mean horrible loss of precision due to
10424 * multiple fp operations. */
10425 bool hexfp = FALSE;
10426 int total_bits = 0;
10427 int significant_bits = 0;
10428 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10429 # define HEXFP_UQUAD
10430 Uquad_t hexfp_uquad = 0;
10431 int hexfp_frac_bits = 0;
10436 NV hexfp_mult = 1.0;
10437 UV high_non_zero = 0; /* highest digit */
10438 int non_zero_integer_digits = 0;
10440 PERL_ARGS_ASSERT_SCAN_NUM;
10442 /* We use the first character to decide what type of number this is */
10446 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10448 /* if it starts with a 0, it could be an octal number, a decimal in
10449 0.13 disguise, or a hexadecimal number, or a binary number. */
10453 u holds the "number so far"
10454 shift the power of 2 of the base
10455 (hex == 4, octal == 3, binary == 1)
10456 overflowed was the number more than we can hold?
10458 Shift is used when we add a digit. It also serves as an "are
10459 we in octal/hex/binary?" indicator to disallow hex characters
10460 when in octal mode.
10465 bool overflowed = FALSE;
10466 bool just_zero = TRUE; /* just plain 0 or binary number? */
10467 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10468 static const char* const bases[5] =
10469 { "", "binary", "", "octal", "hexadecimal" };
10470 static const char* const Bases[5] =
10471 { "", "Binary", "", "Octal", "Hexadecimal" };
10472 static const char* const maxima[5] =
10474 "0b11111111111111111111111111111111",
10478 const char *base, *Base, *max;
10480 /* check for hex */
10481 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10485 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10490 /* check for a decimal in disguise */
10491 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10493 /* so it must be octal */
10500 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10501 "Misplaced _ in number");
10505 base = bases[shift];
10506 Base = Bases[shift];
10507 max = maxima[shift];
10509 /* read the rest of the number */
10511 /* x is used in the overflow test,
10512 b is the digit we're adding on. */
10517 /* if we don't mention it, we're done */
10521 /* _ are ignored -- but warned about if consecutive */
10523 if (lastub && s == lastub + 1)
10524 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10525 "Misplaced _ in number");
10529 /* 8 and 9 are not octal */
10530 case '8': case '9':
10532 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10536 case '2': case '3': case '4':
10537 case '5': case '6': case '7':
10539 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10542 case '0': case '1':
10543 b = *s++ & 15; /* ASCII digit -> value of digit */
10547 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10548 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10549 /* make sure they said 0x */
10552 b = (*s++ & 7) + 9;
10554 /* Prepare to put the digit we have onto the end
10555 of the number so far. We check for overflows.
10561 x = u << shift; /* make room for the digit */
10563 total_bits += shift;
10565 if ((x >> shift) != u
10566 && !(PL_hints & HINT_NEW_BINARY)) {
10569 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10570 "Integer overflow in %s number",
10573 u = x | b; /* add the digit to the end */
10576 n *= nvshift[shift];
10577 /* If an NV has not enough bits in its
10578 * mantissa to represent an UV this summing of
10579 * small low-order numbers is a waste of time
10580 * (because the NV cannot preserve the
10581 * low-order bits anyway): we could just
10582 * remember when did we overflow and in the
10583 * end just multiply n by the right
10588 if (high_non_zero == 0 && b > 0)
10592 non_zero_integer_digits++;
10594 /* this could be hexfp, but peek ahead
10595 * to avoid matching ".." */
10596 if (UNLIKELY(HEXFP_PEEK(s))) {
10604 /* if we get here, we had success: make a scalar value from
10609 /* final misplaced underbar check */
10610 if (s[-1] == '_') {
10611 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10614 if (UNLIKELY(HEXFP_PEEK(s))) {
10615 /* Do sloppy (on the underbars) but quick detection
10616 * (and value construction) for hexfp, the decimal
10617 * detection will shortly be more thorough with the
10618 * underbar checks. */
10620 significant_bits = non_zero_integer_digits * shift;
10623 #else /* HEXFP_NV */
10626 /* Ignore the leading zero bits of
10627 * the high (first) non-zero digit. */
10628 if (high_non_zero) {
10629 if (high_non_zero < 0x8)
10630 significant_bits--;
10631 if (high_non_zero < 0x4)
10632 significant_bits--;
10633 if (high_non_zero < 0x2)
10634 significant_bits--;
10641 bool accumulate = TRUE;
10642 for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10643 if (isXDIGIT(*h)) {
10644 U8 b = XDIGIT_VALUE(*h);
10645 significant_bits += shift;
10648 if (significant_bits < NV_MANT_DIG) {
10649 /* We are in the long "run" of xdigits,
10650 * accumulate the full four bits. */
10651 hexfp_uquad <<= shift;
10653 hexfp_frac_bits += shift;
10655 /* We are at a hexdigit either at,
10656 * or straddling, the edge of mantissa.
10657 * We will try grabbing as many as
10658 * possible bits. */
10660 significant_bits - NV_MANT_DIG;
10663 hexfp_uquad <<= tail;
10664 hexfp_uquad |= b >> (shift - tail);
10665 hexfp_frac_bits += tail;
10667 /* Ignore the trailing zero bits
10668 * of the last non-zero xdigit.
10670 * The assumption here is that if
10671 * one has input of e.g. the xdigit
10672 * eight (0x8), there is only one
10673 * bit being input, not the full
10674 * four bits. Conversely, if one
10675 * specifies a zero xdigit, the
10676 * assumption is that one really
10677 * wants all those bits to be zero. */
10679 if ((b & 0x1) == 0x0) {
10680 significant_bits--;
10681 if ((b & 0x2) == 0x0) {
10682 significant_bits--;
10683 if ((b & 0x4) == 0x0) {
10684 significant_bits--;
10690 accumulate = FALSE;
10693 /* Keep skipping the xdigits, and
10694 * accumulating the significant bits,
10695 * but do not shift the uquad
10696 * (which would catastrophically drop
10697 * high-order bits) or accumulate the
10698 * xdigits anymore. */
10700 #else /* HEXFP_NV */
10704 hexfp_nv += b * nv_mult;
10706 accumulate = FALSE;
10710 if (significant_bits >= NV_MANT_DIG)
10711 accumulate = FALSE;
10715 if ((total_bits > 0 || significant_bits > 0) &&
10716 isALPHA_FOLD_EQ(*h, 'p')) {
10717 bool negexp = FALSE;
10721 else if (*h == '-') {
10727 while (isDIGIT(*h) || *h == '_') {
10730 hexfp_exp += *h - '0';
10733 && -hexfp_exp < NV_MIN_EXP - 1) {
10734 /* NOTE: this means that the exponent
10735 * underflow warning happens for
10736 * the IEEE 754 subnormals (denormals),
10737 * because DBL_MIN_EXP etc are the lowest
10738 * possible binary (or, rather, DBL_RADIX-base)
10739 * exponent for normals, not subnormals.
10741 * This may or may not be a good thing. */
10742 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10743 "Hexadecimal float: exponent underflow");
10749 && hexfp_exp > NV_MAX_EXP - 1) {
10750 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10751 "Hexadecimal float: exponent overflow");
10759 hexfp_exp = -hexfp_exp;
10761 hexfp_exp -= hexfp_frac_bits;
10763 hexfp_mult = Perl_pow(2.0, hexfp_exp);
10771 if (n > 4294967295.0)
10772 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10773 "%s number > %s non-portable",
10779 if (u > 0xffffffff)
10780 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10781 "%s number > %s non-portable",
10786 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10787 sv = new_constant(start, s - start, "integer",
10788 sv, NULL, NULL, 0);
10789 else if (PL_hints & HINT_NEW_BINARY)
10790 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10795 handle decimal numbers.
10796 we're also sent here when we read a 0 as the first digit
10798 case '1': case '2': case '3': case '4': case '5':
10799 case '6': case '7': case '8': case '9': case '.':
10802 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10811 /* read next group of digits and _ and copy into d */
10814 || UNLIKELY(hexfp && isXDIGIT(*s)))
10816 /* skip underscores, checking for misplaced ones
10820 if (lastub && s == lastub + 1)
10821 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10822 "Misplaced _ in number");
10826 /* check for end of fixed-length buffer */
10828 Perl_croak(aTHX_ "%s", number_too_long);
10829 /* if we're ok, copy the character */
10834 /* final misplaced underbar check */
10835 if (lastub && s == lastub + 1) {
10836 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10839 /* read a decimal portion if there is one. avoid
10840 3..5 being interpreted as the number 3. followed
10843 if (*s == '.' && s[1] != '.') {
10848 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10849 "Misplaced _ in number");
10853 /* copy, ignoring underbars, until we run out of digits.
10857 || UNLIKELY(hexfp && isXDIGIT(*s));
10860 /* fixed length buffer check */
10862 Perl_croak(aTHX_ "%s", number_too_long);
10864 if (lastub && s == lastub + 1)
10865 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10866 "Misplaced _ in number");
10872 /* fractional part ending in underbar? */
10873 if (s[-1] == '_') {
10874 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10875 "Misplaced _ in number");
10877 if (*s == '.' && isDIGIT(s[1])) {
10878 /* oops, it's really a v-string, but without the "v" */
10884 /* read exponent part, if present */
10885 if ((isALPHA_FOLD_EQ(*s, 'e')
10886 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10887 && strchr("+-0123456789_", s[1]))
10891 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10892 ditto for p (hexfloats) */
10893 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10894 /* At least some Mach atof()s don't grok 'E' */
10897 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10904 /* stray preinitial _ */
10906 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10907 "Misplaced _ in number");
10911 /* allow positive or negative exponent */
10912 if (*s == '+' || *s == '-')
10915 /* stray initial _ */
10917 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10918 "Misplaced _ in number");
10922 /* read digits of exponent */
10923 while (isDIGIT(*s) || *s == '_') {
10926 Perl_croak(aTHX_ "%s", number_too_long);
10930 if (((lastub && s == lastub + 1)
10931 || (!isDIGIT(s[1]) && s[1] != '_')))
10932 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10933 "Misplaced _ in number");
10941 We try to do an integer conversion first if no characters
10942 indicating "float" have been found.
10947 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10949 if (flags == IS_NUMBER_IN_UV) {
10951 sv = newSViv(uv); /* Prefer IVs over UVs. */
10954 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10955 if (uv <= (UV) IV_MIN)
10956 sv = newSViv(-(IV)uv);
10963 STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
10964 /* terminate the string */
10966 if (UNLIKELY(hexfp)) {
10967 # ifdef NV_MANT_DIG
10968 if (significant_bits > NV_MANT_DIG)
10969 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10970 "Hexadecimal float: mantissa overflow");
10973 nv = hexfp_uquad * hexfp_mult;
10974 #else /* HEXFP_NV */
10975 nv = hexfp_nv * hexfp_mult;
10978 nv = Atof(PL_tokenbuf);
10980 RESTORE_LC_NUMERIC_UNDERLYING();
10985 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10986 const char *const key = floatit ? "float" : "integer";
10987 const STRLEN keylen = floatit ? 5 : 7;
10988 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10989 key, keylen, sv, NULL, NULL, 0);
10993 /* if it starts with a v, it could be a v-string */
10996 sv = newSV(5); /* preallocate storage space */
10997 ENTER_with_name("scan_vstring");
10999 s = scan_vstring(s, PL_bufend, sv);
11000 SvREFCNT_inc_simple_void_NN(sv);
11001 LEAVE_with_name("scan_vstring");
11005 /* make the op for the constant and return */
11008 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11010 lvalp->opval = NULL;
11016 S_scan_formline(pTHX_ char *s)
11020 SV * const stuff = newSVpvs("");
11021 bool needargs = FALSE;
11022 bool eofmt = FALSE;
11024 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11026 while (!needargs) {
11029 #ifdef PERL_STRICT_CR
11030 while (SPACE_OR_TAB(*t))
11033 while (SPACE_OR_TAB(*t) || *t == '\r')
11036 if (*t == '\n' || t == PL_bufend) {
11041 eol = (char *) memchr(s,'\n',PL_bufend-s);
11045 for (t = s; t < eol; t++) {
11046 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11048 goto enough; /* ~~ must be first line in formline */
11050 if (*t == '@' || *t == '^')
11054 sv_catpvn(stuff, s, eol-s);
11055 #ifndef PERL_STRICT_CR
11056 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11057 char *end = SvPVX(stuff) + SvCUR(stuff);
11060 SvCUR_set(stuff, SvCUR(stuff) - 1);
11068 if ((PL_rsfp || PL_parser->filtered)
11069 && PL_parser->form_lex_state == LEX_NORMAL) {
11071 PL_bufptr = PL_bufend;
11072 COPLINE_INC_WITH_HERELINES;
11073 got_some = lex_next_chunk(0);
11074 CopLINE_dec(PL_curcop);
11082 if (!SvCUR(stuff) || needargs)
11083 PL_lex_state = PL_parser->form_lex_state;
11084 if (SvCUR(stuff)) {
11085 PL_expect = XSTATE;
11087 const char *s2 = s;
11088 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
11092 PL_expect = XTERMBLOCK;
11093 NEXTVAL_NEXTTOKE.ival = 0;
11096 NEXTVAL_NEXTTOKE.ival = 0;
11097 force_next(FORMLBRACK);
11100 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11103 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11107 SvREFCNT_dec(stuff);
11109 PL_lex_formbrack = 0;
11115 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11117 const I32 oldsavestack_ix = PL_savestack_ix;
11118 CV* const outsidecv = PL_compcv;
11120 SAVEI32(PL_subline);
11121 save_item(PL_subname);
11122 SAVESPTR(PL_compcv);
11124 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11125 CvFLAGS(PL_compcv) |= flags;
11127 PL_subline = CopLINE(PL_curcop);
11128 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11129 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11130 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11131 if (outsidecv && CvPADLIST(outsidecv))
11132 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11134 return oldsavestack_ix;
11138 S_yywarn(pTHX_ const char *const s, U32 flags)
11140 PERL_ARGS_ASSERT_YYWARN;
11142 PL_in_eval |= EVAL_WARNONLY;
11143 yyerror_pv(s, flags);
11148 Perl_yyerror(pTHX_ const char *const s)
11150 PERL_ARGS_ASSERT_YYERROR;
11151 return yyerror_pvn(s, strlen(s), 0);
11155 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11157 PERL_ARGS_ASSERT_YYERROR_PV;
11158 return yyerror_pvn(s, strlen(s), flags);
11162 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11164 const char *context = NULL;
11167 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11168 int yychar = PL_parser->yychar;
11170 PERL_ARGS_ASSERT_YYERROR_PVN;
11172 if (!yychar || (yychar == ';' && !PL_rsfp))
11173 sv_catpvs(where_sv, "at EOF");
11174 else if ( PL_oldoldbufptr
11175 && PL_bufptr > PL_oldoldbufptr
11176 && PL_bufptr - PL_oldoldbufptr < 200
11177 && PL_oldoldbufptr != PL_oldbufptr
11178 && PL_oldbufptr != PL_bufptr)
11182 The code below is removed for NetWare because it abends/crashes on NetWare
11183 when the script has error such as not having the closing quotes like:
11184 if ($var eq "value)
11185 Checking of white spaces is anyway done in NetWare code.
11188 while (isSPACE(*PL_oldoldbufptr))
11191 context = PL_oldoldbufptr;
11192 contlen = PL_bufptr - PL_oldoldbufptr;
11194 else if ( PL_oldbufptr
11195 && PL_bufptr > PL_oldbufptr
11196 && PL_bufptr - PL_oldbufptr < 200
11197 && PL_oldbufptr != PL_bufptr) {
11200 The code below is removed for NetWare because it abends/crashes on NetWare
11201 when the script has error such as not having the closing quotes like:
11202 if ($var eq "value)
11203 Checking of white spaces is anyway done in NetWare code.
11206 while (isSPACE(*PL_oldbufptr))
11209 context = PL_oldbufptr;
11210 contlen = PL_bufptr - PL_oldbufptr;
11212 else if (yychar > 255)
11213 sv_catpvs(where_sv, "next token ???");
11214 else if (yychar == YYEMPTY) {
11215 if (PL_lex_state == LEX_NORMAL)
11216 sv_catpvs(where_sv, "at end of line");
11217 else if (PL_lex_inpat)
11218 sv_catpvs(where_sv, "within pattern");
11220 sv_catpvs(where_sv, "within string");
11223 sv_catpvs(where_sv, "next char ");
11225 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11226 else if (isPRINT_LC(yychar)) {
11227 const char string = yychar;
11228 sv_catpvn(where_sv, &string, 1);
11231 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11233 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11234 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11235 OutCopFILE(PL_curcop),
11236 (IV)(PL_parser->preambling == NOLINE
11237 ? CopLINE(PL_curcop)
11238 : PL_parser->preambling));
11240 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11241 UTF8fARG(UTF, contlen, context));
11243 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11244 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11245 Perl_sv_catpvf(aTHX_ msg,
11246 " (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n",
11247 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11250 if (PL_in_eval & EVAL_WARNONLY) {
11251 PL_in_eval &= ~EVAL_WARNONLY;
11252 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11256 if (PL_error_count >= 10) {
11258 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11259 Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n",
11260 SVfARG(errsv), OutCopFILE(PL_curcop));
11262 Perl_croak(aTHX_ "%s has too many errors.\n",
11263 OutCopFILE(PL_curcop));
11266 PL_in_my_stash = NULL;
11271 S_swallow_bom(pTHX_ U8 *s)
11273 const STRLEN slen = SvCUR(PL_linestr);
11275 PERL_ARGS_ASSERT_SWALLOW_BOM;
11279 if (s[1] == 0xFE) {
11280 /* UTF-16 little-endian? (or UTF-32LE?) */
11281 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11282 /* diag_listed_as: Unsupported script encoding %s */
11283 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11284 #ifndef PERL_NO_UTF16_FILTER
11285 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11287 if (PL_bufend > (char*)s) {
11288 s = add_utf16_textfilter(s, TRUE);
11291 /* diag_listed_as: Unsupported script encoding %s */
11292 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11297 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11298 #ifndef PERL_NO_UTF16_FILTER
11299 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11301 if (PL_bufend > (char *)s) {
11302 s = add_utf16_textfilter(s, FALSE);
11305 /* diag_listed_as: Unsupported script encoding %s */
11306 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11310 case BOM_UTF8_FIRST_BYTE: {
11311 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11312 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11313 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11314 s += len + 1; /* UTF-8 */
11321 if (s[2] == 0xFE && s[3] == 0xFF) {
11322 /* UTF-32 big-endian */
11323 /* diag_listed_as: Unsupported script encoding %s */
11324 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11327 else if (s[2] == 0 && s[3] != 0) {
11330 * are a good indicator of UTF-16BE. */
11331 #ifndef PERL_NO_UTF16_FILTER
11332 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11333 s = add_utf16_textfilter(s, FALSE);
11335 /* diag_listed_as: Unsupported script encoding %s */
11336 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11343 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11346 * are a good indicator of UTF-16LE. */
11347 #ifndef PERL_NO_UTF16_FILTER
11348 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11349 s = add_utf16_textfilter(s, TRUE);
11351 /* diag_listed_as: Unsupported script encoding %s */
11352 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11360 #ifndef PERL_NO_UTF16_FILTER
11362 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11364 SV *const filter = FILTER_DATA(idx);
11365 /* We re-use this each time round, throwing the contents away before we
11367 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11368 SV *const utf8_buffer = filter;
11369 IV status = IoPAGE(filter);
11370 const bool reverse = cBOOL(IoLINES(filter));
11373 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11375 /* As we're automatically added, at the lowest level, and hence only called
11376 from this file, we can be sure that we're not called in block mode. Hence
11377 don't bother writing code to deal with block mode. */
11379 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11382 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11384 DEBUG_P(PerlIO_printf(Perl_debug_log,
11385 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11386 FPTR2DPTR(void *, S_utf16_textfilter),
11387 reverse ? 'l' : 'b', idx, maxlen, status,
11388 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11395 /* First, look in our buffer of existing UTF-8 data: */
11396 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11400 } else if (status == 0) {
11402 IoPAGE(filter) = 0;
11403 nl = SvEND(utf8_buffer);
11406 STRLEN got = nl - SvPVX(utf8_buffer);
11407 /* Did we have anything to append? */
11409 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11410 /* Everything else in this code works just fine if SVp_POK isn't
11411 set. This, however, needs it, and we need it to work, else
11412 we loop infinitely because the buffer is never consumed. */
11413 sv_chop(utf8_buffer, nl);
11417 /* OK, not a complete line there, so need to read some more UTF-16.
11418 Read an extra octect if the buffer currently has an odd number. */
11422 if (SvCUR(utf16_buffer) >= 2) {
11423 /* Location of the high octet of the last complete code point.
11424 Gosh, UTF-16 is a pain. All the benefits of variable length,
11425 *coupled* with all the benefits of partial reads and
11427 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11428 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11430 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11434 /* We have the first half of a surrogate. Read more. */
11435 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11438 status = FILTER_READ(idx + 1, utf16_buffer,
11439 160 + (SvCUR(utf16_buffer) & 1));
11440 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11441 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11444 IoPAGE(filter) = status;
11449 chars = SvCUR(utf16_buffer) >> 1;
11450 have = SvCUR(utf8_buffer);
11451 SvGROW(utf8_buffer, have + chars * 3 + 1);
11454 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11455 (U8*)SvPVX_const(utf8_buffer) + have,
11456 chars * 2, &newlen);
11458 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11459 (U8*)SvPVX_const(utf8_buffer) + have,
11460 chars * 2, &newlen);
11462 SvCUR_set(utf8_buffer, have + newlen);
11465 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11466 it's private to us, and utf16_to_utf8{,reversed} take a
11467 (pointer,length) pair, rather than a NUL-terminated string. */
11468 if(SvCUR(utf16_buffer) & 1) {
11469 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11470 SvCUR_set(utf16_buffer, 1);
11472 SvCUR_set(utf16_buffer, 0);
11475 DEBUG_P(PerlIO_printf(Perl_debug_log,
11476 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11478 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11479 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11484 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11486 SV *filter = filter_add(S_utf16_textfilter, NULL);
11488 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11490 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11492 IoLINES(filter) = reversed;
11493 IoPAGE(filter) = 1; /* Not EOF */
11495 /* Sadly, we have to return a valid pointer, come what may, so we have to
11496 ignore any error return from this. */
11497 SvCUR_set(PL_linestr, 0);
11498 if (FILTER_READ(0, PL_linestr, 0)) {
11499 SvUTF8_on(PL_linestr);
11501 SvUTF8_on(PL_linestr);
11503 PL_bufend = SvEND(PL_linestr);
11504 return (U8*)SvPVX(PL_linestr);
11509 Returns a pointer to the next character after the parsed
11510 vstring, as well as updating the passed in sv.
11512 Function must be called like
11514 sv = sv_2mortal(newSV(5));
11515 s = scan_vstring(s,e,sv);
11517 where s and e are the start and end of the string.
11518 The sv should already be large enough to store the vstring
11519 passed in, for performance reasons.
11521 This function may croak if fatal warnings are enabled in the
11522 calling scope, hence the sv_2mortal in the example (to prevent
11523 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11529 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11531 const char *pos = s;
11532 const char *start = s;
11534 PERL_ARGS_ASSERT_SCAN_VSTRING;
11536 if (*pos == 'v') pos++; /* get past 'v' */
11537 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11539 if ( *pos != '.') {
11540 /* this may not be a v-string if followed by => */
11541 const char *next = pos;
11542 while (next < e && isSPACE(*next))
11544 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11545 /* return string not v-string */
11546 sv_setpvn(sv,(char *)s,pos-s);
11547 return (char *)pos;
11551 if (!isALPHA(*pos)) {
11552 U8 tmpbuf[UTF8_MAXBYTES+1];
11555 s++; /* get past 'v' */
11560 /* this is atoi() that tolerates underscores */
11563 const char *end = pos;
11565 while (--end >= s) {
11567 const UV orev = rev;
11568 rev += (*end - '0') * mult;
11571 /* diag_listed_as: Integer overflow in %s number */
11572 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11573 "Integer overflow in decimal number");
11577 /* Append native character for the rev point */
11578 tmpend = uvchr_to_utf8(tmpbuf, rev);
11579 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11580 if (!UVCHR_IS_INVARIANT(rev))
11582 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11588 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11592 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11599 Perl_keyword_plugin_standard(pTHX_
11600 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11602 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11603 PERL_UNUSED_CONTEXT;
11604 PERL_UNUSED_ARG(keyword_ptr);
11605 PERL_UNUSED_ARG(keyword_len);
11606 PERL_UNUSED_ARG(op_ptr);
11607 return KEYWORD_PLUGIN_DECLINE;
11610 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11612 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11614 SAVEI32(PL_lex_brackets);
11615 if (PL_lex_brackets > 100)
11616 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11617 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11618 SAVEI32(PL_lex_allbrackets);
11619 PL_lex_allbrackets = 0;
11620 SAVEI8(PL_lex_fakeeof);
11621 PL_lex_fakeeof = (U8)fakeeof;
11622 if(yyparse(gramtype) && !PL_parser->error_count)
11623 qerror(Perl_mess(aTHX_ "Parse error"));
11626 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11628 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11632 SAVEVPTR(PL_eval_root);
11633 PL_eval_root = NULL;
11634 parse_recdescent(gramtype, fakeeof);
11640 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11642 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11645 if (flags & ~PARSE_OPTIONAL)
11646 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11647 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11648 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11649 if (!PL_parser->error_count)
11650 qerror(Perl_mess(aTHX_ "Parse error"));
11651 exprop = newOP(OP_NULL, 0);
11657 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11659 Parse a Perl arithmetic expression. This may contain operators of precedence
11660 down to the bit shift operators. The expression must be followed (and thus
11661 terminated) either by a comparison or lower-precedence operator or by
11662 something that would normally terminate an expression such as semicolon.
11663 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11664 otherwise it is mandatory. It is up to the caller to ensure that the
11665 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11666 the source of the code to be parsed and the lexical context for the
11669 The op tree representing the expression is returned. If an optional
11670 expression is absent, a null pointer is returned, otherwise the pointer
11673 If an error occurs in parsing or compilation, in most cases a valid op
11674 tree is returned anyway. The error is reflected in the parser state,
11675 normally resulting in a single exception at the top level of parsing
11676 which covers all the compilation errors that occurred. Some compilation
11677 errors, however, will throw an exception immediately.
11683 Perl_parse_arithexpr(pTHX_ U32 flags)
11685 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11689 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11691 Parse a Perl term expression. This may contain operators of precedence
11692 down to the assignment operators. The expression must be followed (and thus
11693 terminated) either by a comma or lower-precedence operator or by
11694 something that would normally terminate an expression such as semicolon.
11695 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11696 otherwise it is mandatory. It is up to the caller to ensure that the
11697 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11698 the source of the code to be parsed and the lexical context for the
11701 The op tree representing the expression is returned. If an optional
11702 expression is absent, a null pointer is returned, otherwise the pointer
11705 If an error occurs in parsing or compilation, in most cases a valid op
11706 tree is returned anyway. The error is reflected in the parser state,
11707 normally resulting in a single exception at the top level of parsing
11708 which covers all the compilation errors that occurred. Some compilation
11709 errors, however, will throw an exception immediately.
11715 Perl_parse_termexpr(pTHX_ U32 flags)
11717 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11721 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11723 Parse a Perl list expression. This may contain operators of precedence
11724 down to the comma operator. The expression must be followed (and thus
11725 terminated) either by a low-precedence logic operator such as C<or> or by
11726 something that would normally terminate an expression such as semicolon.
11727 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11728 otherwise it is mandatory. It is up to the caller to ensure that the
11729 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11730 the source of the code to be parsed and the lexical context for the
11733 The op tree representing the expression is returned. If an optional
11734 expression is absent, a null pointer is returned, otherwise the pointer
11737 If an error occurs in parsing or compilation, in most cases a valid op
11738 tree is returned anyway. The error is reflected in the parser state,
11739 normally resulting in a single exception at the top level of parsing
11740 which covers all the compilation errors that occurred. Some compilation
11741 errors, however, will throw an exception immediately.
11747 Perl_parse_listexpr(pTHX_ U32 flags)
11749 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11753 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11755 Parse a single complete Perl expression. This allows the full
11756 expression grammar, including the lowest-precedence operators such
11757 as C<or>. The expression must be followed (and thus terminated) by a
11758 token that an expression would normally be terminated by: end-of-file,
11759 closing bracketing punctuation, semicolon, or one of the keywords that
11760 signals a postfix expression-statement modifier. If C<flags> has the
11761 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11762 mandatory. It is up to the caller to ensure that the dynamic parser
11763 state (L</PL_parser> et al) is correctly set to reflect the source of
11764 the code to be parsed and the lexical context for the expression.
11766 The op tree representing the expression is returned. If an optional
11767 expression is absent, a null pointer is returned, otherwise the pointer
11770 If an error occurs in parsing or compilation, in most cases a valid op
11771 tree is returned anyway. The error is reflected in the parser state,
11772 normally resulting in a single exception at the top level of parsing
11773 which covers all the compilation errors that occurred. Some compilation
11774 errors, however, will throw an exception immediately.
11780 Perl_parse_fullexpr(pTHX_ U32 flags)
11782 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11786 =for apidoc Amx|OP *|parse_block|U32 flags
11788 Parse a single complete Perl code block. This consists of an opening
11789 brace, a sequence of statements, and a closing brace. The block
11790 constitutes a lexical scope, so C<my> variables and various compile-time
11791 effects can be contained within it. It is up to the caller to ensure
11792 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11793 reflect the source of the code to be parsed and the lexical context for
11796 The op tree representing the code block is returned. This is always a
11797 real op, never a null pointer. It will normally be a C<lineseq> list,
11798 including C<nextstate> or equivalent ops. No ops to construct any kind
11799 of runtime scope are included by virtue of it being a block.
11801 If an error occurs in parsing or compilation, in most cases a valid op
11802 tree (most likely null) is returned anyway. The error is reflected in
11803 the parser state, normally resulting in a single exception at the top
11804 level of parsing which covers all the compilation errors that occurred.
11805 Some compilation errors, however, will throw an exception immediately.
11807 The C<flags> parameter is reserved for future use, and must always
11814 Perl_parse_block(pTHX_ U32 flags)
11817 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11818 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11822 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11824 Parse a single unadorned Perl statement. This may be a normal imperative
11825 statement or a declaration that has compile-time effect. It does not
11826 include any label or other affixture. It is up to the caller to ensure
11827 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11828 reflect the source of the code to be parsed and the lexical context for
11831 The op tree representing the statement is returned. This may be a
11832 null pointer if the statement is null, for example if it was actually
11833 a subroutine definition (which has compile-time side effects). If not
11834 null, it will be ops directly implementing the statement, suitable to
11835 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11836 equivalent op (except for those embedded in a scope contained entirely
11837 within the statement).
11839 If an error occurs in parsing or compilation, in most cases a valid op
11840 tree (most likely null) is returned anyway. The error is reflected in
11841 the parser state, normally resulting in a single exception at the top
11842 level of parsing which covers all the compilation errors that occurred.
11843 Some compilation errors, however, will throw an exception immediately.
11845 The C<flags> parameter is reserved for future use, and must always
11852 Perl_parse_barestmt(pTHX_ U32 flags)
11855 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11856 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11860 =for apidoc Amx|SV *|parse_label|U32 flags
11862 Parse a single label, possibly optional, of the type that may prefix a
11863 Perl statement. It is up to the caller to ensure that the dynamic parser
11864 state (L</PL_parser> et al) is correctly set to reflect the source of
11865 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
11866 label is optional, otherwise it is mandatory.
11868 The name of the label is returned in the form of a fresh scalar. If an
11869 optional label is absent, a null pointer is returned.
11871 If an error occurs in parsing, which can only occur if the label is
11872 mandatory, a valid label is returned anyway. The error is reflected in
11873 the parser state, normally resulting in a single exception at the top
11874 level of parsing which covers all the compilation errors that occurred.
11880 Perl_parse_label(pTHX_ U32 flags)
11882 if (flags & ~PARSE_OPTIONAL)
11883 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11885 PL_parser->yychar = yylex();
11886 if (PL_parser->yychar == LABEL) {
11887 char * const lpv = pl_yylval.pval;
11888 STRLEN llen = strlen(lpv);
11889 PL_parser->yychar = YYEMPTY;
11890 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11897 STRLEN wlen, bufptr_pos;
11900 if (!isIDFIRST_lazy_if(s, UTF))
11902 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11903 if (word_takes_any_delimiter(s, wlen))
11905 bufptr_pos = s - SvPVX(PL_linestr);
11907 lex_read_space(LEX_KEEP_PREVIOUS);
11909 s = SvPVX(PL_linestr) + bufptr_pos;
11910 if (t[0] == ':' && t[1] != ':') {
11911 PL_oldoldbufptr = PL_oldbufptr;
11914 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11918 if (flags & PARSE_OPTIONAL) {
11921 qerror(Perl_mess(aTHX_ "Parse error"));
11922 return newSVpvs("x");
11929 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11931 Parse a single complete Perl statement. This may be a normal imperative
11932 statement or a declaration that has compile-time effect, and may include
11933 optional labels. It is up to the caller to ensure that the dynamic
11934 parser state (L</PL_parser> et al) is correctly set to reflect the source
11935 of the code to be parsed and the lexical context for the statement.
11937 The op tree representing the statement is returned. This may be a
11938 null pointer if the statement is null, for example if it was actually
11939 a subroutine definition (which has compile-time side effects). If not
11940 null, it will be the result of a L</newSTATEOP> call, normally including
11941 a C<nextstate> or equivalent op.
11943 If an error occurs in parsing or compilation, in most cases a valid op
11944 tree (most likely null) is returned anyway. The error is reflected in
11945 the parser state, normally resulting in a single exception at the top
11946 level of parsing which covers all the compilation errors that occurred.
11947 Some compilation errors, however, will throw an exception immediately.
11949 The C<flags> parameter is reserved for future use, and must always
11956 Perl_parse_fullstmt(pTHX_ U32 flags)
11959 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11960 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11964 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11966 Parse a sequence of zero or more Perl statements. These may be normal
11967 imperative statements, including optional labels, or declarations
11968 that have compile-time effect, or any mixture thereof. The statement
11969 sequence ends when a closing brace or end-of-file is encountered in a
11970 place where a new statement could have validly started. It is up to
11971 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11972 is correctly set to reflect the source of the code to be parsed and the
11973 lexical context for the statements.
11975 The op tree representing the statement sequence is returned. This may
11976 be a null pointer if the statements were all null, for example if there
11977 were no statements or if there were only subroutine definitions (which
11978 have compile-time side effects). If not null, it will be a C<lineseq>
11979 list, normally including C<nextstate> or equivalent ops.
11981 If an error occurs in parsing or compilation, in most cases a valid op
11982 tree is returned anyway. The error is reflected in the parser state,
11983 normally resulting in a single exception at the top level of parsing
11984 which covers all the compilation errors that occurred. Some compilation
11985 errors, however, will throw an exception immediately.
11987 The C<flags> parameter is reserved for future use, and must always
11994 Perl_parse_stmtseq(pTHX_ U32 flags)
11999 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12000 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12001 c = lex_peek_unichar(0);
12002 if (c != -1 && c != /*{*/'}')
12003 qerror(Perl_mess(aTHX_ "Parse error"));
12008 * ex: set ts=8 sts=4 sw=4 et: