3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_inline.h"
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
96 static const char* const ident_too_long = "Identifier too long";
98 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
100 #define XENUMMASK 0x3f
101 #define XFAKEEOF 0x40
102 #define XFAKEBRACK 0x80
104 #ifdef USE_UTF8_SCRIPTS
105 # define UTF cBOOL(!IN_BYTES)
107 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
110 /* The maximum number of characters preceding the unrecognized one to display */
111 #define UNRECOGNIZED_PRECEDE_COUNT 10
113 /* In variables named $^X, these are the legal values for X.
114 * 1999-02-27 mjd-perl-patch@plover.com */
115 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
117 #define SPACE_OR_TAB(c) isBLANK_A(c)
119 #define HEXFP_PEEK(s) \
121 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
122 isALPHA_FOLD_EQ(s[0], 'p'))
124 /* LEX_* are values for PL_lex_state, the state of the lexer.
125 * They are arranged oddly so that the guard on the switch statement
126 * can get by with a single comparison (if the compiler is smart enough).
128 * These values refer to the various states within a sublex parse,
129 * i.e. within a double quotish string
132 /* #define LEX_NOTPARSING 11 is done in perl.h. */
134 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
135 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
136 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
137 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
138 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
140 /* at end of code, eg "$x" followed by: */
141 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
142 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
144 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
145 string or after \E, $foo, etc */
146 #define LEX_INTERPCONST 2 /* NOT USED */
147 #define LEX_FORMLINE 1 /* expecting a format line */
151 static const char* const lex_state_names[] = {
166 #include "keywords.h"
168 /* CLINE is a macro that ensures PL_copline has a sane value */
170 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
173 * Convenience functions to return different tokens and prime the
174 * lexer for the next token. They all take an argument.
176 * TOKEN : generic token (used for '(', DOLSHARP, etc)
177 * OPERATOR : generic operator
178 * AOPERATOR : assignment operator
179 * PREBLOCK : beginning the block after an if, while, foreach, ...
180 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
181 * PREREF : *EXPR where EXPR is not a simple identifier
182 * TERM : expression term
183 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
184 * LOOPX : loop exiting command (goto, last, dump, etc)
185 * FTST : file test operator
186 * FUN0 : zero-argument function
187 * FUN0OP : zero-argument function, with its op created in this file
188 * FUN1 : not used, except for not, which isn't a UNIOP
189 * BOop : bitwise or or xor
191 * BCop : bitwise complement
192 * SHop : shift operator
193 * PWop : power operator
194 * PMop : pattern-matching operator
195 * Aop : addition-level operator
196 * AopNOASSIGN : addition-level operator that is never part of .=
197 * Mop : multiplication-level operator
198 * Eop : equality-testing operator
199 * Rop : relational operator <= != gt
201 * Also see LOP and lop() below.
204 #ifdef DEBUGGING /* Serve -DT. */
205 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
207 # define REPORT(retval) (retval)
210 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
211 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
212 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
213 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
214 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
215 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
216 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
217 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
218 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
220 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
222 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
223 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
224 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
225 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
226 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
227 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
228 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
230 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
231 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
232 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
233 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
234 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
235 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
236 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
237 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
239 /* This bit of chicanery makes a unary function followed by
240 * a parenthesis into a function with one argument, highest precedence.
241 * The UNIDOR macro is for unary functions that can be followed by the //
242 * operator (such as C<shift // 0>).
244 #define UNI3(f,x,have_x) { \
245 pl_yylval.ival = f; \
246 if (have_x) PL_expect = x; \
248 PL_last_uni = PL_oldbufptr; \
249 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
251 return REPORT( (int)FUNC1 ); \
253 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
255 #define UNI(f) UNI3(f,XTERM,1)
256 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
257 #define UNIPROTO(f,optional) { \
258 if (optional) PL_last_uni = PL_oldbufptr; \
262 #define UNIBRACK(f) UNI3(f,0,0)
264 /* grandfather return to old style */
267 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
268 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
269 pl_yylval.ival = (f); \
275 #define COPLINE_INC_WITH_HERELINES \
277 CopLINE_inc(PL_curcop); \
278 if (PL_parser->herelines) \
279 CopLINE(PL_curcop) += PL_parser->herelines, \
280 PL_parser->herelines = 0; \
282 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
283 * is no sublex_push to follow. */
284 #define COPLINE_SET_FROM_MULTI_END \
286 CopLINE_set(PL_curcop, PL_multi_end); \
287 if (PL_multi_end != PL_multi_start) \
288 PL_parser->herelines = 0; \
294 /* how to interpret the pl_yylval associated with the token */
298 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
303 static struct debug_tokens {
305 enum token_type type;
307 } const debug_tokens[] =
309 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
310 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
311 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
312 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
313 { ARROW, TOKENTYPE_NONE, "ARROW" },
314 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
315 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
316 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
317 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
318 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
319 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
320 { DO, TOKENTYPE_NONE, "DO" },
321 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
322 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
323 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
324 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
325 { ELSE, TOKENTYPE_NONE, "ELSE" },
326 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
327 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
328 { FOR, TOKENTYPE_IVAL, "FOR" },
329 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
330 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
331 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
332 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
333 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
334 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
335 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
336 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
337 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
338 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
339 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
340 { IF, TOKENTYPE_IVAL, "IF" },
341 { LABEL, TOKENTYPE_PVAL, "LABEL" },
342 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
343 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
344 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
345 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
346 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
347 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
348 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
349 { MY, TOKENTYPE_IVAL, "MY" },
350 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
351 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
352 { OROP, TOKENTYPE_IVAL, "OROP" },
353 { OROR, TOKENTYPE_NONE, "OROR" },
354 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
355 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
356 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
357 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
358 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
359 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
360 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
361 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
362 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
363 { PREINC, TOKENTYPE_NONE, "PREINC" },
364 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
365 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
366 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
367 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
368 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
369 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
370 { SUB, TOKENTYPE_NONE, "SUB" },
371 { THING, TOKENTYPE_OPVAL, "THING" },
372 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
373 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
374 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
375 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
376 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
377 { USE, TOKENTYPE_IVAL, "USE" },
378 { WHEN, TOKENTYPE_IVAL, "WHEN" },
379 { WHILE, TOKENTYPE_IVAL, "WHILE" },
380 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
381 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
382 { 0, TOKENTYPE_NONE, NULL }
385 /* dump the returned token in rv, plus any optional arg in pl_yylval */
388 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
390 PERL_ARGS_ASSERT_TOKEREPORT;
393 const char *name = NULL;
394 enum token_type type = TOKENTYPE_NONE;
395 const struct debug_tokens *p;
396 SV* const report = newSVpvs("<== ");
398 for (p = debug_tokens; p->token; p++) {
399 if (p->token == (int)rv) {
406 Perl_sv_catpv(aTHX_ report, name);
407 else if (isGRAPH(rv))
409 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
411 sv_catpvs(report, " (pending identifier)");
414 sv_catpvs(report, "EOF");
416 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
421 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
423 case TOKENTYPE_OPNUM:
424 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
425 PL_op_name[lvalp->ival]);
428 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
430 case TOKENTYPE_OPVAL:
432 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
433 PL_op_name[lvalp->opval->op_type]);
434 if (lvalp->opval->op_type == OP_CONST) {
435 Perl_sv_catpvf(aTHX_ report, " %s",
436 SvPEEK(cSVOPx_sv(lvalp->opval)));
441 sv_catpvs(report, "(opval=null)");
444 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
450 /* print the buffer with suitable escapes */
453 S_printbuf(pTHX_ const char *const fmt, const char *const s)
455 SV* const tmp = newSVpvs("");
457 PERL_ARGS_ASSERT_PRINTBUF;
459 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
460 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
468 S_deprecate_commaless_var_list(pTHX) {
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
477 * This subroutine looks for an '=' next to the operator that has just been
478 * parsed and turns it into an ASSIGNOP if it finds one.
482 S_ao(pTHX_ int toketype)
484 if (*PL_bufptr == '=') {
486 if (toketype == ANDAND)
487 pl_yylval.ival = OP_ANDASSIGN;
488 else if (toketype == OROR)
489 pl_yylval.ival = OP_ORASSIGN;
490 else if (toketype == DORDOR)
491 pl_yylval.ival = OP_DORASSIGN;
494 return REPORT(toketype);
499 * When Perl expects an operator and finds something else, no_op
500 * prints the warning. It always prints "<something> found where
501 * operator expected. It prints "Missing semicolon on previous line?"
502 * if the surprise occurs at the start of the line. "do you need to
503 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
504 * where the compiler doesn't know if foo is a method call or a function.
505 * It prints "Missing operator before end of line" if there's nothing
506 * after the missing operator, or "... before <...>" if there is something
507 * after the missing operator.
509 * PL_bufptr is expected to point to the start of the thing that was found,
510 * and s after the next token or partial token.
514 S_no_op(pTHX_ const char *const what, char *s)
516 char * const oldbp = PL_bufptr;
517 const bool is_first = (PL_oldbufptr == PL_linestart);
519 PERL_ARGS_ASSERT_NO_OP;
525 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
526 if (ckWARN_d(WARN_SYNTAX)) {
528 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
529 "\t(Missing semicolon on previous line?)\n");
530 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
532 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
533 t += UTF ? UTF8SKIP(t) : 1)
535 if (t < PL_bufptr && isSPACE(*t))
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Do you need to predeclare %" UTF8f "?)\n",
538 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
542 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
543 "\t(Missing operator before %" UTF8f "?)\n",
544 UTF8fARG(UTF, s - oldbp, oldbp));
552 * Complain about missing quote/regexp/heredoc terminator.
553 * If it's called with NULL then it cauterizes the line buffer.
554 * If we're in a delimited string and the delimiter is a control
555 * character, it's reformatted into a two-char sequence like ^C.
560 S_missingterm(pTHX_ char *s)
562 char tmpbuf[UTF8_MAXBYTES + 1];
567 char * const nl = strrchr(s,'\n');
572 else if (PL_multi_close < 32) {
574 tmpbuf[1] = (char)toCTRL(PL_multi_close);
579 if (LIKELY(PL_multi_close < 256)) {
580 *tmpbuf = (char)PL_multi_close;
585 *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
589 q = strchr(s,'"') ? '\'' : '"';
590 sv = sv_2mortal(newSVpv(s,0));
593 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf
594 "%c anywhere before EOF",q,SVfARG(sv),q);
600 * Check whether the named feature is enabled.
603 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
605 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
607 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
609 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
611 if (namelen > MAX_FEATURE_LEN)
613 memcpy(&he_name[8], name, namelen);
615 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
616 REFCOUNTED_HE_EXISTS));
620 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
621 * utf16-to-utf8-reversed.
624 #ifdef PERL_CR_FILTER
628 const char *s = SvPVX_const(sv);
629 const char * const e = s + SvCUR(sv);
631 PERL_ARGS_ASSERT_STRIP_RETURN;
633 /* outer loop optimized to do nothing if there are no CR-LFs */
635 if (*s++ == '\r' && *s == '\n') {
636 /* hit a CR-LF, need to copy the rest */
640 if (*s == '\r' && s[1] == '\n')
651 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
653 const I32 count = FILTER_READ(idx+1, sv, maxlen);
654 if (count > 0 && !maxlen)
661 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
663 Creates and initialises a new lexer/parser state object, supplying
664 a context in which to lex and parse from a new source of Perl code.
665 A pointer to the new state object is placed in L</PL_parser>. An entry
666 is made on the save stack so that upon unwinding the new state object
667 will be destroyed and the former value of L</PL_parser> will be restored.
668 Nothing else need be done to clean up the parsing context.
670 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
671 non-null, provides a string (in SV form) containing code to be parsed.
672 A copy of the string is made, so subsequent modification of C<line>
673 does not affect parsing. C<rsfp>, if non-null, provides an input stream
674 from which code will be read to be parsed. If both are non-null, the
675 code in C<line> comes first and must consist of complete lines of input,
676 and C<rsfp> supplies the remainder of the source.
678 The C<flags> parameter is reserved for future use. Currently it is only
679 used by perl internally, so extensions should always pass zero.
684 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
685 can share filters with the current parser.
686 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
687 caller, hence isn't owned by the parser, so shouldn't be closed on parser
688 destruction. This is used to handle the case of defaulting to reading the
689 script from the standard input because no filename was given on the command
690 line (without getting confused by situation where STDIN has been closed, so
691 the script handle is opened on fd 0) */
694 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
696 const char *s = NULL;
697 yy_parser *parser, *oparser;
698 if (flags && flags & ~LEX_START_FLAGS)
699 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
701 /* create and initialise a parser */
703 Newxz(parser, 1, yy_parser);
704 parser->old_parser = oparser = PL_parser;
707 parser->stack = NULL;
708 parser->stack_maxbase = NULL;
711 /* on scope exit, free this parser and restore any outer one */
713 parser->saved_curcop = PL_curcop;
715 /* initialise lexer state */
717 parser->nexttoke = 0;
718 parser->error_count = oparser ? oparser->error_count : 0;
719 parser->copline = parser->preambling = NOLINE;
720 parser->lex_state = LEX_NORMAL;
721 parser->expect = XSTATE;
723 parser->rsfp_filters =
724 !(flags & LEX_START_SAME_FILTER) || !oparser
726 : MUTABLE_AV(SvREFCNT_inc(
727 oparser->rsfp_filters
728 ? oparser->rsfp_filters
729 : (oparser->rsfp_filters = newAV())
732 Newx(parser->lex_brackstack, 120, char);
733 Newx(parser->lex_casestack, 12, char);
734 *parser->lex_casestack = '\0';
735 Newxz(parser->lex_shared, 1, LEXSHARED);
739 s = SvPV_const(line, len);
740 parser->linestr = flags & LEX_START_COPIED
741 ? SvREFCNT_inc_simple_NN(line)
742 : newSVpvn_flags(s, len, SvUTF8(line));
744 sv_catpvs(parser->linestr, "\n;");
746 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
748 parser->oldoldbufptr =
751 parser->linestart = SvPVX(parser->linestr);
752 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
753 parser->last_lop = parser->last_uni = NULL;
755 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
756 |LEX_DONT_CLOSE_RSFP));
757 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
758 |LEX_DONT_CLOSE_RSFP));
760 parser->in_pod = parser->filtered = 0;
764 /* delete a parser object */
767 Perl_parser_free(pTHX_ const yy_parser *parser)
769 PERL_ARGS_ASSERT_PARSER_FREE;
771 PL_curcop = parser->saved_curcop;
772 SvREFCNT_dec(parser->linestr);
774 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
775 PerlIO_clearerr(parser->rsfp);
776 else if (parser->rsfp && (!parser->old_parser
777 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
778 PerlIO_close(parser->rsfp);
779 SvREFCNT_dec(parser->rsfp_filters);
780 SvREFCNT_dec(parser->lex_stuff);
781 SvREFCNT_dec(parser->lex_sub_repl);
783 Safefree(parser->lex_brackstack);
784 Safefree(parser->lex_casestack);
785 Safefree(parser->lex_shared);
786 PL_parser = parser->old_parser;
791 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
793 I32 nexttoke = parser->nexttoke;
794 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
796 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
797 && parser->nextval[nexttoke].opval
798 && parser->nextval[nexttoke].opval->op_slabbed
799 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
800 op_free(parser->nextval[nexttoke].opval);
801 parser->nextval[nexttoke].opval = NULL;
808 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
810 Buffer scalar containing the chunk currently under consideration of the
811 text currently being lexed. This is always a plain string scalar (for
812 which C<SvPOK> is true). It is not intended to be used as a scalar by
813 normal scalar means; instead refer to the buffer directly by the pointer
814 variables described below.
816 The lexer maintains various C<char*> pointers to things in the
817 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
818 reallocated, all of these pointers must be updated. Don't attempt to
819 do this manually, but rather use L</lex_grow_linestr> if you need to
820 reallocate the buffer.
822 The content of the text chunk in the buffer is commonly exactly one
823 complete line of input, up to and including a newline terminator,
824 but there are situations where it is otherwise. The octets of the
825 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
826 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
827 flag on this scalar, which may disagree with it.
829 For direct examination of the buffer, the variable
830 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
831 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
832 of these pointers is usually preferable to examination of the scalar
833 through normal scalar means.
835 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
837 Direct pointer to the end of the chunk of text currently being lexed, the
838 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
839 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
840 always located at the end of the buffer, and does not count as part of
841 the buffer's contents.
843 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
845 Points to the current position of lexing inside the lexer buffer.
846 Characters around this point may be freely examined, within
847 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
848 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
849 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
851 Lexing code (whether in the Perl core or not) moves this pointer past
852 the characters that it consumes. It is also expected to perform some
853 bookkeeping whenever a newline character is consumed. This movement
854 can be more conveniently performed by the function L</lex_read_to>,
855 which handles newlines appropriately.
857 Interpretation of the buffer's octets can be abstracted out by
858 using the slightly higher-level functions L</lex_peek_unichar> and
859 L</lex_read_unichar>.
861 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
863 Points to the start of the current line inside the lexer buffer.
864 This is useful for indicating at which column an error occurred, and
865 not much else. This must be updated by any lexing code that consumes
866 a newline; the function L</lex_read_to> handles this detail.
872 =for apidoc Amx|bool|lex_bufutf8
874 Indicates whether the octets in the lexer buffer
875 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
876 of Unicode characters. If not, they should be interpreted as Latin-1
877 characters. This is analogous to the C<SvUTF8> flag for scalars.
879 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
880 contains valid UTF-8. Lexing code must be robust in the face of invalid
883 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
884 is significant, but not the whole story regarding the input character
885 encoding. Normally, when a file is being read, the scalar contains octets
886 and its C<SvUTF8> flag is off, but the octets should be interpreted as
887 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
888 however, the scalar may have the C<SvUTF8> flag on, and in this case its
889 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
890 is in effect. This logic may change in the future; use this function
891 instead of implementing the logic yourself.
897 Perl_lex_bufutf8(pTHX)
903 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
905 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
906 at least C<len> octets (including terminating C<NUL>). Returns a
907 pointer to the reallocated buffer. This is necessary before making
908 any direct modification of the buffer that would increase its length.
909 L</lex_stuff_pvn> provides a more convenient way to insert text into
912 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
913 this function updates all of the lexer's variables that point directly
920 Perl_lex_grow_linestr(pTHX_ STRLEN len)
924 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
925 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
926 linestr = PL_parser->linestr;
927 buf = SvPVX(linestr);
928 if (len <= SvLEN(linestr))
930 bufend_pos = PL_parser->bufend - buf;
931 bufptr_pos = PL_parser->bufptr - buf;
932 oldbufptr_pos = PL_parser->oldbufptr - buf;
933 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
934 linestart_pos = PL_parser->linestart - buf;
935 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
936 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
937 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
938 PL_parser->lex_shared->re_eval_start - buf : 0;
940 buf = sv_grow(linestr, len);
942 PL_parser->bufend = buf + bufend_pos;
943 PL_parser->bufptr = buf + bufptr_pos;
944 PL_parser->oldbufptr = buf + oldbufptr_pos;
945 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
946 PL_parser->linestart = buf + linestart_pos;
947 if (PL_parser->last_uni)
948 PL_parser->last_uni = buf + last_uni_pos;
949 if (PL_parser->last_lop)
950 PL_parser->last_lop = buf + last_lop_pos;
951 if (PL_parser->lex_shared->re_eval_start)
952 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
957 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
959 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
960 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
961 reallocating the buffer if necessary. This means that lexing code that
962 runs later will see the characters as if they had appeared in the input.
963 It is not recommended to do this as part of normal parsing, and most
964 uses of this facility run the risk of the inserted characters being
965 interpreted in an unintended manner.
967 The string to be inserted is represented by C<len> octets starting
968 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
969 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
970 The characters are recoded for the lexer buffer, according to how the
971 buffer is currently being interpreted (L</lex_bufutf8>). If a string
972 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
973 function is more convenient.
979 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
983 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
984 if (flags & ~(LEX_STUFF_UTF8))
985 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
987 if (flags & LEX_STUFF_UTF8) {
990 STRLEN highhalf = 0; /* Count of variants */
991 const char *p, *e = pv+len;
992 for (p = pv; p != e; p++) {
993 if (! UTF8_IS_INVARIANT(*p)) {
999 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1000 bufptr = PL_parser->bufptr;
1001 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1002 SvCUR_set(PL_parser->linestr,
1003 SvCUR(PL_parser->linestr) + len+highhalf);
1004 PL_parser->bufend += len+highhalf;
1005 for (p = pv; p != e; p++) {
1007 if (! UTF8_IS_INVARIANT(c)) {
1008 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1009 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1011 *bufptr++ = (char)c;
1016 if (flags & LEX_STUFF_UTF8) {
1017 STRLEN highhalf = 0;
1018 const char *p, *e = pv+len;
1019 for (p = pv; p != e; p++) {
1021 if (UTF8_IS_ABOVE_LATIN1(c)) {
1022 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1023 "non-Latin-1 character into Latin-1 input");
1024 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1027 } else if (! UTF8_IS_INVARIANT(c)) {
1028 /* malformed UTF-8 */
1030 SAVESPTR(PL_warnhook);
1031 PL_warnhook = PERL_WARNHOOK_FATAL;
1032 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1038 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1039 bufptr = PL_parser->bufptr;
1040 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1041 SvCUR_set(PL_parser->linestr,
1042 SvCUR(PL_parser->linestr) + len-highhalf);
1043 PL_parser->bufend += len-highhalf;
1046 if (UTF8_IS_INVARIANT(*p)) {
1052 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1058 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1059 bufptr = PL_parser->bufptr;
1060 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1061 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1062 PL_parser->bufend += len;
1063 Copy(pv, bufptr, len, char);
1069 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1071 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1072 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1073 reallocating the buffer if necessary. This means that lexing code that
1074 runs later will see the characters as if they had appeared in the input.
1075 It is not recommended to do this as part of normal parsing, and most
1076 uses of this facility run the risk of the inserted characters being
1077 interpreted in an unintended manner.
1079 The string to be inserted is represented by octets starting at C<pv>
1080 and continuing to the first nul. These octets are interpreted as either
1081 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1082 in C<flags>. The characters are recoded for the lexer buffer, according
1083 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1084 If it is not convenient to nul-terminate a string to be inserted, the
1085 L</lex_stuff_pvn> function is more appropriate.
1091 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1093 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1094 lex_stuff_pvn(pv, strlen(pv), flags);
1098 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1100 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1101 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1102 reallocating the buffer if necessary. This means that lexing code that
1103 runs later will see the characters as if they had appeared in the input.
1104 It is not recommended to do this as part of normal parsing, and most
1105 uses of this facility run the risk of the inserted characters being
1106 interpreted in an unintended manner.
1108 The string to be inserted is the string value of C<sv>. The characters
1109 are recoded for the lexer buffer, according to how the buffer is currently
1110 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1111 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1112 need to construct a scalar.
1118 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1122 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1124 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1126 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1130 =for apidoc Amx|void|lex_unstuff|char *ptr
1132 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1133 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1134 This hides the discarded text from any lexing code that runs later,
1135 as if the text had never appeared.
1137 This is not the normal way to consume lexed text. For that, use
1144 Perl_lex_unstuff(pTHX_ char *ptr)
1148 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1149 buf = PL_parser->bufptr;
1151 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1154 bufend = PL_parser->bufend;
1156 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1157 unstuff_len = ptr - buf;
1158 Move(ptr, buf, bufend+1-ptr, char);
1159 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1160 PL_parser->bufend = bufend - unstuff_len;
1164 =for apidoc Amx|void|lex_read_to|char *ptr
1166 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1167 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1168 performing the correct bookkeeping whenever a newline character is passed.
1169 This is the normal way to consume lexed text.
1171 Interpretation of the buffer's octets can be abstracted out by
1172 using the slightly higher-level functions L</lex_peek_unichar> and
1173 L</lex_read_unichar>.
1179 Perl_lex_read_to(pTHX_ char *ptr)
1182 PERL_ARGS_ASSERT_LEX_READ_TO;
1183 s = PL_parser->bufptr;
1184 if (ptr < s || ptr > PL_parser->bufend)
1185 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1186 for (; s != ptr; s++)
1188 COPLINE_INC_WITH_HERELINES;
1189 PL_parser->linestart = s+1;
1191 PL_parser->bufptr = ptr;
1195 =for apidoc Amx|void|lex_discard_to|char *ptr
1197 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1198 up to C<ptr>. The remaining content of the buffer will be moved, and
1199 all pointers into the buffer updated appropriately. C<ptr> must not
1200 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1201 it is not permitted to discard text that has yet to be lexed.
1203 Normally it is not necessarily to do this directly, because it suffices to
1204 use the implicit discarding behaviour of L</lex_next_chunk> and things
1205 based on it. However, if a token stretches across multiple lines,
1206 and the lexing code has kept multiple lines of text in the buffer for
1207 that purpose, then after completion of the token it would be wise to
1208 explicitly discard the now-unneeded earlier lines, to avoid future
1209 multi-line tokens growing the buffer without bound.
1215 Perl_lex_discard_to(pTHX_ char *ptr)
1219 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1220 buf = SvPVX(PL_parser->linestr);
1222 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1225 if (ptr > PL_parser->bufptr)
1226 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1227 discard_len = ptr - buf;
1228 if (PL_parser->oldbufptr < ptr)
1229 PL_parser->oldbufptr = ptr;
1230 if (PL_parser->oldoldbufptr < ptr)
1231 PL_parser->oldoldbufptr = ptr;
1232 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1233 PL_parser->last_uni = NULL;
1234 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1235 PL_parser->last_lop = NULL;
1236 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1237 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1238 PL_parser->bufend -= discard_len;
1239 PL_parser->bufptr -= discard_len;
1240 PL_parser->oldbufptr -= discard_len;
1241 PL_parser->oldoldbufptr -= discard_len;
1242 if (PL_parser->last_uni)
1243 PL_parser->last_uni -= discard_len;
1244 if (PL_parser->last_lop)
1245 PL_parser->last_lop -= discard_len;
1249 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1251 Reads in the next chunk of text to be lexed, appending it to
1252 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1253 looked to the end of the current chunk and wants to know more. It is
1254 usual, but not necessary, for lexing to have consumed the entirety of
1255 the current chunk at this time.
1257 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1258 chunk (i.e., the current chunk has been entirely consumed), normally the
1259 current chunk will be discarded at the same time that the new chunk is
1260 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1261 will not be discarded. If the current chunk has not been entirely
1262 consumed, then it will not be discarded regardless of the flag.
1264 Returns true if some new text was added to the buffer, or false if the
1265 buffer has reached the end of the input text.
1270 #define LEX_FAKE_EOF 0x80000000
1271 #define LEX_NO_TERM 0x40000000 /* here-doc */
1274 Perl_lex_next_chunk(pTHX_ U32 flags)
1278 STRLEN old_bufend_pos, new_bufend_pos;
1279 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1280 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1281 bool got_some_for_debugger = 0;
1283 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1284 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1285 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1287 linestr = PL_parser->linestr;
1288 buf = SvPVX(linestr);
1289 if (!(flags & LEX_KEEP_PREVIOUS)
1290 && PL_parser->bufptr == PL_parser->bufend)
1292 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1294 if (PL_parser->last_uni != PL_parser->bufend)
1295 PL_parser->last_uni = NULL;
1296 if (PL_parser->last_lop != PL_parser->bufend)
1297 PL_parser->last_lop = NULL;
1298 last_uni_pos = last_lop_pos = 0;
1302 old_bufend_pos = PL_parser->bufend - buf;
1303 bufptr_pos = PL_parser->bufptr - buf;
1304 oldbufptr_pos = PL_parser->oldbufptr - buf;
1305 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1306 linestart_pos = PL_parser->linestart - buf;
1307 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1308 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1310 if (flags & LEX_FAKE_EOF) {
1312 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1314 } else if (filter_gets(linestr, old_bufend_pos)) {
1316 got_some_for_debugger = 1;
1317 } else if (flags & LEX_NO_TERM) {
1320 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1323 /* End of real input. Close filehandle (unless it was STDIN),
1324 * then add implicit termination.
1326 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1327 PerlIO_clearerr(PL_parser->rsfp);
1328 else if (PL_parser->rsfp)
1329 (void)PerlIO_close(PL_parser->rsfp);
1330 PL_parser->rsfp = NULL;
1331 PL_parser->in_pod = PL_parser->filtered = 0;
1332 if (!PL_in_eval && PL_minus_p) {
1334 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1335 PL_minus_n = PL_minus_p = 0;
1336 } else if (!PL_in_eval && PL_minus_n) {
1337 sv_catpvs(linestr, /*{*/";}");
1340 sv_catpvs(linestr, ";");
1343 buf = SvPVX(linestr);
1344 new_bufend_pos = SvCUR(linestr);
1345 PL_parser->bufend = buf + new_bufend_pos;
1346 PL_parser->bufptr = buf + bufptr_pos;
1347 PL_parser->oldbufptr = buf + oldbufptr_pos;
1348 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1349 PL_parser->linestart = buf + linestart_pos;
1350 if (PL_parser->last_uni)
1351 PL_parser->last_uni = buf + last_uni_pos;
1352 if (PL_parser->last_lop)
1353 PL_parser->last_lop = buf + last_lop_pos;
1354 if (PL_parser->preambling != NOLINE) {
1355 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1356 PL_parser->preambling = NOLINE;
1358 if ( got_some_for_debugger
1359 && PERLDB_LINE_OR_SAVESRC
1360 && PL_curstash != PL_debstash)
1362 /* debugger active and we're not compiling the debugger code,
1363 * so store the line into the debugger's array of lines
1365 update_debugger_info(NULL, buf+old_bufend_pos,
1366 new_bufend_pos-old_bufend_pos);
1372 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1374 Looks ahead one (Unicode) character in the text currently being lexed.
1375 Returns the codepoint (unsigned integer value) of the next character,
1376 or -1 if lexing has reached the end of the input text. To consume the
1377 peeked character, use L</lex_read_unichar>.
1379 If the next character is in (or extends into) the next chunk of input
1380 text, the next chunk will be read in. Normally the current chunk will be
1381 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1382 bit set, then the current chunk will not be discarded.
1384 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1385 is encountered, an exception is generated.
1391 Perl_lex_peek_unichar(pTHX_ U32 flags)
1395 if (flags & ~(LEX_KEEP_PREVIOUS))
1396 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1397 s = PL_parser->bufptr;
1398 bufend = PL_parser->bufend;
1404 if (!lex_next_chunk(flags))
1406 s = PL_parser->bufptr;
1407 bufend = PL_parser->bufend;
1410 if (UTF8_IS_INVARIANT(head))
1412 if (UTF8_IS_START(head)) {
1413 len = UTF8SKIP(&head);
1414 while ((STRLEN)(bufend-s) < len) {
1415 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1417 s = PL_parser->bufptr;
1418 bufend = PL_parser->bufend;
1421 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1422 if (retlen == (STRLEN)-1) {
1423 /* malformed UTF-8 */
1425 SAVESPTR(PL_warnhook);
1426 PL_warnhook = PERL_WARNHOOK_FATAL;
1427 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1433 if (!lex_next_chunk(flags))
1435 s = PL_parser->bufptr;
1442 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1444 Reads the next (Unicode) character in the text currently being lexed.
1445 Returns the codepoint (unsigned integer value) of the character read,
1446 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1447 if lexing has reached the end of the input text. To non-destructively
1448 examine the next character, use L</lex_peek_unichar> instead.
1450 If the next character is in (or extends into) the next chunk of input
1451 text, the next chunk will be read in. Normally the current chunk will be
1452 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1453 bit set, then the current chunk will not be discarded.
1455 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1456 is encountered, an exception is generated.
1462 Perl_lex_read_unichar(pTHX_ U32 flags)
1465 if (flags & ~(LEX_KEEP_PREVIOUS))
1466 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1467 c = lex_peek_unichar(flags);
1470 COPLINE_INC_WITH_HERELINES;
1472 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1474 ++(PL_parser->bufptr);
1480 =for apidoc Amx|void|lex_read_space|U32 flags
1482 Reads optional spaces, in Perl style, in the text currently being
1483 lexed. The spaces may include ordinary whitespace characters and
1484 Perl-style comments. C<#line> directives are processed if encountered.
1485 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1486 at a non-space character (or the end of the input text).
1488 If spaces extend into the next chunk of input text, the next chunk will
1489 be read in. Normally the current chunk will be discarded at the same
1490 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1491 chunk will not be discarded.
1496 #define LEX_NO_INCLINE 0x40000000
1497 #define LEX_NO_NEXT_CHUNK 0x80000000
1500 Perl_lex_read_space(pTHX_ U32 flags)
1503 const bool can_incline = !(flags & LEX_NO_INCLINE);
1504 bool need_incline = 0;
1505 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1506 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1507 s = PL_parser->bufptr;
1508 bufend = PL_parser->bufend;
1514 } while (!(c == '\n' || (c == 0 && s == bufend)));
1515 } else if (c == '\n') {
1518 PL_parser->linestart = s;
1524 } else if (isSPACE(c)) {
1526 } else if (c == 0 && s == bufend) {
1529 if (flags & LEX_NO_NEXT_CHUNK)
1531 PL_parser->bufptr = s;
1532 l = CopLINE(PL_curcop);
1533 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1534 got_more = lex_next_chunk(flags);
1535 CopLINE_set(PL_curcop, l);
1536 s = PL_parser->bufptr;
1537 bufend = PL_parser->bufend;
1540 if (can_incline && need_incline && PL_parser->rsfp) {
1550 PL_parser->bufptr = s;
1555 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1557 This function performs syntax checking on a prototype, C<proto>.
1558 If C<warn> is true, any illegal characters or mismatched brackets
1559 will trigger illegalproto warnings, declaring that they were
1560 detected in the prototype for C<name>.
1562 The return value is C<true> if this is a valid prototype, and
1563 C<false> if it is not, regardless of whether C<warn> was C<true> or
1566 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1573 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1575 STRLEN len, origlen;
1577 bool bad_proto = FALSE;
1578 bool in_brackets = FALSE;
1579 bool after_slash = FALSE;
1580 char greedy_proto = ' ';
1581 bool proto_after_greedy_proto = FALSE;
1582 bool must_be_last = FALSE;
1583 bool underscore = FALSE;
1584 bool bad_proto_after_underscore = FALSE;
1586 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1591 p = SvPV(proto, len);
1593 for (; len--; p++) {
1596 proto_after_greedy_proto = TRUE;
1598 if (!strchr(";@%", *p))
1599 bad_proto_after_underscore = TRUE;
1602 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1609 in_brackets = FALSE;
1610 else if ((*p == '@' || *p == '%')
1614 must_be_last = TRUE;
1623 after_slash = FALSE;
1628 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1631 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1632 origlen, UNI_DISPLAY_ISPRINT)
1633 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1635 if (proto_after_greedy_proto)
1636 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1637 "Prototype after '%c' for %" SVf " : %s",
1638 greedy_proto, SVfARG(name), p);
1640 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1641 "Missing ']' in prototype for %" SVf " : %s",
1644 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1645 "Illegal character in prototype for %" SVf " : %s",
1647 if (bad_proto_after_underscore)
1648 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1649 "Illegal character after '_' in prototype for %" SVf " : %s",
1653 return (! (proto_after_greedy_proto || bad_proto) );
1658 * This subroutine has nothing to do with tilting, whether at windmills
1659 * or pinball tables. Its name is short for "increment line". It
1660 * increments the current line number in CopLINE(PL_curcop) and checks
1661 * to see whether the line starts with a comment of the form
1662 * # line 500 "foo.pm"
1663 * If so, it sets the current line number and file to the values in the comment.
1667 S_incline(pTHX_ const char *s)
1675 PERL_ARGS_ASSERT_INCLINE;
1677 COPLINE_INC_WITH_HERELINES;
1678 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1679 && s+1 == PL_bufend && *s == ';') {
1680 /* fake newline in string eval */
1681 CopLINE_dec(PL_curcop);
1686 while (SPACE_OR_TAB(*s))
1688 if (strEQs(s, "line"))
1692 if (SPACE_OR_TAB(*s))
1696 while (SPACE_OR_TAB(*s))
1704 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1706 while (SPACE_OR_TAB(*s))
1708 if (*s == '"' && (t = strchr(s+1, '"'))) {
1714 while (*t && !isSPACE(*t))
1718 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1720 if (*e != '\n' && *e != '\0')
1721 return; /* false alarm */
1723 if (!grok_atoUV(n, &uv, &e))
1725 line_num = ((line_t)uv) - 1;
1728 const STRLEN len = t - s;
1730 if (!PL_rsfp && !PL_parser->filtered) {
1731 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1732 * to *{"::_<newfilename"} */
1733 /* However, the long form of evals is only turned on by the
1734 debugger - usually they're "(eval %lu)" */
1735 GV * const cfgv = CopFILEGV(PL_curcop);
1738 STRLEN tmplen2 = len;
1742 if (tmplen2 + 2 <= sizeof smallbuf)
1745 Newx(tmpbuf2, tmplen2 + 2, char);
1750 memcpy(tmpbuf2 + 2, s, tmplen2);
1753 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1755 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1756 /* adjust ${"::_<newfilename"} to store the new file name */
1757 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1758 /* The line number may differ. If that is the case,
1759 alias the saved lines that are in the array.
1760 Otherwise alias the whole array. */
1761 if (CopLINE(PL_curcop) == line_num) {
1762 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1763 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1765 else if (GvAV(cfgv)) {
1766 AV * const av = GvAV(cfgv);
1767 const I32 start = CopLINE(PL_curcop)+1;
1768 I32 items = AvFILLp(av) - start;
1770 AV * const av2 = GvAVn(gv2);
1771 SV **svp = AvARRAY(av) + start;
1772 I32 l = (I32)line_num+1;
1774 av_store(av2, l++, SvREFCNT_inc(*svp++));
1779 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1782 CopFILE_free(PL_curcop);
1783 CopFILE_setn(PL_curcop, s, len);
1785 CopLINE_set(PL_curcop, line_num);
1789 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1791 AV *av = CopFILEAVx(PL_curcop);
1794 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1796 sv = *av_fetch(av, 0, 1);
1797 SvUPGRADE(sv, SVt_PVMG);
1799 if (!SvPOK(sv)) SvPVCLEAR(sv);
1801 sv_catsv(sv, orig_sv);
1803 sv_catpvn(sv, buf, len);
1808 if (PL_parser->preambling == NOLINE)
1809 av_store(av, CopLINE(PL_curcop), sv);
1815 * Called to gobble the appropriate amount and type of whitespace.
1816 * Skips comments as well.
1817 * Returns the next character after the whitespace that is skipped.
1820 * Same thing, but look ahead without incrementing line numbers or
1821 * adjusting PL_linestart.
1824 #define skipspace(s) skipspace_flags(s, 0)
1825 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1828 S_skipspace_flags(pTHX_ char *s, U32 flags)
1830 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1831 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1832 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1835 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1837 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1838 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1839 LEX_NO_NEXT_CHUNK : 0));
1841 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1842 if (PL_linestart > PL_bufptr)
1843 PL_bufptr = PL_linestart;
1851 * Check the unary operators to ensure there's no ambiguity in how they're
1852 * used. An ambiguous piece of code would be:
1854 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1855 * the +5 is its argument.
1864 if (PL_oldoldbufptr != PL_last_uni)
1866 while (isSPACE(*PL_last_uni))
1869 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1870 s += UTF ? UTF8SKIP(s) : 1;
1871 if ((t = strchr(s, '(')) && t < PL_bufptr)
1874 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1875 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1876 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1880 * LOP : macro to build a list operator. Its behaviour has been replaced
1881 * with a subroutine, S_lop() for which LOP is just another name.
1884 #define LOP(f,x) return lop(f,x,s)
1888 * Build a list operator (or something that might be one). The rules:
1889 * - if we have a next token, then it's a list operator (no parens) for
1890 * which the next token has already been parsed; e.g.,
1893 * - if the next thing is an opening paren, then it's a function
1894 * - else it's a list operator
1898 S_lop(pTHX_ I32 f, U8 x, char *s)
1900 PERL_ARGS_ASSERT_LOP;
1905 PL_last_lop = PL_oldbufptr;
1906 PL_last_lop_op = (OPCODE)f;
1911 return REPORT(FUNC);
1914 return REPORT(FUNC);
1917 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1918 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1919 return REPORT(LSTOP);
1925 * When the lexer realizes it knows the next token (for instance,
1926 * it is reordering tokens for the parser) then it can call S_force_next
1927 * to know what token to return the next time the lexer is called. Caller
1928 * will need to set PL_nextval[] and possibly PL_expect to ensure
1929 * the lexer handles the token correctly.
1933 S_force_next(pTHX_ I32 type)
1937 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1938 tokereport(type, &NEXTVAL_NEXTTOKE);
1941 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1942 PL_nexttype[PL_nexttoke] = type;
1949 * This subroutine handles postfix deref syntax after the arrow has already
1950 * been emitted. @* $* etc. are emitted as two separate token right here.
1951 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1952 * only the first, leaving yylex to find the next.
1956 S_postderef(pTHX_ int const funny, char const next)
1958 assert(funny == DOLSHARP || strchr("$@%&*", funny));
1960 PL_expect = XOPERATOR;
1961 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1962 assert('@' == funny || '$' == funny || DOLSHARP == funny);
1963 PL_lex_state = LEX_INTERPEND;
1965 force_next(POSTJOIN);
1971 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1972 && !PL_lex_brackets)
1974 PL_expect = XOPERATOR;
1983 int yyc = PL_parser->yychar;
1984 if (yyc != YYEMPTY) {
1986 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1987 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1988 PL_lex_allbrackets--;
1990 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1991 } else if (yyc == '('/*)*/) {
1992 PL_lex_allbrackets--;
1997 PL_parser->yychar = YYEMPTY;
2002 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2004 SV * const sv = newSVpvn_utf8(start, len,
2007 && !is_utf8_invariant_string((const U8*)start, len)
2008 && is_utf8_string((const U8*)start, len));
2014 * When the lexer knows the next thing is a word (for instance, it has
2015 * just seen -> and it knows that the next char is a word char, then
2016 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2020 * char *start : buffer position (must be within PL_linestr)
2021 * int token : PL_next* will be this type of bare word
2022 * (e.g., METHOD,BAREWORD)
2023 * int check_keyword : if true, Perl checks to make sure the word isn't
2024 * a keyword (do this if the word is a label, e.g. goto FOO)
2025 * int allow_pack : if true, : characters will also be allowed (require,
2026 * use, etc. do this)
2030 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2035 PERL_ARGS_ASSERT_FORCE_WORD;
2037 start = skipspace(start);
2039 if (isIDFIRST_lazy_if(s,UTF)
2040 || (allow_pack && *s == ':' && s[1] == ':') )
2042 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2043 if (check_keyword) {
2044 char *s2 = PL_tokenbuf;
2046 if (allow_pack && len > 6 && strEQs(s2, "CORE::"))
2048 if (keyword(s2, len2, 0))
2051 if (token == METHOD) {
2056 PL_expect = XOPERATOR;
2059 NEXTVAL_NEXTTOKE.opval
2060 = newSVOP(OP_CONST,0,
2061 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2062 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2070 * Called when the lexer wants $foo *foo &foo etc, but the program
2071 * text only contains the "foo" portion. The first argument is a pointer
2072 * to the "foo", and the second argument is the type symbol to prefix.
2073 * Forces the next token to be a "BAREWORD".
2074 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2078 S_force_ident(pTHX_ const char *s, int kind)
2080 PERL_ARGS_ASSERT_FORCE_IDENT;
2083 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2084 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2085 UTF ? SVf_UTF8 : 0));
2086 NEXTVAL_NEXTTOKE.opval = o;
2087 force_next(BAREWORD);
2089 o->op_private = OPpCONST_ENTERED;
2090 /* XXX see note in pp_entereval() for why we forgo typo
2091 warnings if the symbol must be introduced in an eval.
2093 gv_fetchpvn_flags(s, len,
2094 (PL_in_eval ? GV_ADDMULTI
2095 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2096 kind == '$' ? SVt_PV :
2097 kind == '@' ? SVt_PVAV :
2098 kind == '%' ? SVt_PVHV :
2106 S_force_ident_maybe_lex(pTHX_ char pit)
2108 NEXTVAL_NEXTTOKE.ival = pit;
2113 Perl_str_to_version(pTHX_ SV *sv)
2118 const char *start = SvPV_const(sv,len);
2119 const char * const end = start + len;
2120 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2122 PERL_ARGS_ASSERT_STR_TO_VERSION;
2124 while (start < end) {
2128 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2133 retval += ((NV)n)/nshift;
2142 * Forces the next token to be a version number.
2143 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2144 * and if "guessing" is TRUE, then no new token is created (and the caller
2145 * must use an alternative parsing method).
2149 S_force_version(pTHX_ char *s, int guessing)
2154 PERL_ARGS_ASSERT_FORCE_VERSION;
2162 while (isDIGIT(*d) || *d == '_' || *d == '.')
2164 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2166 s = scan_num(s, &pl_yylval);
2167 version = pl_yylval.opval;
2168 ver = cSVOPx(version)->op_sv;
2169 if (SvPOK(ver) && !SvNIOK(ver)) {
2170 SvUPGRADE(ver, SVt_PVNV);
2171 SvNV_set(ver, str_to_version(ver));
2172 SvNOK_on(ver); /* hint that it is a version */
2175 else if (guessing) {
2180 /* NOTE: The parser sees the package name and the VERSION swapped */
2181 NEXTVAL_NEXTTOKE.opval = version;
2182 force_next(BAREWORD);
2188 * S_force_strict_version
2189 * Forces the next token to be a version number using strict syntax rules.
2193 S_force_strict_version(pTHX_ char *s)
2196 const char *errstr = NULL;
2198 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2200 while (isSPACE(*s)) /* leading whitespace */
2203 if (is_STRICT_VERSION(s,&errstr)) {
2205 s = (char *)scan_version(s, ver, 0);
2206 version = newSVOP(OP_CONST, 0, ver);
2208 else if ((*s != ';' && *s != '{' && *s != '}' )
2209 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2213 yyerror(errstr); /* version required */
2217 /* NOTE: The parser sees the package name and the VERSION swapped */
2218 NEXTVAL_NEXTTOKE.opval = version;
2219 force_next(BAREWORD);
2226 * Tokenize a quoted string passed in as an SV. It finds the next
2227 * chunk, up to end of string or a backslash. It may make a new
2228 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2233 S_tokeq(pTHX_ SV *sv)
2240 PERL_ARGS_ASSERT_TOKEQ;
2244 assert (!SvIsCOW(sv));
2245 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2249 /* This is relying on the SV being "well formed" with a trailing '\0' */
2250 while (s < send && !(*s == '\\' && s[1] == '\\'))
2255 if ( PL_hints & HINT_NEW_STRING ) {
2256 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2257 SVs_TEMP | SvUTF8(sv));
2261 if (s + 1 < send && (s[1] == '\\'))
2262 s++; /* all that, just for this */
2267 SvCUR_set(sv, d - SvPVX_const(sv));
2269 if ( PL_hints & HINT_NEW_STRING )
2270 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2275 * Now come three functions related to double-quote context,
2276 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2277 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2278 * interact with PL_lex_state, and create fake ( ... ) argument lists
2279 * to handle functions and concatenation.
2283 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2288 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2290 * Pattern matching will set PL_lex_op to the pattern-matching op to
2291 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2293 * OP_CONST is easy--just make the new op and return.
2295 * Everything else becomes a FUNC.
2297 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2298 * had an OP_CONST. This just sets us up for a
2299 * call to S_sublex_push().
2303 S_sublex_start(pTHX)
2305 const I32 op_type = pl_yylval.ival;
2307 if (op_type == OP_NULL) {
2308 pl_yylval.opval = PL_lex_op;
2312 if (op_type == OP_CONST) {
2313 SV *sv = PL_lex_stuff;
2314 PL_lex_stuff = NULL;
2317 if (SvTYPE(sv) == SVt_PVIV) {
2318 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2320 const char * const p = SvPV_const(sv, len);
2321 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2325 pl_yylval.opval = newSVOP(op_type, 0, sv);
2329 PL_parser->lex_super_state = PL_lex_state;
2330 PL_parser->lex_sub_inwhat = (U16)op_type;
2331 PL_parser->lex_sub_op = PL_lex_op;
2332 PL_lex_state = LEX_INTERPPUSH;
2336 pl_yylval.opval = PL_lex_op;
2346 * Create a new scope to save the lexing state. The scope will be
2347 * ended in S_sublex_done. Returns a '(', starting the function arguments
2348 * to the uc, lc, etc. found before.
2349 * Sets PL_lex_state to LEX_INTERPCONCAT.
2356 const bool is_heredoc = PL_multi_close == '<';
2359 PL_lex_state = PL_parser->lex_super_state;
2360 SAVEI8(PL_lex_dojoin);
2361 SAVEI32(PL_lex_brackets);
2362 SAVEI32(PL_lex_allbrackets);
2363 SAVEI32(PL_lex_formbrack);
2364 SAVEI8(PL_lex_fakeeof);
2365 SAVEI32(PL_lex_casemods);
2366 SAVEI32(PL_lex_starts);
2367 SAVEI8(PL_lex_state);
2368 SAVESPTR(PL_lex_repl);
2369 SAVEVPTR(PL_lex_inpat);
2370 SAVEI16(PL_lex_inwhat);
2373 SAVECOPLINE(PL_curcop);
2374 SAVEI32(PL_multi_end);
2375 SAVEI32(PL_parser->herelines);
2376 PL_parser->herelines = 0;
2378 SAVEIV(PL_multi_close);
2379 SAVEPPTR(PL_bufptr);
2380 SAVEPPTR(PL_bufend);
2381 SAVEPPTR(PL_oldbufptr);
2382 SAVEPPTR(PL_oldoldbufptr);
2383 SAVEPPTR(PL_last_lop);
2384 SAVEPPTR(PL_last_uni);
2385 SAVEPPTR(PL_linestart);
2386 SAVESPTR(PL_linestr);
2387 SAVEGENERICPV(PL_lex_brackstack);
2388 SAVEGENERICPV(PL_lex_casestack);
2389 SAVEGENERICPV(PL_parser->lex_shared);
2390 SAVEBOOL(PL_parser->lex_re_reparsing);
2391 SAVEI32(PL_copline);
2393 /* The here-doc parser needs to be able to peek into outer lexing
2394 scopes to find the body of the here-doc. So we put PL_linestr and
2395 PL_bufptr into lex_shared, to ‘share’ those values.
2397 PL_parser->lex_shared->ls_linestr = PL_linestr;
2398 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2400 PL_linestr = PL_lex_stuff;
2401 PL_lex_repl = PL_parser->lex_sub_repl;
2402 PL_lex_stuff = NULL;
2403 PL_parser->lex_sub_repl = NULL;
2405 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2406 set for an inner quote-like operator and then an error causes scope-
2407 popping. We must not have a PL_lex_stuff value left dangling, as
2408 that breaks assumptions elsewhere. See bug #123617. */
2409 SAVEGENERICSV(PL_lex_stuff);
2410 SAVEGENERICSV(PL_parser->lex_sub_repl);
2412 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2413 = SvPVX(PL_linestr);
2414 PL_bufend += SvCUR(PL_linestr);
2415 PL_last_lop = PL_last_uni = NULL;
2416 SAVEFREESV(PL_linestr);
2417 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2419 PL_lex_dojoin = FALSE;
2420 PL_lex_brackets = PL_lex_formbrack = 0;
2421 PL_lex_allbrackets = 0;
2422 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2423 Newx(PL_lex_brackstack, 120, char);
2424 Newx(PL_lex_casestack, 12, char);
2425 PL_lex_casemods = 0;
2426 *PL_lex_casestack = '\0';
2428 PL_lex_state = LEX_INTERPCONCAT;
2430 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2431 PL_copline = NOLINE;
2433 Newxz(shared, 1, LEXSHARED);
2434 shared->ls_prev = PL_parser->lex_shared;
2435 PL_parser->lex_shared = shared;
2437 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2438 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2439 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2440 PL_lex_inpat = PL_parser->lex_sub_op;
2442 PL_lex_inpat = NULL;
2444 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2445 PL_in_eval &= ~EVAL_RE_REPARSING;
2452 * Restores lexer state after a S_sublex_push.
2458 if (!PL_lex_starts++) {
2459 SV * const sv = newSVpvs("");
2460 if (SvUTF8(PL_linestr))
2462 PL_expect = XOPERATOR;
2463 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2467 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2468 PL_lex_state = LEX_INTERPCASEMOD;
2472 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2473 assert(PL_lex_inwhat != OP_TRANSR);
2475 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2476 PL_linestr = PL_lex_repl;
2478 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2479 PL_bufend += SvCUR(PL_linestr);
2480 PL_last_lop = PL_last_uni = NULL;
2481 PL_lex_dojoin = FALSE;
2482 PL_lex_brackets = 0;
2483 PL_lex_allbrackets = 0;
2484 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2485 PL_lex_casemods = 0;
2486 *PL_lex_casestack = '\0';
2488 if (SvEVALED(PL_lex_repl)) {
2489 PL_lex_state = LEX_INTERPNORMAL;
2491 /* we don't clear PL_lex_repl here, so that we can check later
2492 whether this is an evalled subst; that means we rely on the
2493 logic to ensure sublex_done() is called again only via the
2494 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2497 PL_lex_state = LEX_INTERPCONCAT;
2500 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2501 CopLINE(PL_curcop) +=
2502 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2503 + PL_parser->herelines;
2504 PL_parser->herelines = 0;
2509 const line_t l = CopLINE(PL_curcop);
2511 if (PL_multi_close == '<')
2512 PL_parser->herelines += l - PL_multi_end;
2513 PL_bufend = SvPVX(PL_linestr);
2514 PL_bufend += SvCUR(PL_linestr);
2515 PL_expect = XOPERATOR;
2520 PERL_STATIC_INLINE SV*
2521 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2523 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2524 * interior, hence to the "}". Finds what the name resolves to, returning
2525 * an SV* containing it; NULL if no valid one found */
2527 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2534 const U8* first_bad_char_loc;
2535 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2537 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2540 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2541 "Unknown charname '' is deprecated");
2545 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2547 &first_bad_char_loc))
2549 /* If warnings are on, this will print a more detailed analysis of what
2550 * is wrong than the error message below */
2551 utf8n_to_uvchr(first_bad_char_loc,
2552 e - ((char *) first_bad_char_loc),
2555 /* We deliberately don't try to print the malformed character, which
2556 * might not print very well; it also may be just the first of many
2557 * malformations, so don't print what comes after it */
2558 yyerror_pv(Perl_form(aTHX_
2559 "Malformed UTF-8 character immediately after '%.*s'",
2560 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2565 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2566 /* include the <}> */
2567 e - backslash_ptr + 1);
2569 SvREFCNT_dec_NN(res);
2573 /* See if the charnames handler is the Perl core's, and if so, we can skip
2574 * the validation needed for a user-supplied one, as Perl's does its own
2576 table = GvHV(PL_hintgv); /* ^H */
2577 cvp = hv_fetchs(table, "charnames", FALSE);
2578 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2579 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2581 const char * const name = HvNAME(stash);
2582 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2583 && strEQ(name, "_charnames")) {
2588 /* Here, it isn't Perl's charname handler. We can't rely on a
2589 * user-supplied handler to validate the input name. For non-ut8 input,
2590 * look to see that the first character is legal. Then loop through the
2591 * rest checking that each is a continuation */
2593 /* This code makes the reasonable assumption that the only Latin1-range
2594 * characters that begin a character name alias are alphabetic, otherwise
2595 * would have to create a isCHARNAME_BEGIN macro */
2598 if (! isALPHAU(*s)) {
2603 if (! isCHARNAME_CONT(*s)) {
2606 if (*s == ' ' && *(s-1) == ' ') {
2613 /* Similarly for utf8. For invariants can check directly; for other
2614 * Latin1, can calculate their code point and check; otherwise use a
2616 if (UTF8_IS_INVARIANT(*s)) {
2617 if (! isALPHAU(*s)) {
2621 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2622 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2628 if (! PL_utf8_charname_begin) {
2629 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2630 PL_utf8_charname_begin = _core_swash_init("utf8",
2631 "_Perl_Charname_Begin",
2633 1, 0, NULL, &flags);
2635 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2642 if (UTF8_IS_INVARIANT(*s)) {
2643 if (! isCHARNAME_CONT(*s)) {
2646 if (*s == ' ' && *(s-1) == ' ') {
2651 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2652 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2659 if (! PL_utf8_charname_continue) {
2660 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2661 PL_utf8_charname_continue = _core_swash_init("utf8",
2662 "_Perl_Charname_Continue",
2664 1, 0, NULL, &flags);
2666 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2673 if (*(s-1) == ' ') {
2676 "charnames alias definitions may not contain trailing "
2677 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2678 (int)(s - backslash_ptr + 1), backslash_ptr,
2679 (int)(e - s + 1), s + 1
2681 UTF ? SVf_UTF8 : 0);
2685 if (SvUTF8(res)) { /* Don't accept malformed input */
2686 const U8* first_bad_char_loc;
2688 const char* const str = SvPV_const(res, len);
2689 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2690 /* If warnings are on, this will print a more detailed analysis of
2691 * what is wrong than the error message below */
2692 utf8n_to_uvchr(first_bad_char_loc,
2693 (char *) first_bad_char_loc - str,
2696 /* We deliberately don't try to print the malformed character,
2697 * which might not print very well; it also may be just the first
2698 * of many malformations, so don't print what comes after it */
2701 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2702 (int) (e - backslash_ptr + 1), backslash_ptr,
2703 (int) ((char *) first_bad_char_loc - str), str
2714 /* The final %.*s makes sure that should the trailing NUL be missing
2715 * that this print won't run off the end of the string */
2718 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2719 (int)(s - backslash_ptr + 1), backslash_ptr,
2720 (int)(e - s + 1), s + 1
2722 UTF ? SVf_UTF8 : 0);
2729 "charnames alias definitions may not contain a sequence of "
2730 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2731 (int)(s - backslash_ptr + 1), backslash_ptr,
2732 (int)(e - s + 1), s + 1
2734 UTF ? SVf_UTF8 : 0);
2741 Extracts the next constant part of a pattern, double-quoted string,
2742 or transliteration. This is terrifying code.
2744 For example, in parsing the double-quoted string "ab\x63$d", it would
2745 stop at the '$' and return an OP_CONST containing 'abc'.
2747 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2748 processing a pattern (PL_lex_inpat is true), a transliteration
2749 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2751 Returns a pointer to the character scanned up to. If this is
2752 advanced from the start pointer supplied (i.e. if anything was
2753 successfully parsed), will leave an OP_CONST for the substring scanned
2754 in pl_yylval. Caller must intuit reason for not parsing further
2755 by looking at the next characters herself.
2759 \N{FOO} => \N{U+hex_for_character_FOO}
2760 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2763 all other \-char, including \N and \N{ apart from \N{ABC}
2766 @ and $ where it appears to be a var, but not for $ as tail anchor
2770 In transliterations:
2771 characters are VERY literal, except for - not at the start or end
2772 of the string, which indicates a range. If the range is in bytes,
2773 scan_const expands the range to the full set of intermediate
2774 characters. If the range is in utf8, the hyphen is replaced with
2775 a certain range mark which will be handled by pmtrans() in op.c.
2777 In double-quoted strings:
2779 double-quoted style: \r and \n
2780 constants: \x31, etc.
2781 deprecated backrefs: \1 (in substitution replacements)
2782 case and quoting: \U \Q \E
2785 scan_const does *not* construct ops to handle interpolated strings.
2786 It stops processing as soon as it finds an embedded $ or @ variable
2787 and leaves it to the caller to work out what's going on.
2789 embedded arrays (whether in pattern or not) could be:
2790 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2792 $ in double-quoted strings must be the symbol of an embedded scalar.
2794 $ in pattern could be $foo or could be tail anchor. Assumption:
2795 it's a tail anchor if $ is the last thing in the string, or if it's
2796 followed by one of "()| \r\n\t"
2798 \1 (backreferences) are turned into $1 in substitutions
2800 The structure of the code is
2801 while (there's a character to process) {
2802 handle transliteration ranges
2803 skip regexp comments /(?#comment)/ and codes /(?{code})/
2804 skip #-initiated comments in //x patterns
2805 check for embedded arrays
2806 check for embedded scalars
2808 deprecate \1 in substitution replacements
2809 handle string-changing backslashes \l \U \Q \E, etc.
2810 switch (what was escaped) {
2811 handle \- in a transliteration (becomes a literal -)
2812 if a pattern and not \N{, go treat as regular character
2813 handle \132 (octal characters)
2814 handle \x15 and \x{1234} (hex characters)
2815 handle \N{name} (named characters, also \N{3,5} in a pattern)
2816 handle \cV (control characters)
2817 handle printf-style backslashes (\f, \r, \n, etc)
2820 } (end if backslash)
2821 handle regular character
2822 } (end while character to read)
2827 S_scan_const(pTHX_ char *start)
2829 char *send = PL_bufend; /* end of the constant */
2830 SV *sv = newSV(send - start); /* sv for the constant. See note below
2832 char *s = start; /* start of the constant */
2833 char *d = SvPVX(sv); /* destination for copies */
2834 bool dorange = FALSE; /* are we in a translit range? */
2835 bool didrange = FALSE; /* did we just finish a range? */
2836 bool in_charclass = FALSE; /* within /[...]/ */
2837 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2838 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2839 UTF8? But, this can show as true
2840 when the source isn't utf8, as for
2841 example when it is entirely composed
2843 SV *res; /* result from charnames */
2844 STRLEN offset_to_max; /* The offset in the output to where the range
2845 high-end character is temporarily placed */
2847 /* Note on sizing: The scanned constant is placed into sv, which is
2848 * initialized by newSV() assuming one byte of output for every byte of
2849 * input. This routine expects newSV() to allocate an extra byte for a
2850 * trailing NUL, which this routine will append if it gets to the end of
2851 * the input. There may be more bytes of input than output (eg., \N{LATIN
2852 * CAPITAL LETTER A}), or more output than input if the constant ends up
2853 * recoded to utf8, but each time a construct is found that might increase
2854 * the needed size, SvGROW() is called. Its size parameter each time is
2855 * based on the best guess estimate at the time, namely the length used so
2856 * far, plus the length the current construct will occupy, plus room for
2857 * the trailing NUL, plus one byte for every input byte still unscanned */
2859 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2862 int backslash_N = 0; /* ? was the character from \N{} */
2863 int non_portable_endpoint = 0; /* ? In a range is an endpoint
2864 platform-specific like \x65 */
2867 PERL_ARGS_ASSERT_SCAN_CONST;
2869 assert(PL_lex_inwhat != OP_TRANSR);
2870 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2871 /* If we are doing a trans and we know we want UTF8 set expectation */
2872 has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2873 this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2876 /* Protect sv from errors and fatal warnings. */
2877 ENTER_with_name("scan_const");
2881 || dorange /* Handle tr/// range at right edge of input */
2884 /* get transliterations out of the way (they're most literal) */
2885 if (PL_lex_inwhat == OP_TRANS) {
2887 /* But there isn't any special handling necessary unless there is a
2888 * range, so for most cases we just drop down and handle the value
2889 * as any other. There are two exceptions.
2891 * 1. A minus sign indicates that we are actually going to have
2892 * a range. In this case, skip the '-', set a flag, then drop
2893 * down to handle what should be the end range value.
2894 * 2. After we've handled that value, the next time through, that
2895 * flag is set and we fix up the range.
2897 * Ranges entirely within Latin1 are expanded out entirely, in
2898 * order to avoid the significant overhead of making a swash.
2899 * Ranges that extend above Latin1 have to have a swash, so there
2900 * is no advantage to abbreviating them here, so they are stored
2901 * here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies
2902 * a hyphen without any possible ambiguity. On EBCDIC machines, if
2903 * the range is expressed as Unicode, the Latin1 portion is
2904 * expanded out even if the entire range extends above Latin1.
2905 * This is because each code point in it has to be processed here
2906 * individually to get its native translation */
2910 /* Here, we don't think we're in a range. If we've processed
2911 * at least one character, then see if this next one is a '-',
2912 * indicating the previous one was the start of a range. But
2913 * don't bother if we're too close to the end for the minus to
2915 if (*s != '-' || s >= send - 1 || s == start) {
2917 /* A regular character. Process like any other, but first
2918 * clear any flags */
2922 non_portable_endpoint = 0;
2925 /* Drops down to generic code to process current byte */
2928 if (didrange) { /* Something like y/A-C-Z// */
2929 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2934 s++; /* Skip past the minus */
2936 /* d now points to where the end-range character will be
2937 * placed. Save it so won't have to go finding it later,
2938 * and drop down to get that character. (Actually we
2939 * instead save the offset, to handle the case where a
2940 * realloc in the meantime could change the actual
2941 * pointer). We'll finish processing the range the next
2942 * time through the loop */
2943 offset_to_max = d - SvPVX_const(sv);
2945 } /* End of not a range */
2947 /* Here we have parsed a range. Now must handle it. At this
2949 * 'sv' is a SV* that contains the output string we are
2950 * constructing. The final two characters in that string
2951 * are the range start and range end, in order.
2952 * 'd' points to just beyond the range end in the 'sv' string,
2953 * where we would next place something
2954 * 'offset_to_max' is the offset in 'sv' at which the character
2955 * before 'd' begins.
2957 const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2958 const char * min_ptr;
2960 IV range_max; /* last character in range */
2964 bool convert_unicode;
2965 IV real_range_max = 0;
2968 /* Get the range-ends code point values. */
2970 /* We know the utf8 is valid, because we just constructed
2971 * it ourselves in previous loop iterations */
2972 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
2973 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
2974 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
2977 min_ptr = max_ptr - 1;
2978 range_min = * (U8*) min_ptr;
2979 range_max = * (U8*) max_ptr;
2983 /* On EBCDIC platforms, we may have to deal with portable
2984 * ranges. These happen if at least one range endpoint is a
2985 * Unicode value (\N{...}), or if the range is a subset of
2986 * [A-Z] or [a-z], and both ends are literal characters,
2987 * like 'A', and not like \x{C1} */
2988 if ((convert_unicode
2989 = cBOOL(backslash_N) /* \N{} forces Unicode, hence
2991 || ( ! non_portable_endpoint
2992 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
2993 || (isUPPER_A(range_min) && isUPPER_A(range_max))))
2996 /* Special handling is needed for these portable ranges.
2997 * They are defined to all be in Unicode terms, which
2998 * include all Unicode code points between the end points.
2999 * Convert to Unicode to get the Unicode range. Later we
3000 * will convert each code point in the range back to
3002 range_min = NATIVE_TO_UNI(range_min);
3003 range_max = NATIVE_TO_UNI(range_max);
3007 if (range_min > range_max) {
3009 if (convert_unicode) {
3010 /* Need to convert back to native for meaningful
3011 * messages for this platform */
3012 range_min = UNI_TO_NATIVE(range_min);
3013 range_max = UNI_TO_NATIVE(range_max);
3017 /* Use the characters themselves for the error message if
3018 * ASCII printables; otherwise some visible representation
3020 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3022 "Invalid range \"%c-%c\" in transliteration operator",
3023 (char)range_min, (char)range_max);
3026 else if (convert_unicode) {
3027 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3029 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04" UVXf "}\""
3030 " in transliteration operator",
3031 range_min, range_max);
3035 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3037 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3038 " in transliteration operator",
3039 range_min, range_max);
3045 /* We try to avoid creating a swash. If the upper end of
3046 * this range is below 256, this range won't force a swash;
3047 * otherwise it does force a swash, and as long as we have
3048 * to have one, we might as well not expand things out.
3049 * But if it's EBCDIC, we may have to look at each
3050 * character below 256 if we have to convert to/from
3054 && (range_min > 255 || ! convert_unicode)
3057 /* Move the high character one byte to the right; then
3058 * insert between it and the range begin, an illegal
3059 * byte which serves to indicate this is a range (using
3060 * a '-' could be ambiguous). */
3062 while (e-- > max_ptr) {
3065 *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3069 /* Here, we're going to expand out the range. For EBCDIC
3070 * the range can extend above 255 (not so in ASCII), so
3071 * for EBCDIC, split it into the parts above and below
3074 if (range_max > 255) {
3075 real_range_max = range_max;
3081 /* Here we need to expand out the string to contain each
3082 * character in the range. Grow the output to handle this */
3084 save_offset = min_ptr - SvPVX_const(sv);
3086 /* The base growth is the number of code points in the range */
3087 grow = range_max - range_min + 1;
3090 /* But if the output is UTF-8, some of those characters may
3091 * need two bytes (since the maximum range value here is
3092 * 255, the max bytes per character is two). On ASCII
3093 * platforms, it's not much trouble to get an accurate
3094 * count of what's needed. But on EBCDIC, the ones that
3095 * need 2 bytes are scattered around, so just use a worst
3096 * case value instead of calculating for that platform. */
3100 /* Only those above 127 require 2 bytes. This may be
3101 * everything in the range, or not */
3102 if (range_min > 127) {
3105 else if (range_max > 127) {
3106 grow += range_max - 127;
3111 /* Subtract 3 for the bytes that were already accounted for
3112 * (min, max, and the hyphen) */
3113 d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3);
3116 /* Here, we expand out the range. */
3117 if (convert_unicode) {
3120 /* Recall that the min and max are now in Unicode terms, so
3121 * we have to convert each character to its native
3124 for (i = range_min; i <= range_max; i++) {
3125 append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3130 for (i = range_min; i <= range_max; i++) {
3131 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3137 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3141 /* Here, no conversions are necessary, which means that the
3142 * first character in the range is already in 'd' and
3143 * valid, so we can skip overwriting it */
3146 for (i = range_min + 1; i <= range_max; i++) {
3147 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3152 for (i = range_min + 1; i <= range_max; i++) {
3159 /* If the original range extended above 255, add in that portion. */
3160 if (real_range_max) {
3161 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3162 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3163 if (real_range_max > 0x101)
3164 *d++ = (char) ILLEGAL_UTF8_BYTE;
3165 if (real_range_max > 0x100)
3166 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3171 /* mark the range as done, and continue */
3175 non_portable_endpoint = 0;
3179 } /* End of is a range */
3180 } /* End of transliteration. Joins main code after these else's */
3181 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3184 while (s1 >= start && *s1-- == '\\')
3187 in_charclass = TRUE;
3190 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3193 while (s1 >= start && *s1-- == '\\')
3196 in_charclass = FALSE;
3199 /* skip for regexp comments /(?#comment)/, except for the last
3200 * char, which will be done separately.
3201 * Stop on (?{..}) and friends */
3203 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3205 while (s+1 < send && *s != ')')
3208 else if (!PL_lex_casemods
3209 && ( s[2] == '{' /* This should match regcomp.c */
3210 || (s[2] == '?' && s[3] == '{')))
3216 /* likewise skip #-initiated comments in //x patterns */
3220 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3222 while (s+1 < send && *s != '\n')
3226 /* no further processing of single-quoted regex */
3227 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3228 goto default_action;
3230 /* check for embedded arrays
3231 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3233 else if (*s == '@' && s[1]) {
3234 if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
3236 if (strchr(":'{$", s[1]))
3238 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3239 break; /* in regexp, neither @+ nor @- are interpolated */
3242 /* check for embedded scalars. only stop if we're sure it's a
3245 else if (*s == '$') {
3246 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3248 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3250 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3251 "Possible unintended interpolation of $\\ in regex");
3253 break; /* in regexp, $ might be tail anchor */
3257 /* End of else if chain - OP_TRANS rejoin rest */
3260 if (*s == '\\' && s+1 < send) {
3261 char* e; /* Can be used for ending '}', etc. */
3265 /* warn on \1 - \9 in substitution replacements, but note that \11
3266 * is an octal; and \19 is \1 followed by '9' */
3267 if (PL_lex_inwhat == OP_SUBST
3273 /* diag_listed_as: \%d better written as $%d */
3274 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3279 /* string-change backslash escapes */
3280 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3284 /* In a pattern, process \N, but skip any other backslash escapes.
3285 * This is because we don't want to translate an escape sequence
3286 * into a meta symbol and have the regex compiler use the meta
3287 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3288 * in spite of this, we do have to process \N here while the proper
3289 * charnames handler is in scope. See bugs #56444 and #62056.
3291 * There is a complication because \N in a pattern may also stand
3292 * for 'match a non-nl', and not mean a charname, in which case its
3293 * processing should be deferred to the regex compiler. To be a
3294 * charname it must be followed immediately by a '{', and not look
3295 * like \N followed by a curly quantifier, i.e., not something like
3296 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3298 else if (PL_lex_inpat
3301 || regcurly(s + 1)))
3304 goto default_action;
3310 if ((isALPHANUMERIC(*s)))
3311 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3312 "Unrecognized escape \\%c passed through",
3314 /* default action is to copy the quoted character */
3315 goto default_action;
3318 /* eg. \132 indicates the octal constant 0132 */
3319 case '0': case '1': case '2': case '3':
3320 case '4': case '5': case '6': case '7':
3322 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3324 uv = grok_oct(s, &len, &flags, NULL);
3326 if (len < 3 && s < send && isDIGIT(*s)
3327 && ckWARN(WARN_MISC))
3329 Perl_warner(aTHX_ packWARN(WARN_MISC),
3330 "%s", form_short_octal_warning(s, len));
3333 goto NUM_ESCAPE_INSERT;
3335 /* eg. \o{24} indicates the octal constant \024 */
3340 bool valid = grok_bslash_o(&s, &uv, &error,
3341 TRUE, /* Output warning */
3342 FALSE, /* Not strict */
3343 TRUE, /* Output warnings for
3350 goto NUM_ESCAPE_INSERT;
3353 /* eg. \x24 indicates the hex constant 0x24 */
3358 bool valid = grok_bslash_x(&s, &uv, &error,
3359 TRUE, /* Output warning */
3360 FALSE, /* Not strict */
3361 TRUE, /* Output warnings for
3371 /* Insert oct or hex escaped character. */
3373 /* Here uv is the ordinal of the next character being added */
3374 if (UVCHR_IS_INVARIANT(uv)) {
3378 if (!has_utf8 && uv > 255) {
3379 /* Might need to recode whatever we have accumulated so
3380 * far if it contains any chars variant in utf8 or
3383 SvCUR_set(sv, d - SvPVX_const(sv));
3386 /* See Note on sizing above. */
3387 sv_utf8_upgrade_flags_grow(
3389 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3390 /* Above-latin1 in string
3391 * implies no encoding */
3392 |SV_UTF8_NO_ENCODING,
3393 UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
3394 d = SvPVX(sv) + SvCUR(sv);
3399 /* Usually, there will already be enough room in 'sv'
3400 * since such escapes are likely longer than any UTF-8
3401 * sequence they can end up as. This isn't the case on
3402 * EBCDIC where \x{40000000} contains 12 bytes, and the
3403 * UTF-8 for it contains 14. And, we have to allow for
3404 * a trailing NUL. It probably can't happen on ASCII
3405 * platforms, but be safe */
3406 const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
3408 if (UNLIKELY(needed > SvLEN(sv))) {
3409 SvCUR_set(sv, d - SvPVX_const(sv));
3410 d = sv_grow(sv, needed) + SvCUR(sv);
3413 d = (char*)uvchr_to_utf8((U8*)d, uv);
3414 if (PL_lex_inwhat == OP_TRANS
3415 && PL_parser->lex_sub_op)
3417 PL_parser->lex_sub_op->op_private |=
3418 (PL_lex_repl ? OPpTRANS_FROM_UTF
3427 non_portable_endpoint++;
3432 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3433 * named character, like \N{LATIN SMALL LETTER A}, or a named
3434 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3435 * GRAVE} (except y/// can't handle the latter, croaking). For
3436 * convenience all three forms are referred to as "named
3437 * characters" below.
3439 * For patterns, \N also can mean to match a non-newline. Code
3440 * before this 'switch' statement should already have handled
3441 * this situation, and hence this code only has to deal with
3442 * the named character cases.
3444 * For non-patterns, the named characters are converted to
3445 * their string equivalents. In patterns, named characters are
3446 * not converted to their ultimate forms for the same reasons
3447 * that other escapes aren't. Instead, they are converted to
3448 * the \N{U+...} form to get the value from the charnames that
3449 * is in effect right now, while preserving the fact that it
3450 * was a named character, so that the regex compiler knows
3453 * The structure of this section of code (besides checking for
3454 * errors and upgrading to utf8) is:
3455 * If the named character is of the form \N{U+...}, pass it
3456 * through if a pattern; otherwise convert the code point
3458 * Otherwise must be some \N{NAME}: convert to
3459 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3461 * Transliteration is an exception. The conversion to utf8 is
3462 * only done if the code point requires it to be representable.
3464 * Here, 's' points to the 'N'; the test below is guaranteed to
3465 * succeed if we are being called on a pattern, as we already
3466 * know from a test above that the next character is a '{'. A
3467 * non-pattern \N must mean 'named character', which requires
3471 yyerror("Missing braces on \\N{}");
3476 /* If there is no matching '}', it is an error. */
3477 if (! (e = strchr(s, '}'))) {
3478 if (! PL_lex_inpat) {
3479 yyerror("Missing right brace on \\N{}");
3481 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3486 /* Here it looks like a named character */
3488 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3489 s += 2; /* Skip to next char after the 'U+' */
3492 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3493 /* Check the syntax. */
3496 if (!isXDIGIT(*s)) {
3499 "Invalid hexadecimal number in \\N{U+...}"
3507 else if ((*s == '.' || *s == '_')
3513 /* Pass everything through unchanged.
3514 * +1 is for the '}' */
3515 Copy(orig_s, d, e - orig_s + 1, char);
3516 d += e - orig_s + 1;
3518 else { /* Not a pattern: convert the hex to string */
3519 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3520 | PERL_SCAN_SILENT_ILLDIGIT
3521 | PERL_SCAN_DISALLOW_PREFIX;
3523 uv = grok_hex(s, &len, &flags, NULL);
3524 if (len == 0 || (len != (STRLEN)(e - s)))
3527 /* For non-tr///, if the destination is not in utf8,
3528 * unconditionally recode it to be so. This is
3529 * because \N{} implies Unicode semantics, and scalars
3530 * have to be in utf8 to guarantee those semantics.
3531 * tr/// doesn't care about Unicode rules, so no need
3532 * there to upgrade to UTF-8 for small enough code
3534 if (! has_utf8 && ( uv > 0xFF
3535 || PL_lex_inwhat != OP_TRANS))
3537 SvCUR_set(sv, d - SvPVX_const(sv));
3540 /* See Note on sizing above. */
3541 sv_utf8_upgrade_flags_grow(
3543 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3544 OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
3545 d = SvPVX(sv) + SvCUR(sv);
3549 /* Add the (Unicode) code point to the output. */
3550 if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3551 *d++ = (char) LATIN1_TO_NATIVE(uv);
3554 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3558 else /* Here is \N{NAME} but not \N{U+...}. */
3559 if ((res = get_and_check_backslash_N_name(s, e)))
3562 const char *str = SvPV_const(res, len);
3565 if (! len) { /* The name resolved to an empty string */
3566 Copy("\\N{}", d, 4, char);
3570 /* In order to not lose information for the regex
3571 * compiler, pass the result in the specially made
3572 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3573 * the code points in hex of each character
3574 * returned by charnames */
3576 const char *str_end = str + len;
3577 const STRLEN off = d - SvPVX_const(sv);
3579 if (! SvUTF8(res)) {
3580 /* For the non-UTF-8 case, we can determine the
3581 * exact length needed without having to parse
3582 * through the string. Each character takes up
3583 * 2 hex digits plus either a trailing dot or
3585 const char initial_text[] = "\\N{U+";
3586 const STRLEN initial_len = sizeof(initial_text)
3588 d = off + SvGROW(sv, off
3591 /* +1 for trailing NUL */
3594 + (STRLEN)(send - e));
3595 Copy(initial_text, d, initial_len, char);
3597 while (str < str_end) {
3600 my_snprintf(hex_string,
3604 /* The regex compiler is
3605 * expecting Unicode, not
3607 NATIVE_TO_LATIN1(*str));
3608 PERL_MY_SNPRINTF_POST_GUARD(len,
3609 sizeof(hex_string));
3610 Copy(hex_string, d, 3, char);
3614 d--; /* Below, we will overwrite the final
3615 dot with a right brace */
3618 STRLEN char_length; /* cur char's byte length */
3620 /* and the number of bytes after this is
3621 * translated into hex digits */
3622 STRLEN output_length;
3624 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3625 * for max('U+', '.'); and 1 for NUL */
3626 char hex_string[2 * UTF8_MAXBYTES + 5];
3628 /* Get the first character of the result. */
3629 U32 uv = utf8n_to_uvchr((U8 *) str,
3633 /* Convert first code point to Unicode hex,
3634 * including the boiler plate before it. */
3636 my_snprintf(hex_string, sizeof(hex_string),
3638 (unsigned int) NATIVE_TO_UNI(uv));
3640 /* Make sure there is enough space to hold it */
3641 d = off + SvGROW(sv, off
3643 + (STRLEN)(send - e)
3644 + 2); /* '}' + NUL */
3646 Copy(hex_string, d, output_length, char);
3649 /* For each subsequent character, append dot and
3650 * its Unicode code point in hex */
3651 while ((str += char_length) < str_end) {
3652 const STRLEN off = d - SvPVX_const(sv);
3653 U32 uv = utf8n_to_uvchr((U8 *) str,
3658 my_snprintf(hex_string,
3661 (unsigned int) NATIVE_TO_UNI(uv));
3663 d = off + SvGROW(sv, off
3665 + (STRLEN)(send - e)
3666 + 2); /* '}' + NUL */
3667 Copy(hex_string, d, output_length, char);
3672 *d++ = '}'; /* Done. Add the trailing brace */
3675 else { /* Here, not in a pattern. Convert the name to a
3678 if (PL_lex_inwhat == OP_TRANS) {
3679 str = SvPV_const(res, len);
3680 if (len > ((SvUTF8(res))
3684 yyerror(Perl_form(aTHX_
3685 "%.*s must not be a named sequence"
3686 " in transliteration operator",
3687 /* +1 to include the "}" */
3688 (int) (e + 1 - start), start));
3689 goto end_backslash_N;
3692 else if (! SvUTF8(res)) {
3693 /* Make sure \N{} return is UTF-8. This is because
3694 * \N{} implies Unicode semantics, and scalars have
3695 * to be in utf8 to guarantee those semantics; but
3696 * not needed in tr/// */
3697 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3698 str = SvPV_const(res, len);
3701 /* Upgrade destination to be utf8 if this new
3703 if (! has_utf8 && SvUTF8(res)) {
3704 SvCUR_set(sv, d - SvPVX_const(sv));
3707 /* See Note on sizing above. */
3708 sv_utf8_upgrade_flags_grow(sv,
3709 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3710 len + (STRLEN)(send - s) + 1);
3711 d = SvPVX(sv) + SvCUR(sv);
3713 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3715 /* See Note on sizing above. (NOTE: SvCUR() is not
3716 * set correctly here). */
3717 const STRLEN off = d - SvPVX_const(sv);
3718 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3720 Copy(str, d, len, char);
3726 } /* End \N{NAME} */
3730 backslash_N++; /* \N{} is defined to be Unicode */
3732 s = e + 1; /* Point to just after the '}' */
3735 /* \c is a control character */
3739 *d++ = grok_bslash_c(*s++, 1);
3742 yyerror("Missing control char name in \\c");
3745 non_portable_endpoint++;
3749 /* printf-style backslashes, formfeeds, newlines, etc */
3775 } /* end if (backslash) */
3778 /* If we started with encoded form, or already know we want it,
3779 then encode the next character */
3780 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3783 /* One might think that it is wasted effort in the case of the
3784 * source being utf8 (this_utf8 == TRUE) to take the next character
3785 * in the source, convert it to an unsigned value, and then convert
3786 * it back again. But the source has not been validated here. The
3787 * routine that does the conversion checks for errors like
3790 const UV nextuv = (this_utf8)
3791 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3793 const STRLEN need = UVCHR_SKIP(nextuv);
3795 SvCUR_set(sv, d - SvPVX_const(sv));
3798 /* See Note on sizing above. */
3799 sv_utf8_upgrade_flags_grow(sv,
3800 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3801 need + (STRLEN)(send - s) + 1);
3802 d = SvPVX(sv) + SvCUR(sv);
3804 } else if (need > len) {
3805 /* encoded value larger than old, may need extra space (NOTE:
3806 * SvCUR() is not set correctly here). See Note on sizing
3808 const STRLEN off = d - SvPVX_const(sv);
3809 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3813 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3818 } /* while loop to process each character */
3820 /* terminate the string and set up the sv */
3822 SvCUR_set(sv, d - SvPVX_const(sv));
3823 if (SvCUR(sv) >= SvLEN(sv))
3824 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
3825 " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3830 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
3831 PL_parser->lex_sub_op->op_private |=
3832 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3836 /* shrink the sv if we allocated more than we used */
3837 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3838 SvPV_shrink_to_cur(sv);
3841 /* return the substring (via pl_yylval) only if we parsed anything */
3844 for (; s2 < s; s2++) {
3846 COPLINE_INC_WITH_HERELINES;
3848 SvREFCNT_inc_simple_void_NN(sv);
3849 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3850 && ! PL_parser->lex_re_reparsing)
3852 const char *const key = PL_lex_inpat ? "qr" : "q";
3853 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3857 if (PL_lex_inwhat == OP_TRANS) {
3860 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3863 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3871 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3874 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
3876 LEAVE_with_name("scan_const");
3881 * Returns TRUE if there's more to the expression (e.g., a subscript),
3884 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3886 * ->[ and ->{ return TRUE
3887 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3888 * { and [ outside a pattern are always subscripts, so return TRUE
3889 * if we're outside a pattern and it's not { or [, then return FALSE
3890 * if we're in a pattern and the first char is a {
3891 * {4,5} (any digits around the comma) returns FALSE
3892 * if we're in a pattern and the first char is a [
3894 * [SOMETHING] has a funky algorithm to decide whether it's a
3895 * character class or not. It has to deal with things like
3896 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3897 * anything else returns TRUE
3900 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3903 S_intuit_more(pTHX_ char *s)
3905 PERL_ARGS_ASSERT_INTUIT_MORE;
3907 if (PL_lex_brackets)
3909 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3911 if (*s == '-' && s[1] == '>'
3912 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3913 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3914 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3916 if (*s != '{' && *s != '[')
3921 /* In a pattern, so maybe we have {n,m}. */
3929 /* On the other hand, maybe we have a character class */
3932 if (*s == ']' || *s == '^')
3935 /* this is terrifying, and it works */
3938 const char * const send = strchr(s,']');
3939 unsigned char un_char, last_un_char;
3940 char tmpbuf[sizeof PL_tokenbuf * 4];
3942 if (!send) /* has to be an expression */
3944 weight = 2; /* let's weigh the evidence */
3948 else if (isDIGIT(*s)) {
3950 if (isDIGIT(s[1]) && s[2] == ']')
3956 Zero(seen,256,char);
3958 for (; s < send; s++) {
3959 last_un_char = un_char;
3960 un_char = (unsigned char)*s;
3965 weight -= seen[un_char] * 10;
3966 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3968 char *tmp = PL_bufend;
3969 PL_bufend = (char*)send;
3970 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3972 len = (int)strlen(tmpbuf);
3973 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3974 UTF ? SVf_UTF8 : 0, SVt_PV))
3981 && strchr("[#!%*<>()-=",s[1]))
3983 if (/*{*/ strchr("])} =",s[2]))
3992 if (strchr("wds]",s[1]))
3994 else if (seen[(U8)'\''] || seen[(U8)'"'])
3996 else if (strchr("rnftbxcav",s[1]))
3998 else if (isDIGIT(s[1])) {
4000 while (s[1] && isDIGIT(s[1]))
4010 if (strchr("aA01! ",last_un_char))
4012 if (strchr("zZ79~",s[1]))
4014 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4015 weight -= 5; /* cope with negative subscript */
4018 if (!isWORDCHAR(last_un_char)
4019 && !(last_un_char == '$' || last_un_char == '@'
4020 || last_un_char == '&')
4021 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4025 if (keyword(d, s - d, 0))
4028 if (un_char == last_un_char + 1)
4030 weight -= seen[un_char];
4035 if (weight >= 0) /* probably a character class */
4045 * Does all the checking to disambiguate
4047 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4048 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4050 * First argument is the stuff after the first token, e.g. "bar".
4052 * Not a method if foo is a filehandle.
4053 * Not a method if foo is a subroutine prototyped to take a filehandle.
4054 * Not a method if it's really "Foo $bar"
4055 * Method if it's "foo $bar"
4056 * Not a method if it's really "print foo $bar"
4057 * Method if it's really "foo package::" (interpreted as package->foo)
4058 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4059 * Not a method if bar is a filehandle or package, but is quoted with
4064 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4066 char *s = start + (*start == '$');
4067 char tmpbuf[sizeof PL_tokenbuf];
4070 /* Mustn't actually add anything to a symbol table.
4071 But also don't want to "initialise" any placeholder
4072 constants that might already be there into full
4073 blown PVGVs with attached PVCV. */
4075 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4077 PERL_ARGS_ASSERT_INTUIT_METHOD;
4079 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4081 if (cv && SvPOK(cv)) {
4082 const char *proto = CvPROTO(cv);
4084 while (*proto && (isSPACE(*proto) || *proto == ';'))
4091 if (*start == '$') {
4092 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4093 || isUPPER(*PL_tokenbuf))
4098 return *s == '(' ? FUNCMETH : METHOD;
4101 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4102 /* start is the beginning of the possible filehandle/object,
4103 * and s is the end of it
4104 * tmpbuf is a copy of it (but with single quotes as double colons)
4107 if (!keyword(tmpbuf, len, 0)) {
4108 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4113 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4114 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4116 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4117 && (!isGV(indirgv) || GvCVu(indirgv)))
4119 /* filehandle or package name makes it a method */
4120 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4122 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4123 return 0; /* no assumptions -- "=>" quotes bareword */
4125 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4126 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4127 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4129 force_next(BAREWORD);
4131 return *s == '(' ? FUNCMETH : METHOD;
4137 /* Encoded script support. filter_add() effectively inserts a
4138 * 'pre-processing' function into the current source input stream.
4139 * Note that the filter function only applies to the current source file
4140 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4142 * The datasv parameter (which may be NULL) can be used to pass
4143 * private data to this instance of the filter. The filter function
4144 * can recover the SV using the FILTER_DATA macro and use it to
4145 * store private buffers and state information.
4147 * The supplied datasv parameter is upgraded to a PVIO type
4148 * and the IoDIRP/IoANY field is used to store the function pointer,
4149 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4150 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4151 * private use must be set using malloc'd pointers.
4155 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4163 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4164 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4166 if (!PL_rsfp_filters)
4167 PL_rsfp_filters = newAV();
4170 SvUPGRADE(datasv, SVt_PVIO);
4171 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4172 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4173 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4174 FPTR2DPTR(void *, IoANY(datasv)),
4175 SvPV_nolen(datasv)));
4176 av_unshift(PL_rsfp_filters, 1);
4177 av_store(PL_rsfp_filters, 0, datasv) ;
4179 !PL_parser->filtered
4180 && PL_parser->lex_flags & LEX_EVALBYTES
4181 && PL_bufptr < PL_bufend
4183 const char *s = PL_bufptr;
4184 while (s < PL_bufend) {
4186 SV *linestr = PL_parser->linestr;
4187 char *buf = SvPVX(linestr);
4188 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4189 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4190 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4191 STRLEN const linestart_pos = PL_parser->linestart - buf;
4192 STRLEN const last_uni_pos =
4193 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4194 STRLEN const last_lop_pos =
4195 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4196 av_push(PL_rsfp_filters, linestr);
4197 PL_parser->linestr =
4198 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4199 buf = SvPVX(PL_parser->linestr);
4200 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4201 PL_parser->bufptr = buf + bufptr_pos;
4202 PL_parser->oldbufptr = buf + oldbufptr_pos;
4203 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4204 PL_parser->linestart = buf + linestart_pos;
4205 if (PL_parser->last_uni)
4206 PL_parser->last_uni = buf + last_uni_pos;
4207 if (PL_parser->last_lop)
4208 PL_parser->last_lop = buf + last_lop_pos;
4209 SvLEN(linestr) = SvCUR(linestr);
4210 SvCUR(linestr) = s-SvPVX(linestr);
4211 PL_parser->filtered = 1;
4221 /* Delete most recently added instance of this filter function. */
4223 Perl_filter_del(pTHX_ filter_t funcp)
4227 PERL_ARGS_ASSERT_FILTER_DEL;
4230 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4231 FPTR2DPTR(void*, funcp)));
4233 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4235 /* if filter is on top of stack (usual case) just pop it off */
4236 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4237 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4238 sv_free(av_pop(PL_rsfp_filters));
4242 /* we need to search for the correct entry and clear it */
4243 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4247 /* Invoke the idxth filter function for the current rsfp. */
4248 /* maxlen 0 = read one text line */
4250 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4254 /* This API is bad. It should have been using unsigned int for maxlen.
4255 Not sure if we want to change the API, but if not we should sanity
4256 check the value here. */
4257 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4259 PERL_ARGS_ASSERT_FILTER_READ;
4261 if (!PL_parser || !PL_rsfp_filters)
4263 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4264 /* Provide a default input filter to make life easy. */
4265 /* Note that we append to the line. This is handy. */
4266 DEBUG_P(PerlIO_printf(Perl_debug_log,
4267 "filter_read %d: from rsfp\n", idx));
4268 if (correct_length) {
4271 const int old_len = SvCUR(buf_sv);
4273 /* ensure buf_sv is large enough */
4274 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4275 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4276 correct_length)) <= 0) {
4277 if (PerlIO_error(PL_rsfp))
4278 return -1; /* error */
4280 return 0 ; /* end of file */
4282 SvCUR_set(buf_sv, old_len + len) ;
4283 SvPVX(buf_sv)[old_len + len] = '\0';
4286 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4287 if (PerlIO_error(PL_rsfp))
4288 return -1; /* error */
4290 return 0 ; /* end of file */
4293 return SvCUR(buf_sv);
4295 /* Skip this filter slot if filter has been deleted */
4296 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4297 DEBUG_P(PerlIO_printf(Perl_debug_log,
4298 "filter_read %d: skipped (filter deleted)\n",
4300 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4302 if (SvTYPE(datasv) != SVt_PVIO) {
4303 if (correct_length) {
4305 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4306 if (!remainder) return 0; /* eof */
4307 if (correct_length > remainder) correct_length = remainder;
4308 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4309 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4312 const char *s = SvEND(datasv);
4313 const char *send = SvPVX(datasv) + SvLEN(datasv);
4321 if (s == send) return 0; /* eof */
4322 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4323 SvCUR_set(datasv, s-SvPVX(datasv));
4325 return SvCUR(buf_sv);
4327 /* Get function pointer hidden within datasv */
4328 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4329 DEBUG_P(PerlIO_printf(Perl_debug_log,
4330 "filter_read %d: via function %p (%s)\n",
4331 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4332 /* Call function. The function is expected to */
4333 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4334 /* Return: <0:error, =0:eof, >0:not eof */
4335 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4339 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4341 PERL_ARGS_ASSERT_FILTER_GETS;
4343 #ifdef PERL_CR_FILTER
4344 if (!PL_rsfp_filters) {
4345 filter_add(S_cr_textfilter,NULL);
4348 if (PL_rsfp_filters) {
4350 SvCUR_set(sv, 0); /* start with empty line */
4351 if (FILTER_READ(0, sv, 0) > 0)
4352 return ( SvPVX(sv) ) ;
4357 return (sv_gets(sv, PL_rsfp, append));
4361 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4365 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4367 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4371 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4372 && (gv = gv_fetchpvn_flags(pkgname,
4374 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4376 return GvHV(gv); /* Foo:: */
4379 /* use constant CLASS => 'MyClass' */
4380 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4381 if (gv && GvCV(gv)) {
4382 SV * const sv = cv_const_sv(GvCV(gv));
4384 return gv_stashsv(sv, 0);
4387 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4392 S_tokenize_use(pTHX_ int is_use, char *s) {
4393 PERL_ARGS_ASSERT_TOKENIZE_USE;
4395 if (PL_expect != XSTATE)
4396 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4397 is_use ? "use" : "no"));
4400 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4401 s = force_version(s, TRUE);
4402 if (*s == ';' || *s == '}'
4403 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4404 NEXTVAL_NEXTTOKE.opval = NULL;
4405 force_next(BAREWORD);
4407 else if (*s == 'v') {
4408 s = force_word(s,BAREWORD,FALSE,TRUE);
4409 s = force_version(s, FALSE);
4413 s = force_word(s,BAREWORD,FALSE,TRUE);
4414 s = force_version(s, FALSE);
4416 pl_yylval.ival = is_use;
4420 static const char* const exp_name[] =
4421 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4422 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4423 "SIGVAR", "TERMORDORDOR"
4427 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4429 S_word_takes_any_delimiter(char *p, STRLEN len)
4431 return (len == 1 && strchr("msyq", p[0]))
4433 && ((p[0] == 't' && p[1] == 'r')
4434 || (p[0] == 'q' && strchr("qwxr", p[1]))));
4438 S_check_scalar_slice(pTHX_ char *s)
4441 while (*s == ' ' || *s == '\t') s++;
4442 if (*s == 'q' && s[1] == 'w'
4443 && !isWORDCHAR_lazy_if(s+2,UTF))
4445 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4446 s += UTF ? UTF8SKIP(s) : 1;
4447 if (*s == '}' || *s == ']')
4448 pl_yylval.ival = OPpSLICEWARNING;
4451 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4453 S_lex_token_boundary(pTHX)
4455 PL_oldoldbufptr = PL_oldbufptr;
4456 PL_oldbufptr = PL_bufptr;
4459 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4461 S_vcs_conflict_marker(pTHX_ char *s)
4463 lex_token_boundary();
4465 yyerror("Version control conflict marker");
4466 while (s < PL_bufend && *s != '\n')
4474 Works out what to call the token just pulled out of the input
4475 stream. The yacc parser takes care of taking the ops we return and
4476 stitching them into a tree.
4479 The type of the next token
4482 Check if we have already built the token; if so, use it.
4483 Switch based on the current state:
4484 - if we have a case modifier in a string, deal with that
4485 - handle other cases of interpolation inside a string
4486 - scan the next line if we are inside a format
4487 In the normal state, switch on the next character:
4489 if alphabetic, go to key lookup
4490 unrecognized character - croak
4491 - 0/4/26: handle end-of-line or EOF
4492 - cases for whitespace
4493 - \n and #: handle comments and line numbers
4494 - various operators, brackets and sigils
4497 - 'v': vstrings (or go to key lookup)
4498 - 'x' repetition operator (or go to key lookup)
4499 - other ASCII alphanumerics (key lookup begins here):
4502 scan built-in keyword (but do nothing with it yet)
4503 check for statement label
4504 check for lexical subs
4505 goto just_a_word if there is one
4506 see whether built-in keyword is overridden
4507 switch on keyword number:
4508 - default: just_a_word:
4509 not a built-in keyword; handle bareword lookup
4510 disambiguate between method and sub call
4511 fall back to bareword
4512 - cases for built-in keywords
4520 char *s = PL_bufptr;
4524 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4528 /* orig_keyword, gvp, and gv are initialized here because
4529 * jump to the label just_a_word_zero can bypass their
4530 * initialization later. */
4531 I32 orig_keyword = 0;
4536 SV* tmp = newSVpvs("");
4537 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4538 (IV)CopLINE(PL_curcop),
4539 lex_state_names[PL_lex_state],
4540 exp_name[PL_expect],
4541 pv_display(tmp, s, strlen(s), 0, 60));
4545 /* when we've already built the next token, just pull it out of the queue */
4548 pl_yylval = PL_nextval[PL_nexttoke];
4551 next_type = PL_nexttype[PL_nexttoke];
4552 if (next_type & (7<<24)) {
4553 if (next_type & (1<<24)) {
4554 if (PL_lex_brackets > 100)
4555 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4556 PL_lex_brackstack[PL_lex_brackets++] =
4557 (char) ((next_type >> 16) & 0xff);
4559 if (next_type & (2<<24))
4560 PL_lex_allbrackets++;
4561 if (next_type & (4<<24))
4562 PL_lex_allbrackets--;
4563 next_type &= 0xffff;
4565 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4569 switch (PL_lex_state) {
4571 case LEX_INTERPNORMAL:
4574 /* interpolated case modifiers like \L \U, including \Q and \E.
4575 when we get here, PL_bufptr is at the \
4577 case LEX_INTERPCASEMOD:
4579 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4581 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4582 PL_bufptr, PL_bufend, *PL_bufptr);
4584 /* handle \E or end of string */
4585 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4587 if (PL_lex_casemods) {
4588 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4589 PL_lex_casestack[PL_lex_casemods] = '\0';
4591 if (PL_bufptr != PL_bufend
4592 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4593 || oldmod == 'F')) {
4595 PL_lex_state = LEX_INTERPCONCAT;
4597 PL_lex_allbrackets--;
4600 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4601 /* Got an unpaired \E */
4602 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4603 "Useless use of \\E");
4605 if (PL_bufptr != PL_bufend)
4607 PL_lex_state = LEX_INTERPCONCAT;
4611 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4612 "### Saw case modifier\n"); });
4614 if (s[1] == '\\' && s[2] == 'E') {
4616 PL_lex_state = LEX_INTERPCONCAT;
4621 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4622 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4623 if ((*s == 'L' || *s == 'U' || *s == 'F')
4624 && (strchr(PL_lex_casestack, 'L')
4625 || strchr(PL_lex_casestack, 'U')
4626 || strchr(PL_lex_casestack, 'F')))
4628 PL_lex_casestack[--PL_lex_casemods] = '\0';
4629 PL_lex_allbrackets--;
4632 if (PL_lex_casemods > 10)
4633 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4634 PL_lex_casestack[PL_lex_casemods++] = *s;
4635 PL_lex_casestack[PL_lex_casemods] = '\0';
4636 PL_lex_state = LEX_INTERPCONCAT;
4637 NEXTVAL_NEXTTOKE.ival = 0;
4638 force_next((2<<24)|'(');
4640 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4642 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4644 NEXTVAL_NEXTTOKE.ival = OP_LC;
4646 NEXTVAL_NEXTTOKE.ival = OP_UC;
4648 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4650 NEXTVAL_NEXTTOKE.ival = OP_FC;
4652 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4656 if (PL_lex_starts) {
4659 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4660 if (PL_lex_casemods == 1 && PL_lex_inpat)
4663 AopNOASSIGN(OP_CONCAT);
4669 case LEX_INTERPPUSH:
4670 return REPORT(sublex_push());
4672 case LEX_INTERPSTART:
4673 if (PL_bufptr == PL_bufend)
4674 return REPORT(sublex_done());
4675 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4676 "### Interpolated variable\n"); });
4678 /* for /@a/, we leave the joining for the regex engine to do
4679 * (unless we're within \Q etc) */
4680 PL_lex_dojoin = (*PL_bufptr == '@'
4681 && (!PL_lex_inpat || PL_lex_casemods));
4682 PL_lex_state = LEX_INTERPNORMAL;
4683 if (PL_lex_dojoin) {
4684 NEXTVAL_NEXTTOKE.ival = 0;
4686 force_ident("\"", '$');
4687 NEXTVAL_NEXTTOKE.ival = 0;
4689 NEXTVAL_NEXTTOKE.ival = 0;
4690 force_next((2<<24)|'(');
4691 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4694 /* Convert (?{...}) and friends to 'do {...}' */
4695 if (PL_lex_inpat && *PL_bufptr == '(') {
4696 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4698 if (*PL_bufptr != '{')
4700 PL_expect = XTERMBLOCK;
4704 if (PL_lex_starts++) {
4706 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4707 if (!PL_lex_casemods && PL_lex_inpat)
4710 AopNOASSIGN(OP_CONCAT);
4714 case LEX_INTERPENDMAYBE:
4715 if (intuit_more(PL_bufptr)) {
4716 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4722 if (PL_lex_dojoin) {
4723 const U8 dojoin_was = PL_lex_dojoin;
4724 PL_lex_dojoin = FALSE;
4725 PL_lex_state = LEX_INTERPCONCAT;
4726 PL_lex_allbrackets--;
4727 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4729 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4730 && SvEVALED(PL_lex_repl))
4732 if (PL_bufptr != PL_bufend)
4733 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4736 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4737 re_eval_str. If the here-doc body’s length equals the previous
4738 value of re_eval_start, re_eval_start will now be null. So
4739 check re_eval_str as well. */
4740 if (PL_parser->lex_shared->re_eval_start
4741 || PL_parser->lex_shared->re_eval_str) {
4743 if (*PL_bufptr != ')')
4744 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4746 /* having compiled a (?{..}) expression, return the original
4747 * text too, as a const */
4748 if (PL_parser->lex_shared->re_eval_str) {
4749 sv = PL_parser->lex_shared->re_eval_str;
4750 PL_parser->lex_shared->re_eval_str = NULL;
4752 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4753 SvPV_shrink_to_cur(sv);
4755 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4756 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4757 NEXTVAL_NEXTTOKE.opval =
4758 newSVOP(OP_CONST, 0,
4761 PL_parser->lex_shared->re_eval_start = NULL;
4767 case LEX_INTERPCONCAT:
4769 if (PL_lex_brackets)
4770 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4771 (long) PL_lex_brackets);
4773 if (PL_bufptr == PL_bufend)
4774 return REPORT(sublex_done());
4776 /* m'foo' still needs to be parsed for possible (?{...}) */
4777 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4778 SV *sv = newSVsv(PL_linestr);
4780 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4784 s = scan_const(PL_bufptr);
4786 PL_lex_state = LEX_INTERPCASEMOD;
4788 PL_lex_state = LEX_INTERPSTART;
4791 if (s != PL_bufptr) {
4792 NEXTVAL_NEXTTOKE = pl_yylval;
4795 if (PL_lex_starts++) {
4796 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4797 if (!PL_lex_casemods && PL_lex_inpat)
4800 AopNOASSIGN(OP_CONCAT);
4810 s = scan_formline(PL_bufptr);
4811 if (!PL_lex_formbrack)
4820 /* We really do *not* want PL_linestr ever becoming a COW. */
4821 assert (!SvIsCOW(PL_linestr));
4823 PL_oldoldbufptr = PL_oldbufptr;
4825 PL_parser->saw_infix_sigil = 0;
4827 if (PL_in_my == KEY_sigvar) {
4828 /* we expect the sigil and optional var name part of a
4829 * signature element here. Since a '$' is not necessarily
4830 * followed by a var name, handle it specially here; the general
4831 * yylex code would otherwise try to interpret whatever follows
4832 * as a var; e.g. ($, ...) would be seen as the var '$,'
4839 PL_bufptr = s; /* for error reporting */
4844 /* spot stuff that looks like an prototype */
4845 if (strchr("$:@%&*;\\[]", *s)) {
4846 yyerror("Illegal character following sigil in a subroutine signature");
4849 /* '$#' is banned, while '$ # comment' isn't */
4851 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4855 if (isIDFIRST_lazy_if(s, UTF)) {
4856 char *dest = PL_tokenbuf + 1;
4857 /* read var name, including sigil, into PL_tokenbuf */
4858 PL_tokenbuf[0] = sigil;
4859 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4860 0, cBOOL(UTF), FALSE);
4862 assert(PL_tokenbuf[1]); /* we have a variable name */
4863 NEXTVAL_NEXTTOKE.ival = sigil;
4864 force_next('p'); /* force a signature pending identifier */
4868 PL_expect = XOPERATOR;
4874 case ',': /* handle ($a,,$b) */
4879 yyerror("A signature parameter must start with '$', '@' or '%'");
4880 /* very crude error recovery: skip to likely next signature
4882 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4893 if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
4895 SAVESPTR(PL_warnhook);
4896 PL_warnhook = PERL_WARNHOOK_FATAL;
4897 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
4900 if (isIDFIRST_utf8((U8*)s)) {
4904 else if (isALNUMC(*s)) {
4908 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4911 STRLEN skiplen = UTF8SKIP(s);
4912 STRLEN stravail = PL_bufend - s;
4913 c = sv_uni_display(dsv, newSVpvn_flags(s,
4914 skiplen > stravail ? stravail : skiplen,
4915 SVs_TEMP | SVf_UTF8),
4916 10, UNI_DISPLAY_ISPRINT);
4919 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4921 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4922 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4923 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)PL_linestart) : s - UNRECOGNIZED_PRECEDE_COUNT;
4927 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
4928 UTF8fARG(UTF, (s - d), d),
4933 goto fake_eof; /* emulate EOF on ^D or ^Z */
4935 if ((!PL_rsfp || PL_lex_inwhat)
4936 && (!PL_parser->filtered || s+1 < PL_bufend)) {
4940 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
4942 yyerror((const char *)
4944 ? "Format not terminated"
4945 : "Missing right curly or square bracket"));
4947 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4948 "### Tokener got EOF\n");
4952 if (s++ < PL_bufend)
4953 goto retry; /* ignore stray nulls */
4956 if (!PL_in_eval && !PL_preambled) {
4957 PL_preambled = TRUE;
4959 /* Generate a string of Perl code to load the debugger.
4960 * If PERL5DB is set, it will return the contents of that,
4961 * otherwise a compile-time require of perl5db.pl. */
4963 const char * const pdb = PerlEnv_getenv("PERL5DB");
4966 sv_setpv(PL_linestr, pdb);
4967 sv_catpvs(PL_linestr,";");
4969 SETERRNO(0,SS_NORMAL);
4970 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4972 PL_parser->preambling = CopLINE(PL_curcop);
4974 SvPVCLEAR(PL_linestr);
4975 if (PL_preambleav) {
4976 SV **svp = AvARRAY(PL_preambleav);
4977 SV **const end = svp + AvFILLp(PL_preambleav);
4979 sv_catsv(PL_linestr, *svp);
4981 sv_catpvs(PL_linestr, ";");
4983 sv_free(MUTABLE_SV(PL_preambleav));
4984 PL_preambleav = NULL;
4987 sv_catpvs(PL_linestr,
4988 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4989 if (PL_minus_n || PL_minus_p) {
4990 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4992 sv_catpvs(PL_linestr,"chomp;");
4995 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4996 || *PL_splitstr == '"')
4997 && strchr(PL_splitstr + 1, *PL_splitstr))
4998 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5000 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5001 bytes can be used as quoting characters. :-) */
5002 const char *splits = PL_splitstr;
5003 sv_catpvs(PL_linestr, "our @F=split(q\0");
5006 if (*splits == '\\')
5007 sv_catpvn(PL_linestr, splits, 1);
5008 sv_catpvn(PL_linestr, splits, 1);
5009 } while (*splits++);
5010 /* This loop will embed the trailing NUL of
5011 PL_linestr as the last thing it does before
5013 sv_catpvs(PL_linestr, ");");
5017 sv_catpvs(PL_linestr,"our @F=split(' ');");
5020 sv_catpvs(PL_linestr, "\n");
5021 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5022 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5023 PL_last_lop = PL_last_uni = NULL;
5024 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5025 update_debugger_info(PL_linestr, NULL, 0);
5030 bof = PL_rsfp ? TRUE : FALSE;
5033 fake_eof = LEX_FAKE_EOF;
5035 PL_bufptr = PL_bufend;
5036 COPLINE_INC_WITH_HERELINES;
5037 if (!lex_next_chunk(fake_eof)) {
5038 CopLINE_dec(PL_curcop);
5040 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5042 CopLINE_dec(PL_curcop);
5044 /* If it looks like the start of a BOM or raw UTF-16,
5045 * check if it in fact is. */
5048 || *(U8*)s == BOM_UTF8_FIRST_BYTE
5052 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5053 bof = (offset == (Off_t)SvCUR(PL_linestr));
5054 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5055 /* offset may include swallowed CR */
5057 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5060 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5061 s = swallow_bom((U8*)s);
5064 if (PL_parser->in_pod) {
5065 /* Incest with pod. */
5066 if (*s == '=' && strEQs(s, "=cut") && !isALPHA(s[4])) {
5067 SvPVCLEAR(PL_linestr);
5068 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5069 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5070 PL_last_lop = PL_last_uni = NULL;
5071 PL_parser->in_pod = 0;
5074 if (PL_rsfp || PL_parser->filtered)
5076 } while (PL_parser->in_pod);
5077 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5078 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5079 PL_last_lop = PL_last_uni = NULL;
5080 if (CopLINE(PL_curcop) == 1) {
5081 while (s < PL_bufend && isSPACE(*s))
5083 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5087 if (*s == '#' && *(s+1) == '!')
5089 #ifdef ALTERNATE_SHEBANG
5091 static char const as[] = ALTERNATE_SHEBANG;
5092 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5093 d = s + (sizeof(as) - 1);
5095 #endif /* ALTERNATE_SHEBANG */
5104 while (*d && !isSPACE(*d))
5108 #ifdef ARG_ZERO_IS_SCRIPT
5109 if (ipathend > ipath) {
5111 * HP-UX (at least) sets argv[0] to the script name,
5112 * which makes $^X incorrect. And Digital UNIX and Linux,
5113 * at least, set argv[0] to the basename of the Perl
5114 * interpreter. So, having found "#!", we'll set it right.
5116 SV* copfilesv = CopFILESV(PL_curcop);
5119 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5121 assert(SvPOK(x) || SvGMAGICAL(x));
5122 if (sv_eq(x, copfilesv)) {
5123 sv_setpvn(x, ipath, ipathend - ipath);
5129 const char *bstart = SvPV_const(copfilesv, blen);
5130 const char * const lstart = SvPV_const(x, llen);
5132 bstart += blen - llen;
5133 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5134 sv_setpvn(x, ipath, ipathend - ipath);
5141 /* Anything to do if no copfilesv? */
5143 TAINT_NOT; /* $^X is always tainted, but that's OK */
5145 #endif /* ARG_ZERO_IS_SCRIPT */
5150 d = instr(s,"perl -");
5152 d = instr(s,"perl");
5153 if (d && d[4] == '6')
5156 /* avoid getting into infinite loops when shebang
5157 * line contains "Perl" rather than "perl" */
5159 for (d = ipathend-4; d >= ipath; --d) {
5160 if (isALPHA_FOLD_EQ(*d, 'p')
5161 && !ibcmp(d, "perl", 4))
5171 #ifdef ALTERNATE_SHEBANG
5173 * If the ALTERNATE_SHEBANG on this system starts with a
5174 * character that can be part of a Perl expression, then if
5175 * we see it but not "perl", we're probably looking at the
5176 * start of Perl code, not a request to hand off to some
5177 * other interpreter. Similarly, if "perl" is there, but
5178 * not in the first 'word' of the line, we assume the line
5179 * contains the start of the Perl program.
5181 if (d && *s != '#') {
5182 const char *c = ipath;
5183 while (*c && !strchr("; \t\r\n\f\v#", *c))
5186 d = NULL; /* "perl" not in first word; ignore */
5188 *s = '#'; /* Don't try to parse shebang line */
5190 #endif /* ALTERNATE_SHEBANG */
5195 && !instr(s,"indir")
5196 && instr(PL_origargv[0],"perl"))
5203 while (s < PL_bufend && isSPACE(*s))
5205 if (s < PL_bufend) {
5206 Newx(newargv,PL_origargc+3,char*);
5208 while (s < PL_bufend && !isSPACE(*s))
5211 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5214 newargv = PL_origargv;
5217 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5219 Perl_croak(aTHX_ "Can't exec %s", ipath);
5222 while (*d && !isSPACE(*d))
5224 while (SPACE_OR_TAB(*d))
5228 const bool switches_done = PL_doswitches;
5229 const U32 oldpdb = PL_perldb;
5230 const bool oldn = PL_minus_n;
5231 const bool oldp = PL_minus_p;
5235 bool baduni = FALSE;
5237 const char *d2 = d1 + 1;
5238 if (parse_unicode_opts((const char **)&d2)
5242 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5243 const char * const m = d1;
5244 while (*d1 && !isSPACE(*d1))
5246 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5249 d1 = moreswitches(d1);
5251 if (PL_doswitches && !switches_done) {
5252 int argc = PL_origargc;
5253 char **argv = PL_origargv;
5256 } while (argc && argv[0][0] == '-' && argv[0][1]);
5257 init_argv_symbols(argc,argv);
5259 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5260 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5261 /* if we have already added "LINE: while (<>) {",
5262 we must not do it again */
5264 SvPVCLEAR(PL_linestr);
5265 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5266 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5267 PL_last_lop = PL_last_uni = NULL;
5268 PL_preambled = FALSE;
5269 if (PERLDB_LINE_OR_SAVESRC)
5270 (void)gv_fetchfile(PL_origfilename);
5277 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5278 PL_lex_state = LEX_FORMLINE;
5279 force_next(FORMRBRACK);
5284 #ifdef PERL_STRICT_CR
5285 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5287 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5289 case ' ': case '\t': case '\f': case '\v':
5294 if (PL_lex_state != LEX_NORMAL
5295 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5297 const bool in_comment = *s == '#';
5298 if (*s == '#' && s == PL_linestart && PL_in_eval
5299 && !PL_rsfp && !PL_parser->filtered) {
5300 /* handle eval qq[#line 1 "foo"\n ...] */
5301 CopLINE_dec(PL_curcop);
5305 while (d < PL_bufend && *d != '\n')
5309 else if (d > PL_bufend)
5310 /* Found by Ilya: feed random input to Perl. */
5311 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5314 if (in_comment && d == PL_bufend
5315 && PL_lex_state == LEX_INTERPNORMAL
5316 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5317 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5320 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5321 PL_lex_state = LEX_FORMLINE;
5322 force_next(FORMRBRACK);
5327 while (s < PL_bufend && *s != '\n')
5335 else if (s > PL_bufend)
5336 /* Found by Ilya: feed random input to Perl. */
5337 Perl_croak(aTHX_ "panic: input overflow");
5341 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5349 while (s < PL_bufend && SPACE_OR_TAB(*s))
5352 if (strEQs(s,"=>")) {
5353 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5354 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5355 OPERATOR('-'); /* unary minus */
5358 case 'r': ftst = OP_FTEREAD; break;
5359 case 'w': ftst = OP_FTEWRITE; break;
5360 case 'x': ftst = OP_FTEEXEC; break;
5361 case 'o': ftst = OP_FTEOWNED; break;
5362 case 'R': ftst = OP_FTRREAD; break;
5363 case 'W': ftst = OP_FTRWRITE; break;
5364 case 'X': ftst = OP_FTREXEC; break;
5365 case 'O': ftst = OP_FTROWNED; break;
5366 case 'e': ftst = OP_FTIS; break;
5367 case 'z': ftst = OP_FTZERO; break;
5368 case 's': ftst = OP_FTSIZE; break;
5369 case 'f': ftst = OP_FTFILE; break;
5370 case 'd': ftst = OP_FTDIR; break;
5371 case 'l': ftst = OP_FTLINK; break;
5372 case 'p': ftst = OP_FTPIPE; break;
5373 case 'S': ftst = OP_FTSOCK; break;
5374 case 'u': ftst = OP_FTSUID; break;
5375 case 'g': ftst = OP_FTSGID; break;
5376 case 'k': ftst = OP_FTSVTX; break;
5377 case 'b': ftst = OP_FTBLK; break;
5378 case 'c': ftst = OP_FTCHR; break;
5379 case 't': ftst = OP_FTTTY; break;
5380 case 'T': ftst = OP_FTTEXT; break;
5381 case 'B': ftst = OP_FTBINARY; break;
5382 case 'M': case 'A': case 'C':
5383 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5385 case 'M': ftst = OP_FTMTIME; break;
5386 case 'A': ftst = OP_FTATIME; break;
5387 case 'C': ftst = OP_FTCTIME; break;
5395 PL_last_uni = PL_oldbufptr;
5396 PL_last_lop_op = (OPCODE)ftst;
5397 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5398 "### Saw file test %c\n", (int)tmp);
5403 /* Assume it was a minus followed by a one-letter named
5404 * subroutine call (or a -bareword), then. */
5405 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5406 "### '-%c' looked like a file test but was not\n",
5413 const char tmp = *s++;
5416 if (PL_expect == XOPERATOR)
5421 else if (*s == '>') {
5424 if (((*s == '$' || *s == '&') && s[1] == '*')
5425 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5426 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5427 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5430 PL_expect = XPOSTDEREF;
5433 if (isIDFIRST_lazy_if(s,UTF)) {
5434 s = force_word(s,METHOD,FALSE,TRUE);
5442 if (PL_expect == XOPERATOR) {
5444 && !PL_lex_allbrackets
5445 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5453 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5455 OPERATOR('-'); /* unary minus */
5461 const char tmp = *s++;
5464 if (PL_expect == XOPERATOR)
5469 if (PL_expect == XOPERATOR) {
5471 && !PL_lex_allbrackets
5472 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5480 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5487 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5488 if (PL_expect != XOPERATOR) {
5489 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5490 PL_expect = XOPERATOR;
5491 force_ident(PL_tokenbuf, '*');
5499 if (*s == '=' && !PL_lex_allbrackets
5500 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5508 && !PL_lex_allbrackets
5509 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5514 PL_parser->saw_infix_sigil = 1;
5519 if (PL_expect == XOPERATOR) {
5521 && !PL_lex_allbrackets
5522 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5527 PL_parser->saw_infix_sigil = 1;
5530 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5531 PL_tokenbuf[0] = '%';
5532 s = scan_ident(s, PL_tokenbuf + 1,
5533 sizeof PL_tokenbuf - 1, FALSE);
5535 if (!PL_tokenbuf[1]) {
5538 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5540 PL_tokenbuf[0] = '@';
5542 PL_expect = XOPERATOR;
5543 force_ident_maybe_lex('%');
5548 bof = FEATURE_BITWISE_IS_ENABLED;
5549 if (bof && s[1] == '.')
5551 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5552 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5558 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5560 if (PL_lex_brackets > 100)
5561 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5562 PL_lex_brackstack[PL_lex_brackets++] = 0;
5563 PL_lex_allbrackets++;
5565 const char tmp = *s++;
5570 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5572 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5575 Perl_ck_warner_d(aTHX_
5576 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5577 "Smartmatch is experimental");
5581 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5583 BCop(OP_SCOMPLEMENT);
5585 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5587 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5594 goto just_a_word_zero_gv;
5600 switch (PL_expect) {
5602 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5604 PL_bufptr = s; /* update in case we back off */
5607 "Use of := for an empty attribute list is not allowed");
5614 PL_expect = XTERMBLOCK;
5618 while (isIDFIRST_lazy_if(s,UTF)) {
5621 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5622 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5623 if (tmp < 0) tmp = -tmp;
5638 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5640 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5645 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5647 COPLINE_SET_FROM_MULTI_END;
5650 sv_catsv(sv, PL_lex_stuff);
5651 attrs = op_append_elem(OP_LIST, attrs,
5652 newSVOP(OP_CONST, 0, sv));
5653 SvREFCNT_dec_NN(PL_lex_stuff);
5654 PL_lex_stuff = NULL;
5657 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5659 if (PL_in_my == KEY_our) {
5660 deprecate(":unique");
5663 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5666 /* NOTE: any CV attrs applied here need to be part of
5667 the CVf_BUILTIN_ATTRS define in cv.h! */
5668 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5670 CvLVALUE_on(PL_compcv);
5672 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5674 deprecate(":locked");
5676 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5678 CvMETHOD_on(PL_compcv);
5680 else if (!PL_in_my && len == 5
5681 && strnEQ(SvPVX(sv), "const", len))
5684 Perl_ck_warner_d(aTHX_
5685 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5686 ":const is experimental"
5688 CvANONCONST_on(PL_compcv);
5689 if (!CvANON(PL_compcv))
5690 yyerror(":const is not permitted on named "
5693 /* After we've set the flags, it could be argued that
5694 we don't need to do the attributes.pm-based setting
5695 process, and shouldn't bother appending recognized
5696 flags. To experiment with that, uncomment the
5697 following "else". (Note that's already been
5698 uncommented. That keeps the above-applied built-in
5699 attributes from being intercepted (and possibly
5700 rejected) by a package's attribute routines, but is
5701 justified by the performance win for the common case
5702 of applying only built-in attributes.) */
5704 attrs = op_append_elem(OP_LIST, attrs,
5705 newSVOP(OP_CONST, 0,
5709 if (*s == ':' && s[1] != ':')
5712 break; /* require real whitespace or :'s */
5713 /* XXX losing whitespace on sequential attributes here */
5718 && !(PL_expect == XOPERATOR
5719 ? (*s == '=' || *s == ')')
5720 : (*s == '{' || *s == '(')))
5722 const char q = ((*s == '\'') ? '"' : '\'');
5723 /* If here for an expression, and parsed no attrs, back
5725 if (PL_expect == XOPERATOR && !attrs) {
5729 /* MUST advance bufptr here to avoid bogus "at end of line"
5730 context messages from yyerror().
5733 yyerror( (const char *)
5735 ? Perl_form(aTHX_ "Invalid separator character "
5736 "%c%c%c in attribute list", q, *s, q)
5737 : "Unterminated attribute list" ) );
5745 NEXTVAL_NEXTTOKE.opval = attrs;
5751 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5755 PL_lex_allbrackets--;
5759 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5760 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5764 PL_lex_allbrackets++;
5767 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5774 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5777 PL_lex_allbrackets--;
5783 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5786 if (PL_lex_brackets <= 0)
5787 /* diag_listed_as: Unmatched right %s bracket */
5788 yyerror("Unmatched right square bracket");
5791 PL_lex_allbrackets--;
5792 if (PL_lex_state == LEX_INTERPNORMAL) {
5793 if (PL_lex_brackets == 0) {
5794 if (*s == '-' && s[1] == '>')
5795 PL_lex_state = LEX_INTERPENDMAYBE;
5796 else if (*s != '[' && *s != '{')
5797 PL_lex_state = LEX_INTERPEND;
5804 if (PL_lex_brackets > 100) {
5805 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5807 switch (PL_expect) {
5810 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5811 PL_lex_allbrackets++;
5812 OPERATOR(HASHBRACK);
5814 while (s < PL_bufend && SPACE_OR_TAB(*s))
5817 PL_tokenbuf[0] = '\0';
5818 if (d < PL_bufend && *d == '-') {
5819 PL_tokenbuf[0] = '-';
5821 while (d < PL_bufend && SPACE_OR_TAB(*d))
5824 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5825 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5827 while (d < PL_bufend && SPACE_OR_TAB(*d))
5830 const char minus = (PL_tokenbuf[0] == '-');
5831 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
5839 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5840 PL_lex_allbrackets++;
5845 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5846 PL_lex_allbrackets++;
5850 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5851 PL_lex_allbrackets++;
5856 if (PL_oldoldbufptr == PL_last_lop)
5857 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5859 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5860 PL_lex_allbrackets++;
5863 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5865 /* This hack is to get the ${} in the message. */
5867 yyerror("syntax error");
5870 OPERATOR(HASHBRACK);
5872 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5873 /* ${...} or @{...} etc., but not print {...}
5874 * Skip the disambiguation and treat this as a block.
5876 goto block_expectation;
5878 /* This hack serves to disambiguate a pair of curlies
5879 * as being a block or an anon hash. Normally, expectation
5880 * determines that, but in cases where we're not in a
5881 * position to expect anything in particular (like inside
5882 * eval"") we have to resolve the ambiguity. This code
5883 * covers the case where the first term in the curlies is a
5884 * quoted string. Most other cases need to be explicitly
5885 * disambiguated by prepending a "+" before the opening
5886 * curly in order to force resolution as an anon hash.
5888 * XXX should probably propagate the outer expectation
5889 * into eval"" to rely less on this hack, but that could
5890 * potentially break current behavior of eval"".
5894 if (*s == '\'' || *s == '"' || *s == '`') {
5895 /* common case: get past first string, handling escapes */
5896 for (t++; t < PL_bufend && *t != *s;)
5901 else if (*s == 'q') {
5904 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5905 && !isWORDCHAR(*t))))
5907 /* skip q//-like construct */
5909 char open, close, term;
5912 while (t < PL_bufend && isSPACE(*t))
5914 /* check for q => */
5915 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5916 OPERATOR(HASHBRACK);
5920 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5924 for (t++; t < PL_bufend; t++) {
5925 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5927 else if (*t == open)
5931 for (t++; t < PL_bufend; t++) {
5932 if (*t == '\\' && t+1 < PL_bufend)
5934 else if (*t == close && --brackets <= 0)
5936 else if (*t == open)
5943 /* skip plain q word */
5944 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5945 t += UTF ? UTF8SKIP(t) : 1;
5947 else if (isWORDCHAR_lazy_if(t,UTF)) {
5948 t += UTF ? UTF8SKIP(t) : 1;
5949 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5950 t += UTF ? UTF8SKIP(t) : 1;
5952 while (t < PL_bufend && isSPACE(*t))
5954 /* if comma follows first term, call it an anon hash */
5955 /* XXX it could be a comma expression with loop modifiers */
5956 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5957 || (*t == '=' && t[1] == '>')))
5958 OPERATOR(HASHBRACK);
5959 if (PL_expect == XREF)
5962 /* If there is an opening brace or 'sub:', treat it
5963 as a term to make ${{...}}{k} and &{sub:attr...}
5964 dwim. Otherwise, treat it as a statement, so
5965 map {no strict; ...} works.
5972 if (strEQs(s, "sub")) {
5983 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5989 pl_yylval.ival = CopLINE(PL_curcop);
5990 PL_copline = NOLINE; /* invalidate current command line number */
5991 TOKEN(formbrack ? '=' : '{');
5993 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5997 if (PL_lex_brackets <= 0)
5998 /* diag_listed_as: Unmatched right %s bracket */
5999 yyerror("Unmatched right curly bracket");
6001 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6002 PL_lex_allbrackets--;
6003 if (PL_lex_state == LEX_INTERPNORMAL) {
6004 if (PL_lex_brackets == 0) {
6005 if (PL_expect & XFAKEBRACK) {
6006 PL_expect &= XENUMMASK;
6007 PL_lex_state = LEX_INTERPEND;
6009 return yylex(); /* ignore fake brackets */
6011 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6012 && SvEVALED(PL_lex_repl))
6013 PL_lex_state = LEX_INTERPEND;
6014 else if (*s == '-' && s[1] == '>')
6015 PL_lex_state = LEX_INTERPENDMAYBE;
6016 else if (*s != '[' && *s != '{')
6017 PL_lex_state = LEX_INTERPEND;
6020 if (PL_expect & XFAKEBRACK) {
6021 PL_expect &= XENUMMASK;
6023 return yylex(); /* ignore fake brackets */
6025 force_next(formbrack ? '.' : '}');
6026 if (formbrack) LEAVE;
6027 if (formbrack == 2) { /* means . where arguments were expected */
6033 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6036 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6037 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6044 if (PL_expect == XOPERATOR) {
6045 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6046 && isIDFIRST_lazy_if(s,UTF))
6048 CopLINE_dec(PL_curcop);
6049 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6050 CopLINE_inc(PL_curcop);
6053 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6055 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6056 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6062 PL_parser->saw_infix_sigil = 1;
6063 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6069 PL_tokenbuf[0] = '&';
6070 s = scan_ident(s - 1, PL_tokenbuf + 1,
6071 sizeof PL_tokenbuf - 1, TRUE);
6072 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6073 if (PL_tokenbuf[1]) {
6074 force_ident_maybe_lex('&');
6083 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6084 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6092 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6094 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6095 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6099 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6103 const char tmp = *s++;
6105 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "=====")) {
6106 s = vcs_conflict_marker(s + 5);
6109 if (!PL_lex_allbrackets
6110 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6118 if (!PL_lex_allbrackets
6119 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6128 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6129 && strchr("+-*/%.^&|<",tmp))
6130 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6131 "Reversed %c= operator",(int)tmp);
6133 if (PL_expect == XSTATE
6135 && (s == PL_linestart+1 || s[-2] == '\n') )
6137 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6138 || PL_lex_state != LEX_NORMAL) {
6143 if (strEQs(s,"=cut")) {
6157 PL_parser->in_pod = 1;
6161 if (PL_expect == XBLOCK) {
6163 #ifdef PERL_STRICT_CR
6164 while (SPACE_OR_TAB(*t))
6166 while (SPACE_OR_TAB(*t) || *t == '\r')
6169 if (*t == '\n' || *t == '#') {
6172 SAVEI8(PL_parser->form_lex_state);
6173 SAVEI32(PL_lex_formbrack);
6174 PL_parser->form_lex_state = PL_lex_state;
6175 PL_lex_formbrack = PL_lex_brackets + 1;
6179 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6188 const char tmp = *s++;
6190 /* was this !=~ where !~ was meant?
6191 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6193 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6194 const char *t = s+1;
6196 while (t < PL_bufend && isSPACE(*t))
6199 if (*t == '/' || *t == '?'
6200 || ((*t == 'm' || *t == 's' || *t == 'y')
6201 && !isWORDCHAR(t[1]))
6202 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6203 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6204 "!=~ should be !~");
6206 if (!PL_lex_allbrackets
6207 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6220 if (PL_expect != XOPERATOR) {
6221 if (s[1] != '<' && !strchr(s,'>'))
6223 if (s[1] == '<' && s[2] != '>') {
6224 if ((s == PL_linestart || s[-1] == '\n') && strEQs(s+2, "<<<<<")) {
6225 s = vcs_conflict_marker(s + 7);
6228 s = scan_heredoc(s);
6231 s = scan_inputsymbol(s);
6232 PL_expect = XOPERATOR;
6233 TOKEN(sublex_start());
6239 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, "<<<<<")) {
6240 s = vcs_conflict_marker(s + 5);
6243 if (*s == '=' && !PL_lex_allbrackets
6244 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6249 SHop(OP_LEFT_SHIFT);
6254 if (!PL_lex_allbrackets
6255 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6263 if (!PL_lex_allbrackets
6264 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6273 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6281 const char tmp = *s++;
6283 if ((s == PL_linestart+2 || s[-3] == '\n') && strEQs(s, ">>>>>")) {
6284 s = vcs_conflict_marker(s + 5);
6287 if (*s == '=' && !PL_lex_allbrackets
6288 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6293 SHop(OP_RIGHT_SHIFT);
6295 else if (tmp == '=') {
6296 if (!PL_lex_allbrackets
6297 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6306 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6315 if (PL_expect == XOPERATOR) {
6316 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6317 return deprecate_commaless_var_list();
6320 else if (PL_expect == XPOSTDEREF) {
6323 POSTDEREF(DOLSHARP);
6328 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6329 PL_tokenbuf[0] = '@';
6330 s = scan_ident(s + 1, PL_tokenbuf + 1,
6331 sizeof PL_tokenbuf - 1, FALSE);
6332 if (PL_expect == XOPERATOR) {
6334 if (PL_bufptr > s) {
6336 PL_bufptr = PL_oldbufptr;
6338 no_op("Array length", d);
6340 if (!PL_tokenbuf[1])
6342 PL_expect = XOPERATOR;
6343 force_ident_maybe_lex('#');
6347 PL_tokenbuf[0] = '$';
6348 s = scan_ident(s, PL_tokenbuf + 1,
6349 sizeof PL_tokenbuf - 1, FALSE);
6350 if (PL_expect == XOPERATOR) {
6352 if (PL_bufptr > s) {
6354 PL_bufptr = PL_oldbufptr;
6358 if (!PL_tokenbuf[1]) {
6360 yyerror("Final $ should be \\$ or $name");
6366 const char tmp = *s;
6367 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6370 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6371 && intuit_more(s)) {
6373 PL_tokenbuf[0] = '@';
6374 if (ckWARN(WARN_SYNTAX)) {
6377 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6378 t += UTF ? UTF8SKIP(t) : 1;
6380 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6381 while (t < PL_bufend && *t != ']')
6383 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6384 "Multidimensional syntax %" UTF8f " not supported",
6385 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6389 else if (*s == '{') {
6391 PL_tokenbuf[0] = '%';
6392 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6393 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6395 char tmpbuf[sizeof PL_tokenbuf];
6398 } while (isSPACE(*t));
6399 if (isIDFIRST_lazy_if(t,UTF)) {
6401 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6406 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6407 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6408 "You need to quote \"%" UTF8f "\"",
6409 UTF8fARG(UTF, len, tmpbuf));
6415 PL_expect = XOPERATOR;
6416 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6417 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6418 if (!islop || PL_last_lop_op == OP_GREPSTART)
6419 PL_expect = XOPERATOR;
6420 else if (strchr("$@\"'`q", *s))
6421 PL_expect = XTERM; /* e.g. print $fh "foo" */
6422 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6423 PL_expect = XTERM; /* e.g. print $fh &sub */
6424 else if (isIDFIRST_lazy_if(s,UTF)) {
6425 char tmpbuf[sizeof PL_tokenbuf];
6427 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6428 if ((t2 = keyword(tmpbuf, len, 0))) {
6429 /* binary operators exclude handle interpretations */
6441 PL_expect = XTERM; /* e.g. print $fh length() */
6446 PL_expect = XTERM; /* e.g. print $fh subr() */
6449 else if (isDIGIT(*s))
6450 PL_expect = XTERM; /* e.g. print $fh 3 */
6451 else if (*s == '.' && isDIGIT(s[1]))
6452 PL_expect = XTERM; /* e.g. print $fh .3 */
6453 else if ((*s == '?' || *s == '-' || *s == '+')
6454 && !isSPACE(s[1]) && s[1] != '=')
6455 PL_expect = XTERM; /* e.g. print $fh -1 */
6456 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6458 PL_expect = XTERM; /* e.g. print $fh /.../
6459 XXX except DORDOR operator
6461 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6463 PL_expect = XTERM; /* print $fh <<"EOF" */
6466 force_ident_maybe_lex('$');
6470 if (PL_expect == XPOSTDEREF)
6472 PL_tokenbuf[0] = '@';
6473 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6474 if (PL_expect == XOPERATOR) {
6476 if (PL_bufptr > s) {
6478 PL_bufptr = PL_oldbufptr;
6483 if (!PL_tokenbuf[1]) {
6486 if (PL_lex_state == LEX_NORMAL)
6488 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6490 PL_tokenbuf[0] = '%';
6492 /* Warn about @ where they meant $. */
6493 if (*s == '[' || *s == '{') {
6494 if (ckWARN(WARN_SYNTAX)) {
6495 S_check_scalar_slice(aTHX_ s);
6499 PL_expect = XOPERATOR;
6500 force_ident_maybe_lex('@');
6503 case '/': /* may be division, defined-or, or pattern */
6504 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6505 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6506 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6511 else if (PL_expect == XOPERATOR) {
6513 if (*s == '=' && !PL_lex_allbrackets
6514 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6522 /* Disable warning on "study /blah/" */
6523 if (PL_oldoldbufptr == PL_last_uni
6524 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6525 || memNE(PL_last_uni, "study", 5)
6526 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6529 s = scan_pat(s,OP_MATCH);
6530 TERM(sublex_start());
6533 case '?': /* conditional */
6535 if (!PL_lex_allbrackets
6536 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6541 PL_lex_allbrackets++;
6545 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6546 #ifdef PERL_STRICT_CR
6549 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6551 && (s == PL_linestart || s[-1] == '\n') )
6554 formbrack = 2; /* dot seen where arguments expected */
6557 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6561 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6564 if (!PL_lex_allbrackets
6565 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6573 pl_yylval.ival = OPf_SPECIAL;
6579 if (*s == '=' && !PL_lex_allbrackets
6580 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6588 case '0': case '1': case '2': case '3': case '4':
6589 case '5': case '6': case '7': case '8': case '9':
6590 s = scan_num(s, &pl_yylval);
6591 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6592 if (PL_expect == XOPERATOR)
6597 if ( PL_expect == XOPERATOR
6598 && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6599 return deprecate_commaless_var_list();
6601 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6604 COPLINE_SET_FROM_MULTI_END;
6605 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6606 if (PL_expect == XOPERATOR) {
6609 pl_yylval.ival = OP_CONST;
6610 TERM(sublex_start());
6613 if ( PL_expect == XOPERATOR
6614 && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6615 return deprecate_commaless_var_list();
6617 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6620 printbuf("### Saw string before %s\n", s);
6622 PerlIO_printf(Perl_debug_log,
6623 "### Saw unterminated string\n");
6625 if (PL_expect == XOPERATOR) {
6630 pl_yylval.ival = OP_CONST;
6631 /* FIXME. I think that this can be const if char *d is replaced by
6632 more localised variables. */
6633 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6634 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6635 pl_yylval.ival = OP_STRINGIFY;
6639 if (pl_yylval.ival == OP_CONST)
6640 COPLINE_SET_FROM_MULTI_END;
6641 TERM(sublex_start());
6644 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6647 printbuf("### Saw backtick string before %s\n", s);
6649 PerlIO_printf(Perl_debug_log,
6650 "### Saw unterminated backtick string\n");
6652 if (PL_expect == XOPERATOR)
6653 no_op("Backticks",s);
6656 pl_yylval.ival = OP_BACKTICK;
6657 TERM(sublex_start());
6661 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6663 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6665 if (PL_expect == XOPERATOR)
6666 no_op("Backslash",s);
6670 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6671 char *start = s + 2;
6672 while (isDIGIT(*start) || *start == '_')
6674 if (*start == '.' && isDIGIT(start[1])) {
6675 s = scan_num(s, &pl_yylval);
6678 else if ((*start == ':' && start[1] == ':')
6679 || (PL_expect == XSTATE && *start == ':'))
6681 else if (PL_expect == XSTATE) {
6683 while (d < PL_bufend && isSPACE(*d)) d++;
6684 if (*d == ':') goto keylookup;
6686 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6687 if (!isALPHA(*start) && (PL_expect == XTERM
6688 || PL_expect == XREF || PL_expect == XSTATE
6689 || PL_expect == XTERMORDORDOR)) {
6690 GV *const gv = gv_fetchpvn_flags(s, start - s,
6691 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6693 s = scan_num(s, &pl_yylval);
6700 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6753 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6755 /* Some keywords can be followed by any delimiter, including ':' */
6756 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
6758 /* x::* is just a word, unless x is "CORE" */
6759 if (!anydelim && *s == ':' && s[1] == ':') {
6760 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6765 while (d < PL_bufend && isSPACE(*d))
6766 d++; /* no comments skipped here, or s### is misparsed */
6768 /* Is this a word before a => operator? */
6769 if (*d == '=' && d[1] == '>') {
6773 = newSVOP(OP_CONST, 0,
6774 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6775 pl_yylval.opval->op_private = OPpCONST_BARE;
6779 /* Check for plugged-in keyword */
6783 char *saved_bufptr = PL_bufptr;
6785 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6787 if (result == KEYWORD_PLUGIN_DECLINE) {
6788 /* not a plugged-in keyword */
6789 PL_bufptr = saved_bufptr;
6790 } else if (result == KEYWORD_PLUGIN_STMT) {
6791 pl_yylval.opval = o;
6793 if (!PL_nexttoke) PL_expect = XSTATE;
6794 return REPORT(PLUGSTMT);
6795 } else if (result == KEYWORD_PLUGIN_EXPR) {
6796 pl_yylval.opval = o;
6798 if (!PL_nexttoke) PL_expect = XOPERATOR;
6799 return REPORT(PLUGEXPR);
6801 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6806 /* Check for built-in keyword */
6807 tmp = keyword(PL_tokenbuf, len, 0);
6809 /* Is this a label? */
6810 if (!anydelim && PL_expect == XSTATE
6811 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6813 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6814 pl_yylval.pval[len] = '\0';
6815 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6820 /* Check for lexical sub */
6821 if (PL_expect != XOPERATOR) {
6822 char tmpbuf[sizeof PL_tokenbuf + 1];
6824 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6825 off = pad_findmy_pvn(tmpbuf, len+1, 0);
6826 if (off != NOT_IN_PAD) {
6827 assert(off); /* we assume this is boolean-true below */
6828 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6829 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6830 HEK * const stashname = HvNAME_HEK(stash);
6831 sv = newSVhek(stashname);
6832 sv_catpvs(sv, "::");
6833 sv_catpvn_flags(sv, PL_tokenbuf, len,
6834 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6835 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6845 rv2cv_op = newOP(OP_PADANY, 0);
6846 rv2cv_op->op_targ = off;
6847 cv = find_lexical_cv(off);
6855 if (tmp < 0) { /* second-class keyword? */
6856 GV *ogv = NULL; /* override (winner) */
6857 GV *hgv = NULL; /* hidden (loser) */
6858 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6860 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6861 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6863 && (cv = GvCVu(gv)))
6865 if (GvIMPORTED_CV(gv))
6867 else if (! CvMETHOD(cv))
6871 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6874 && (isGV_with_GP(gv)
6875 ? GvCVu(gv) && GvIMPORTED_CV(gv)
6876 : SvPCS_IMPORTED(gv)
6877 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6885 tmp = 0; /* overridden by import or by GLOBAL */
6888 && -tmp==KEY_lock /* XXX generalizable kludge */
6891 tmp = 0; /* any sub overrides "weak" keyword */
6893 else { /* no override */
6895 if (tmp == KEY_dump) {
6896 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6897 "dump() better written as CORE::dump()");
6901 if (hgv && tmp != KEY_x) /* never ambiguous */
6902 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6903 "Ambiguous call resolved as CORE::%s(), "
6904 "qualify as such or use &",
6909 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6910 && (!anydelim || *s != '#')) {
6911 /* no override, and not s### either; skipspace is safe here
6912 * check for => on following line */
6914 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6915 STRLEN soff = s - SvPVX(PL_linestr);
6917 arrow = *s == '=' && s[1] == '>';
6918 PL_bufptr = SvPVX(PL_linestr) + bufoff;
6919 s = SvPVX(PL_linestr) + soff;
6927 /* Trade off - by using this evil construction we can pull the
6928 variable gv into the block labelled keylookup. If not, then
6929 we have to give it function scope so that the goto from the
6930 earlier ':' case doesn't bypass the initialisation. */
6931 just_a_word_zero_gv:
6940 default: /* not a keyword */
6943 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6947 /* Get the rest if it looks like a package qualifier */
6949 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6951 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6954 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
6955 UTF8fARG(UTF, len, PL_tokenbuf),
6956 *s == '\'' ? "'" : "::");
6961 if (PL_expect == XOPERATOR) {
6962 if (PL_bufptr == PL_linestart) {
6963 CopLINE_dec(PL_curcop);
6964 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6965 CopLINE_inc(PL_curcop);
6968 no_op("Bareword",s);
6971 /* See if the name is "Foo::",
6972 in which case Foo is a bareword
6973 (and a package name). */
6976 && PL_tokenbuf[len - 2] == ':'
6977 && PL_tokenbuf[len - 1] == ':')
6979 if (ckWARN(WARN_BAREWORD)
6980 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6981 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6982 "Bareword \"%" UTF8f
6983 "\" refers to nonexistent package",
6984 UTF8fARG(UTF, len, PL_tokenbuf));
6986 PL_tokenbuf[len] = '\0';
6995 /* if we saw a global override before, get the right name */
6998 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7001 SV * const tmp_sv = sv;
7002 sv = newSVpvs("CORE::GLOBAL::");
7003 sv_catsv(sv, tmp_sv);
7004 SvREFCNT_dec(tmp_sv);
7008 /* Presume this is going to be a bareword of some sort. */
7010 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7011 pl_yylval.opval->op_private = OPpCONST_BARE;
7013 /* And if "Foo::", then that's what it certainly is. */
7019 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7020 const_op->op_private = OPpCONST_BARE;
7022 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7026 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7029 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7032 /* Use this var to track whether intuit_method has been
7033 called. intuit_method returns 0 or > 255. */
7036 /* See if it's the indirect object for a list operator. */
7039 && PL_oldoldbufptr < PL_bufptr
7040 && (PL_oldoldbufptr == PL_last_lop
7041 || PL_oldoldbufptr == PL_last_uni)
7042 && /* NO SKIPSPACE BEFORE HERE! */
7044 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7047 bool immediate_paren = *s == '(';
7049 /* (Now we can afford to cross potential line boundary.) */
7052 /* Two barewords in a row may indicate method call. */
7054 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
7055 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7060 /* If not a declared subroutine, it's an indirect object. */
7061 /* (But it's an indir obj regardless for sort.) */
7062 /* Also, if "_" follows a filetest operator, it's a bareword */
7065 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7067 && (PL_last_lop_op != OP_MAPSTART
7068 && PL_last_lop_op != OP_GREPSTART))))
7069 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7070 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7074 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7079 PL_expect = XOPERATOR;
7082 /* Is this a word before a => operator? */
7083 if (*s == '=' && s[1] == '>' && !pkgname) {
7086 if (gvp || (lex && !off)) {
7087 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7088 /* This is our own scalar, created a few lines
7089 above, so this is safe. */
7091 sv_setpv(sv, PL_tokenbuf);
7092 if (UTF && !IN_BYTES
7093 && is_utf8_string((U8*)PL_tokenbuf, len))
7100 /* If followed by a paren, it's certainly a subroutine. */
7105 while (SPACE_OR_TAB(*d))
7107 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7112 NEXTVAL_NEXTTOKE.opval =
7113 off ? rv2cv_op : pl_yylval.opval;
7115 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7116 else op_free(rv2cv_op), force_next(BAREWORD);
7121 /* If followed by var or block, call it a method (unless sub) */
7123 if ((*s == '$' || *s == '{') && !cv) {
7125 PL_last_lop = PL_oldbufptr;
7126 PL_last_lop_op = OP_METHOD;
7127 if (!PL_lex_allbrackets
7128 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7130 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7132 PL_expect = XBLOCKTERM;
7134 return REPORT(METHOD);
7137 /* If followed by a bareword, see if it looks like indir obj. */
7139 if (tmp == 1 && !orig_keyword
7140 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7141 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
7144 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7146 sv_setpvn(sv, PL_tokenbuf, len);
7147 if (UTF && !IN_BYTES
7148 && is_utf8_string((U8*)PL_tokenbuf, len))
7150 else SvUTF8_off(sv);
7153 if (tmp == METHOD && !PL_lex_allbrackets
7154 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7156 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7161 /* Not a method, so call it a subroutine (if defined) */
7164 /* Check for a constant sub */
7165 if ((sv = cv_const_sv_or_av(cv))) {
7168 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7169 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7170 if (SvTYPE(sv) == SVt_PVAV)
7171 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7174 pl_yylval.opval->op_private = 0;
7175 pl_yylval.opval->op_folded = 1;
7176 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7181 op_free(pl_yylval.opval);
7183 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7184 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7185 PL_last_lop = PL_oldbufptr;
7186 PL_last_lop_op = OP_ENTERSUB;
7187 /* Is there a prototype? */
7191 STRLEN protolen = CvPROTOLEN(cv);
7192 const char *proto = CvPROTO(cv);
7194 proto = S_strip_spaces(aTHX_ proto, &protolen);
7197 if ((optional = *proto == ';'))
7200 while (*proto == ';');
7204 *proto == '$' || *proto == '_'
7205 || *proto == '*' || *proto == '+'
7210 *proto == '\\' && proto[1] && proto[2] == '\0'
7213 UNIPROTO(UNIOPSUB,optional);
7214 if (*proto == '\\' && proto[1] == '[') {
7215 const char *p = proto + 2;
7216 while(*p && *p != ']')
7218 if(*p == ']' && !p[1])
7219 UNIPROTO(UNIOPSUB,optional);
7221 if (*proto == '&' && *s == '{') {
7223 sv_setpvs(PL_subname, "__ANON__");
7225 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7226 if (!PL_lex_allbrackets
7227 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7229 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7234 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7236 force_next(off ? PRIVATEREF : BAREWORD);
7237 if (!PL_lex_allbrackets
7238 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7240 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7245 /* Call it a bare word */
7247 if (PL_hints & HINT_STRICT_SUBS)
7248 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7251 /* after "print" and similar functions (corresponding to
7252 * "F? L" in opcode.pl), whatever wasn't already parsed as
7253 * a filehandle should be subject to "strict subs".
7254 * Likewise for the optional indirect-object argument to system
7255 * or exec, which can't be a bareword */
7256 if ((PL_last_lop_op == OP_PRINT
7257 || PL_last_lop_op == OP_PRTF
7258 || PL_last_lop_op == OP_SAY
7259 || PL_last_lop_op == OP_SYSTEM
7260 || PL_last_lop_op == OP_EXEC)
7261 && (PL_hints & HINT_STRICT_SUBS))
7262 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7263 if (lastchar != '-') {
7264 if (ckWARN(WARN_RESERVED)) {
7268 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7270 /* PL_warn_reserved is constant */
7271 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7272 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7282 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7283 && saw_infix_sigil) {
7284 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7285 "Operator or semicolon missing before %c%" UTF8f,
7287 UTF8fARG(UTF, strlen(PL_tokenbuf),
7289 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7290 "Ambiguous use of %c resolved as operator %c",
7291 lastchar, lastchar);
7298 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7303 newSVOP(OP_CONST, 0,
7304 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7307 case KEY___PACKAGE__:
7309 newSVOP(OP_CONST, 0,
7311 ? newSVhek(HvNAME_HEK(PL_curstash))
7318 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7319 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7322 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7324 gv_init(gv,stash,"DATA",4,0);
7327 GvIOp(gv) = newIO();
7328 IoIFP(GvIOp(gv)) = PL_rsfp;
7329 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7331 const int fd = PerlIO_fileno(PL_rsfp);
7333 fcntl(fd,F_SETFD, FD_CLOEXEC);
7337 /* Mark this internal pseudo-handle as clean */
7338 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7339 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7340 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7342 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7343 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7344 /* if the script was opened in binmode, we need to revert
7345 * it to text mode for compatibility; but only iff it has CRs
7346 * XXX this is a questionable hack at best. */
7347 if (PL_bufend-PL_bufptr > 2
7348 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7351 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7352 loc = PerlIO_tell(PL_rsfp);
7353 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7356 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7358 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7359 #endif /* NETWARE */
7361 PerlIO_seek(PL_rsfp, loc, 0);
7365 #ifdef PERLIO_LAYERS
7368 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7377 FUN0OP(CvCLONE(PL_compcv)
7378 ? newOP(OP_RUNCV, 0)
7379 : newPVOP(OP_RUNCV,0,NULL));
7388 if (PL_expect == XSTATE) {
7399 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7400 if ((*s == ':' && s[1] == ':')
7401 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7405 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7409 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7410 UTF8fARG(UTF, len, PL_tokenbuf));
7413 else if (tmp == KEY_require || tmp == KEY_do
7415 /* that's a way to remember we saw "CORE::" */
7427 LOP(OP_ACCEPT,XTERM);
7430 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7435 LOP(OP_ATAN2,XTERM);
7441 LOP(OP_BINMODE,XTERM);
7444 LOP(OP_BLESS,XTERM);
7453 /* We have to disambiguate the two senses of
7454 "continue". If the next token is a '{' then
7455 treat it as the start of a continue block;
7456 otherwise treat it as a control operator.
7466 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7476 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7485 if (!PL_cryptseen) {
7486 PL_cryptseen = TRUE;
7490 LOP(OP_CRYPT,XTERM);
7493 LOP(OP_CHMOD,XTERM);
7496 LOP(OP_CHOWN,XTERM);
7499 LOP(OP_CONNECT,XTERM);
7519 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7521 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7522 && !keyword(PL_tokenbuf + 1, len, 0)) {
7523 SSize_t off = s-SvPVX(PL_linestr);
7525 s = SvPVX(PL_linestr)+off;
7527 force_ident_maybe_lex('&');
7532 if (orig_keyword == KEY_do) {
7541 PL_hints |= HINT_BLOCK_SCOPE;
7551 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7552 STR_WITH_LEN("NDBM_File::"),
7553 STR_WITH_LEN("DB_File::"),
7554 STR_WITH_LEN("GDBM_File::"),
7555 STR_WITH_LEN("SDBM_File::"),
7556 STR_WITH_LEN("ODBM_File::"),
7558 LOP(OP_DBMOPEN,XTERM);
7570 pl_yylval.ival = CopLINE(PL_curcop);
7574 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7586 if (*s == '{') { /* block eval */
7587 PL_expect = XTERMBLOCK;
7588 UNIBRACK(OP_ENTERTRY);
7590 else { /* string eval */
7592 UNIBRACK(OP_ENTEREVAL);
7597 UNIBRACK(-OP_ENTEREVAL);
7611 case KEY_endhostent:
7617 case KEY_endservent:
7620 case KEY_endprotoent:
7631 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7633 pl_yylval.ival = CopLINE(PL_curcop);
7635 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7638 if ((PL_bufend - p) >= 3
7639 && strEQs(p, "my") && isSPACE(*(p + 2)))
7643 else if ((PL_bufend - p) >= 4
7644 && strEQs(p, "our") && isSPACE(*(p + 3)))
7647 /* skip optional package name, as in "for my abc $x (..)" */
7648 if (isIDFIRST_lazy_if(p,UTF)) {
7649 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7652 if (*p != '$' && *p != '\\')
7653 Perl_croak(aTHX_ "Missing $ on loop variable");
7658 LOP(OP_FORMLINE,XTERM);
7667 LOP(OP_FCNTL,XTERM);
7673 LOP(OP_FLOCK,XTERM);
7676 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7681 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7686 LOP(OP_GREPSTART, XREF);
7703 case KEY_getpriority:
7704 LOP(OP_GETPRIORITY,XTERM);
7706 case KEY_getprotobyname:
7709 case KEY_getprotobynumber:
7710 LOP(OP_GPBYNUMBER,XTERM);
7712 case KEY_getprotoent:
7724 case KEY_getpeername:
7725 UNI(OP_GETPEERNAME);
7727 case KEY_gethostbyname:
7730 case KEY_gethostbyaddr:
7731 LOP(OP_GHBYADDR,XTERM);
7733 case KEY_gethostent:
7736 case KEY_getnetbyname:
7739 case KEY_getnetbyaddr:
7740 LOP(OP_GNBYADDR,XTERM);
7745 case KEY_getservbyname:
7746 LOP(OP_GSBYNAME,XTERM);
7748 case KEY_getservbyport:
7749 LOP(OP_GSBYPORT,XTERM);
7751 case KEY_getservent:
7754 case KEY_getsockname:
7755 UNI(OP_GETSOCKNAME);
7757 case KEY_getsockopt:
7758 LOP(OP_GSOCKOPT,XTERM);
7773 pl_yylval.ival = CopLINE(PL_curcop);
7774 Perl_ck_warner_d(aTHX_
7775 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7776 "given is experimental");
7781 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7789 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7791 pl_yylval.ival = CopLINE(PL_curcop);
7795 LOP(OP_INDEX,XTERM);
7801 LOP(OP_IOCTL,XTERM);
7828 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7833 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7847 LOP(OP_LISTEN,XTERM);
7856 s = scan_pat(s,OP_MATCH);
7857 TERM(sublex_start());
7860 LOP(OP_MAPSTART, XREF);
7863 LOP(OP_MKDIR,XTERM);
7866 LOP(OP_MSGCTL,XTERM);
7869 LOP(OP_MSGGET,XTERM);
7872 LOP(OP_MSGRCV,XTERM);
7875 LOP(OP_MSGSND,XTERM);
7882 yyerror(Perl_form(aTHX_
7883 "Can't redeclare \"%s\" in \"%s\"",
7884 tmp == KEY_my ? "my" :
7885 tmp == KEY_state ? "state" : "our",
7886 PL_in_my == KEY_my ? "my" :
7887 PL_in_my == KEY_state ? "state" : "our"));
7889 PL_in_my = (U16)tmp;
7891 if (isIDFIRST_lazy_if(s,UTF)) {
7892 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7893 if (len == 3 && strEQs(PL_tokenbuf, "sub"))
7895 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7896 if (!PL_in_my_stash) {
7900 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7901 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7902 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7905 else if (*s == '\\') {
7906 if (!FEATURE_MYREF_IS_ENABLED)
7907 Perl_croak(aTHX_ "The experimental declared_refs "
7908 "feature is not enabled");
7909 Perl_ck_warner_d(aTHX_
7910 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7911 "Declaring references is experimental");
7919 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7924 s = tokenize_use(0, s);
7928 if (*s == '(' || (s = skipspace(s), *s == '('))
7931 if (!PL_lex_allbrackets
7932 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7934 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7941 if (isIDFIRST_lazy_if(s,UTF)) {
7943 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7945 for (t=d; isSPACE(*t);)
7947 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7949 && !(t[0] == '=' && t[1] == '>')
7950 && !(t[0] == ':' && t[1] == ':')
7951 && !keyword(s, d-s, 0)
7953 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7954 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
7955 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7961 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7963 pl_yylval.ival = OP_OR;
7973 LOP(OP_OPEN_DIR,XTERM);
7976 checkcomma(s,PL_tokenbuf,"filehandle");
7980 checkcomma(s,PL_tokenbuf,"filehandle");
7999 s = force_word(s,BAREWORD,FALSE,TRUE);
8001 s = force_strict_version(s);
8005 LOP(OP_PIPE_OP,XTERM);
8008 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8011 COPLINE_SET_FROM_MULTI_END;
8012 pl_yylval.ival = OP_CONST;
8013 TERM(sublex_start());
8020 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8023 COPLINE_SET_FROM_MULTI_END;
8024 PL_expect = XOPERATOR;
8025 if (SvCUR(PL_lex_stuff)) {
8026 int warned_comma = !ckWARN(WARN_QW);
8027 int warned_comment = warned_comma;
8028 d = SvPV_force(PL_lex_stuff, len);
8030 for (; isSPACE(*d) && len; --len, ++d)
8035 if (!warned_comma || !warned_comment) {
8036 for (; !isSPACE(*d) && len; --len, ++d) {
8037 if (!warned_comma && *d == ',') {
8038 Perl_warner(aTHX_ packWARN(WARN_QW),
8039 "Possible attempt to separate words with commas");
8042 else if (!warned_comment && *d == '#') {
8043 Perl_warner(aTHX_ packWARN(WARN_QW),
8044 "Possible attempt to put comments in qw() list");
8050 for (; !isSPACE(*d) && len; --len, ++d)
8053 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8054 words = op_append_elem(OP_LIST, words,
8055 newSVOP(OP_CONST, 0, tokeq(sv)));
8060 words = newNULLLIST();
8061 SvREFCNT_dec_NN(PL_lex_stuff);
8062 PL_lex_stuff = NULL;
8063 PL_expect = XOPERATOR;
8064 pl_yylval.opval = sawparens(words);
8069 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8072 pl_yylval.ival = OP_STRINGIFY;
8073 if (SvIVX(PL_lex_stuff) == '\'')
8074 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8075 TERM(sublex_start());
8078 s = scan_pat(s,OP_QR);
8079 TERM(sublex_start());
8082 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8085 pl_yylval.ival = OP_BACKTICK;
8086 TERM(sublex_start());
8094 s = force_version(s, FALSE);
8096 else if (*s != 'v' || !isDIGIT(s[1])
8097 || (s = force_version(s, TRUE), *s == 'v'))
8099 *PL_tokenbuf = '\0';
8100 s = force_word(s,BAREWORD,TRUE,TRUE);
8101 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8102 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8103 GV_ADD | (UTF ? SVf_UTF8 : 0));
8105 yyerror("<> at require-statement should be quotes");
8107 if (orig_keyword == KEY_require) {
8113 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8115 PL_last_uni = PL_oldbufptr;
8116 PL_last_lop_op = OP_REQUIRE;
8118 return REPORT( (int)REQUIRE );
8127 LOP(OP_RENAME,XTERM);
8136 LOP(OP_RINDEX,XTERM);
8145 UNIDOR(OP_READLINE);
8148 UNIDOR(OP_BACKTICK);
8157 LOP(OP_REVERSE,XTERM);
8160 UNIDOR(OP_READLINK);
8167 if (pl_yylval.opval)
8168 TERM(sublex_start());
8170 TOKEN(1); /* force error */
8173 checkcomma(s,PL_tokenbuf,"filehandle");
8183 LOP(OP_SELECT,XTERM);
8189 LOP(OP_SEMCTL,XTERM);
8192 LOP(OP_SEMGET,XTERM);
8195 LOP(OP_SEMOP,XTERM);
8201 LOP(OP_SETPGRP,XTERM);
8203 case KEY_setpriority:
8204 LOP(OP_SETPRIORITY,XTERM);
8206 case KEY_sethostent:
8212 case KEY_setservent:
8215 case KEY_setprotoent:
8225 LOP(OP_SEEKDIR,XTERM);
8227 case KEY_setsockopt:
8228 LOP(OP_SSOCKOPT,XTERM);
8234 LOP(OP_SHMCTL,XTERM);
8237 LOP(OP_SHMGET,XTERM);
8240 LOP(OP_SHMREAD,XTERM);
8243 LOP(OP_SHMWRITE,XTERM);
8246 LOP(OP_SHUTDOWN,XTERM);
8255 LOP(OP_SOCKET,XTERM);
8257 case KEY_socketpair:
8258 LOP(OP_SOCKPAIR,XTERM);
8261 checkcomma(s,PL_tokenbuf,"subroutine name");
8264 s = force_word(s,BAREWORD,TRUE,TRUE);
8268 LOP(OP_SPLIT,XTERM);
8271 LOP(OP_SPRINTF,XTERM);
8274 LOP(OP_SPLICE,XTERM);
8289 LOP(OP_SUBSTR,XTERM);
8295 char * const tmpbuf = PL_tokenbuf + 1;
8296 expectation attrful;
8297 bool have_name, have_proto;
8298 const int key = tmp;
8299 SV *format_name = NULL;
8301 SSize_t off = s-SvPVX(PL_linestr);
8303 d = SvPVX(PL_linestr)+off;
8305 if (isIDFIRST_lazy_if(s,UTF)
8307 || (*s == ':' && s[1] == ':'))
8311 attrful = XATTRBLOCK;
8312 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8314 if (key == KEY_format)
8315 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8317 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8319 PL_tokenbuf, len + 1, 0
8321 sv_setpvn(PL_subname, tmpbuf, len);
8323 sv_setsv(PL_subname,PL_curstname);
8324 sv_catpvs(PL_subname,"::");
8325 sv_catpvn(PL_subname,tmpbuf,len);
8327 if (SvUTF8(PL_linestr))
8328 SvUTF8_on(PL_subname);
8335 if (key == KEY_my || key == KEY_our || key==KEY_state)
8338 /* diag_listed_as: Missing name in "%s sub" */
8340 "Missing name in \"%s\"", PL_bufptr);
8342 PL_expect = XTERMBLOCK;
8343 attrful = XATTRTERM;
8344 sv_setpvs(PL_subname,"?");
8348 if (key == KEY_format) {
8350 NEXTVAL_NEXTTOKE.opval
8351 = newSVOP(OP_CONST,0, format_name);
8352 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8353 force_next(BAREWORD);
8358 /* Look for a prototype */
8359 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8360 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8361 COPLINE_SET_FROM_MULTI_END;
8363 Perl_croak(aTHX_ "Prototype not terminated");
8364 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8372 if (*s == ':' && s[1] != ':')
8373 PL_expect = attrful;
8374 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8375 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8376 key == KEY_DESTROY || key == KEY_BEGIN ||
8377 key == KEY_UNITCHECK || key == KEY_CHECK ||
8378 key == KEY_INIT || key == KEY_END ||
8379 key == KEY_my || key == KEY_state ||
8382 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8383 else if (*s != ';' && *s != '}')
8384 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8388 NEXTVAL_NEXTTOKE.opval =
8389 newSVOP(OP_CONST, 0, PL_lex_stuff);
8390 PL_lex_stuff = NULL;
8395 sv_setpvs(PL_subname, "__ANON__");
8397 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8400 force_ident_maybe_lex('&');
8405 LOP(OP_SYSTEM,XREF);
8408 LOP(OP_SYMLINK,XTERM);
8411 LOP(OP_SYSCALL,XTERM);
8414 LOP(OP_SYSOPEN,XTERM);
8417 LOP(OP_SYSSEEK,XTERM);
8420 LOP(OP_SYSREAD,XTERM);
8423 LOP(OP_SYSWRITE,XTERM);
8428 TERM(sublex_start());
8449 LOP(OP_TRUNCATE,XTERM);
8461 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8463 pl_yylval.ival = CopLINE(PL_curcop);
8467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8469 pl_yylval.ival = CopLINE(PL_curcop);
8473 LOP(OP_UNLINK,XTERM);
8479 LOP(OP_UNPACK,XTERM);
8482 LOP(OP_UTIME,XTERM);
8488 LOP(OP_UNSHIFT,XTERM);
8491 s = tokenize_use(1, s);
8501 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8503 pl_yylval.ival = CopLINE(PL_curcop);
8504 Perl_ck_warner_d(aTHX_
8505 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8506 "when is experimental");
8510 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8512 pl_yylval.ival = CopLINE(PL_curcop);
8516 PL_hints |= HINT_BLOCK_SCOPE;
8523 LOP(OP_WAITPID,XTERM);
8529 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8530 * we use the same number on EBCDIC */
8531 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8535 if (PL_expect == XOPERATOR) {
8536 if (*s == '=' && !PL_lex_allbrackets
8537 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8547 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8549 pl_yylval.ival = OP_XOR;
8558 Looks up an identifier in the pad or in a package
8560 is_sig indicates that this is a subroutine signature variable
8561 rather than a plain pad var.
8564 PRIVATEREF if this is a lexical name.
8565 BAREWORD if this belongs to a package.
8568 if we're in a my declaration
8569 croak if they tried to say my($foo::bar)
8570 build the ops for a my() declaration
8571 if it's an access to a my() variable
8572 build ops for access to a my() variable
8573 if in a dq string, and they've said @foo and we can't find @foo
8575 build ops for a bareword
8579 S_pending_ident(pTHX)
8582 const char pit = (char)pl_yylval.ival;
8583 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8584 /* All routes through this function want to know if there is a colon. */
8585 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8587 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8588 "### Pending identifier '%s'\n", PL_tokenbuf); });
8590 /* if we're in a my(), we can't allow dynamics here.
8591 $foo'bar has already been turned into $foo::bar, so
8592 just check for colons.
8594 if it's a legal name, the OP is a PADANY.
8597 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8599 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8600 "variable %s in \"our\"",
8601 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8602 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8607 /* "my" variable %s can't be in a package */
8608 /* PL_no_myglob is constant */
8609 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8610 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8611 PL_in_my == KEY_my ? "my" : "state",
8612 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8614 UTF ? SVf_UTF8 : 0);
8618 if (PL_in_my == KEY_sigvar) {
8619 /* A signature 'padop' needs in addition, an op_first to
8620 * point to a child sigdefelem, and an extra field to hold
8621 * the signature index. We can achieve both by using an
8622 * UNOP_AUX and (ab)using the op_aux field to hold the
8623 * index. If we ever need more fields, use a real malloced
8624 * aux strut instead.
8626 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
8627 INT2PTR(UNOP_AUX_item *,
8628 (PL_parser->sig_elems)));
8629 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
8630 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
8634 o = newOP(OP_PADANY, 0);
8635 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8636 UTF ? SVf_UTF8 : 0);
8637 if (PL_in_my == KEY_sigvar)
8640 pl_yylval.opval = o;
8646 build the ops for accesses to a my() variable.
8651 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8653 if (tmp != NOT_IN_PAD) {
8654 /* might be an "our" variable" */
8655 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8656 /* build ops for a bareword */
8657 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8658 HEK * const stashname = HvNAME_HEK(stash);
8659 SV * const sym = newSVhek(stashname);
8660 sv_catpvs(sym, "::");
8661 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8662 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
8663 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8667 ((PL_tokenbuf[0] == '$') ? SVt_PV
8668 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8673 pl_yylval.opval = newOP(OP_PADANY, 0);
8674 pl_yylval.opval->op_targ = tmp;
8680 Whine if they've said @foo or @foo{key} in a doublequoted string,
8681 and @foo (or %foo) isn't a variable we can find in the symbol
8684 if (ckWARN(WARN_AMBIGUOUS)
8686 && PL_lex_state != LEX_NORMAL
8687 && !PL_lex_brackets)
8689 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8690 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
8692 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8695 /* Downgraded from fatal to warning 20000522 mjd */
8696 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8697 "Possible unintended interpolation of %" UTF8f
8699 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8703 /* build ops for a bareword */
8704 pl_yylval.opval = newSVOP(OP_CONST, 0,
8705 newSVpvn_flags(PL_tokenbuf + 1,
8707 UTF ? SVf_UTF8 : 0 ));
8708 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8710 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8711 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8712 | ( UTF ? SVf_UTF8 : 0 ),
8713 ((PL_tokenbuf[0] == '$') ? SVt_PV
8714 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8720 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8722 PERL_ARGS_ASSERT_CHECKCOMMA;
8724 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8725 if (ckWARN(WARN_SYNTAX)) {
8728 for (w = s+2; *w && level; w++) {
8736 /* the list of chars below is for end of statements or
8737 * block / parens, boolean operators (&&, ||, //) and branch
8738 * constructs (or, and, if, until, unless, while, err, for).
8739 * Not a very solid hack... */
8740 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8741 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8742 "%s (...) interpreted as function",name);
8745 while (s < PL_bufend && isSPACE(*s))
8749 while (s < PL_bufend && isSPACE(*s))
8751 if (isIDFIRST_lazy_if(s,UTF)) {
8752 const char * const w = s;
8753 s += UTF ? UTF8SKIP(s) : 1;
8754 while (isWORDCHAR_lazy_if(s,UTF))
8755 s += UTF ? UTF8SKIP(s) : 1;
8756 while (s < PL_bufend && isSPACE(*s))
8761 if (keyword(w, s - w, 0))
8764 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8765 if (gv && GvCVu(gv))
8769 Copy(w, tmpbuf+1, s - w, char);
8771 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8772 if (off != NOT_IN_PAD) return;
8774 Perl_croak(aTHX_ "No comma allowed after %s", what);
8779 /* S_new_constant(): do any overload::constant lookup.
8781 Either returns sv, or mortalizes/frees sv and returns a new SV*.
8782 Best used as sv=new_constant(..., sv, ...).
8783 If s, pv are NULL, calls subroutine with one argument,
8784 and <type> is used with error messages only.
8785 <type> is assumed to be well formed UTF-8 */
8788 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8789 SV *sv, SV *pv, const char *type, STRLEN typelen)
8792 HV * table = GvHV(PL_hintgv); /* ^H */
8797 const char *why1 = "", *why2 = "", *why3 = "";
8799 PERL_ARGS_ASSERT_NEW_CONSTANT;
8800 /* We assume that this is true: */
8801 if (*key == 'c') { assert (strEQ(key, "charnames")); }
8804 /* charnames doesn't work well if there have been errors found */
8805 if (PL_error_count > 0 && *key == 'c')
8807 SvREFCNT_dec_NN(sv);
8808 return &PL_sv_undef;
8811 sv_2mortal(sv); /* Parent created it permanently */
8813 || ! (PL_hints & HINT_LOCALIZE_HH)
8814 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8819 /* Here haven't found what we're looking for. If it is charnames,
8820 * perhaps it needs to be loaded. Try doing that before giving up */
8822 Perl_load_module(aTHX_
8824 newSVpvs("_charnames"),
8825 /* version parameter; no need to specify it, as if
8826 * we get too early a version, will fail anyway,
8827 * not being able to find '_charnames' */
8832 assert(sp == PL_stack_sp);
8833 table = GvHV(PL_hintgv);
8835 && (PL_hints & HINT_LOCALIZE_HH)
8836 && (cvp = hv_fetch(table, key, keylen, FALSE))
8842 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8843 msg = Perl_form(aTHX_
8844 "Constant(%.*s) unknown",
8845 (int)(type ? typelen : len),
8851 why3 = "} is not defined";
8854 msg = Perl_form(aTHX_
8855 /* The +3 is for '\N{'; -4 for that, plus '}' */
8856 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8860 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8861 (int)(type ? typelen : len),
8862 (type ? type: s), why1, why2, why3);
8865 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8866 return SvREFCNT_inc_simple_NN(sv);
8871 pv = newSVpvn_flags(s, len, SVs_TEMP);
8873 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8875 typesv = &PL_sv_undef;
8877 PUSHSTACKi(PERLSI_OVERLOAD);
8889 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8893 /* Check the eval first */
8894 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8896 const char * errstr;
8897 sv_catpvs(errsv, "Propagated");
8898 errstr = SvPV_const(errsv, errlen);
8899 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8901 res = SvREFCNT_inc_simple_NN(sv);
8905 SvREFCNT_inc_simple_void_NN(res);
8914 why1 = "Call to &{$^H{";
8916 why3 = "}} did not return a defined value";
8918 (void)sv_2mortal(sv);
8925 PERL_STATIC_INLINE void
8926 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
8927 bool is_utf8, bool check_dollar) {
8928 PERL_ARGS_ASSERT_PARSE_IDENT;
8932 Perl_croak(aTHX_ "%s", ident_too_long);
8933 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8934 /* The UTF-8 case must come first, otherwise things
8935 * like c\N{COMBINING TILDE} would start failing, as the
8936 * isWORDCHAR_A case below would gobble the 'c' up.
8939 char *t = *s + UTF8SKIP(*s);
8940 while (isIDCONT_utf8((U8*)t))
8942 if (*d + (t - *s) > e)
8943 Perl_croak(aTHX_ "%s", ident_too_long);
8944 Copy(*s, *d, t - *s, char);
8948 else if ( isWORDCHAR_A(**s) ) {
8951 } while (isWORDCHAR_A(**s) && *d < e);
8953 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8958 else if (allow_package && **s == ':' && (*s)[1] == ':'
8959 /* Disallow things like Foo::$bar. For the curious, this is
8960 * the code path that triggers the "Bad name after" warning
8961 * when looking for barewords.
8963 && !(check_dollar && (*s)[2] == '$')) {
8973 /* Returns a NUL terminated string, with the length of the string written to
8977 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8980 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8981 bool is_utf8 = cBOOL(UTF);
8983 PERL_ARGS_ASSERT_SCAN_WORD;
8985 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
8991 /* Is the byte 'd' a legal single character identifier name? 'u' is true
8992 * iff Unicode semantics are to be used. The legal ones are any of:
8993 * a) all ASCII characters except:
8994 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
8996 * The final case currently doesn't get this far in the program, so we
8997 * don't test for it. If that were to change, it would be ok to allow it.
8998 * b) When not under Unicode rules, any upper Latin1 character
8999 * c) Otherwise, when unicode rules are used, all XIDS characters.
9001 * Because all ASCII characters have the same representation whether
9002 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9003 * '{' without knowing if is UTF-8 or not. */
9004 #define VALID_LEN_ONE_IDENT(s, is_utf8) \
9005 (isGRAPH_A(*(s)) || ((is_utf8) \
9006 ? isIDFIRST_utf8((U8*) (s)) \
9008 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9011 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9013 I32 herelines = PL_parser->herelines;
9014 SSize_t bracket = -1;
9017 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9018 bool is_utf8 = cBOOL(UTF);
9019 I32 orig_copline = 0, tmp_copline = 0;
9021 PERL_ARGS_ASSERT_SCAN_IDENT;
9023 if (isSPACE(*s) || !*s)
9026 while (isDIGIT(*s)) {
9028 Perl_croak(aTHX_ "%s", ident_too_long);
9032 else { /* See if it is a "normal" identifier */
9033 parse_ident(&s, &d, e, 1, is_utf8, FALSE);
9038 /* Either a digit variable, or parse_ident() found an identifier
9039 (anything valid as a bareword), so job done and return. */
9040 if (PL_lex_state != LEX_NORMAL)
9041 PL_lex_state = LEX_INTERPENDMAYBE;
9045 /* Here, it is not a run-of-the-mill identifier name */
9047 if (*s == '$' && s[1]
9048 && (isIDFIRST_lazy_if(s+1,is_utf8)
9049 || isDIGIT_A((U8)s[1])
9052 || strEQs(s+1,"::")) )
9054 /* Dereferencing a value in a scalar variable.
9055 The alternatives are different syntaxes for a scalar variable.
9056 Using ' as a leading package separator isn't allowed. :: is. */
9059 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9061 bracket = s - SvPVX(PL_linestr);
9063 orig_copline = CopLINE(PL_curcop);
9064 if (s < PL_bufend && isSPACE(*s)) {
9068 if ((s <= PL_bufend - (is_utf8)
9071 && VALID_LEN_ONE_IDENT(s, is_utf8))
9074 const STRLEN skip = UTF8SKIP(s);
9077 for ( i = 0; i < skip; i++ )
9085 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9086 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9090 /* Warn about ambiguous code after unary operators if {...} notation isn't
9091 used. There's no difference in ambiguity; it's merely a heuristic
9092 about when not to warn. */
9093 else if (ck_uni && bracket == -1)
9095 if (bracket != -1) {
9098 /* If we were processing {...} notation then... */
9099 if (isIDFIRST_lazy_if(d,is_utf8)) {
9100 /* if it starts as a valid identifier, assume that it is one.
9101 (the later check for } being at the expected point will trap
9102 cases where this doesn't pan out.) */
9103 d += is_utf8 ? UTF8SKIP(d) : 1;
9104 parse_ident(&s, &d, e, 1, is_utf8, TRUE);
9106 tmp_copline = CopLINE(PL_curcop);
9107 if (s < PL_bufend && isSPACE(*s)) {
9110 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9111 /* ${foo[0]} and ${foo{bar}} notation. */
9112 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9113 const char * const brack =
9115 ((*s == '[') ? "[...]" : "{...}");
9116 orig_copline = CopLINE(PL_curcop);
9117 CopLINE_set(PL_curcop, tmp_copline);
9118 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9119 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9120 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9121 funny, dest, brack, funny, dest, brack);
9122 CopLINE_set(PL_curcop, orig_copline);
9125 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9126 PL_lex_allbrackets++;
9130 /* Handle extended ${^Foo} variables
9131 * 1999-02-27 mjd-perl-patch@plover.com */
9132 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9136 while (isWORDCHAR(*s) && d < e) {
9140 Perl_croak(aTHX_ "%s", ident_too_long);
9145 tmp_copline = CopLINE(PL_curcop);
9146 if ((skip = s < PL_bufend && isSPACE(*s)))
9147 /* Avoid incrementing line numbers or resetting PL_linestart,
9148 in case we have to back up. */
9153 /* Expect to find a closing } after consuming any trailing whitespace.
9156 /* Now increment line numbers if applicable. */
9160 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9161 PL_lex_state = LEX_INTERPEND;
9164 if (PL_lex_state == LEX_NORMAL) {
9165 if (ckWARN(WARN_AMBIGUOUS)
9166 && (keyword(dest, d - dest, 0)
9167 || get_cvn_flags(dest, d - dest, is_utf8
9171 SV *tmp = newSVpvn_flags( dest, d - dest,
9172 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9175 orig_copline = CopLINE(PL_curcop);
9176 CopLINE_set(PL_curcop, tmp_copline);
9177 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9178 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9179 funny, SVfARG(tmp), funny, SVfARG(tmp));
9180 CopLINE_set(PL_curcop, orig_copline);
9185 /* Didn't find the closing } at the point we expected, so restore
9186 state such that the next thing to process is the opening { and */
9187 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9188 CopLINE_set(PL_curcop, orig_copline);
9189 PL_parser->herelines = herelines;
9193 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9194 PL_lex_state = LEX_INTERPEND;
9199 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9201 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9202 * found in the parse starting at 's', based on the subset that are valid
9203 * in this context input to this routine in 'valid_flags'. Advances s.
9204 * Returns TRUE if the input should be treated as a valid flag, so the next
9205 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9206 * upon first call on the current regex. This routine will set it to any
9207 * charset modifier found. The caller shouldn't change it. This way,
9208 * another charset modifier encountered in the parse can be detected as an
9209 * error, as we have decided to allow only one */
9212 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9214 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9215 if (isWORDCHAR_lazy_if(*s, UTF)) {
9216 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9217 UTF ? SVf_UTF8 : 0);
9219 /* Pretend that it worked, so will continue processing before
9228 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9229 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9230 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9231 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9232 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9233 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9234 case LOCALE_PAT_MOD:
9236 goto multiple_charsets;
9238 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9241 case UNICODE_PAT_MOD:
9243 goto multiple_charsets;
9245 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9248 case ASCII_RESTRICT_PAT_MOD:
9250 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9254 /* Error if previous modifier wasn't an 'a', but if it was, see
9255 * if, and accept, a second occurrence (only) */
9257 || get_regex_charset(*pmfl)
9258 != REGEX_ASCII_RESTRICTED_CHARSET)
9260 goto multiple_charsets;
9262 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9266 case DEPENDS_PAT_MOD:
9268 goto multiple_charsets;
9270 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9279 if (*charset != c) {
9280 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9282 else if (c == 'a') {
9283 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9284 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9287 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9290 /* Pretend that it worked, so will continue processing before dieing */
9296 S_scan_pat(pTHX_ char *start, I32 type)
9300 const char * const valid_flags =
9301 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9302 char charset = '\0'; /* character set modifier */
9303 unsigned int x_mod_count = 0;
9305 PERL_ARGS_ASSERT_SCAN_PAT;
9307 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9309 Perl_croak(aTHX_ "Search pattern not terminated");
9311 pm = (PMOP*)newPMOP(type, 0);
9312 if (PL_multi_open == '?') {
9313 /* This is the only point in the code that sets PMf_ONCE: */
9314 pm->op_pmflags |= PMf_ONCE;
9316 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9317 allows us to restrict the list needed by reset to just the ??
9319 assert(type != OP_TRANS);
9321 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9324 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9327 elements = mg->mg_len / sizeof(PMOP**);
9328 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9329 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9330 mg->mg_len = elements * sizeof(PMOP**);
9331 PmopSTASH_set(pm,PL_curstash);
9335 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9336 * anon CV. False positives like qr/[(?{]/ are harmless */
9338 if (type == OP_QR) {
9340 char *e, *p = SvPV(PL_lex_stuff, len);
9342 for (; p < e; p++) {
9343 if (p[0] == '(' && p[1] == '?'
9344 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9346 pm->op_pmflags |= PMf_HAS_CV;
9350 pm->op_pmflags |= PMf_IS_QR;
9353 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9354 &s, &charset, &x_mod_count))
9356 /* issue a warning if /c is specified,but /g is not */
9357 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9359 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9360 "Use of /c modifier is meaningless without /g" );
9363 if (UNLIKELY((x_mod_count) > 1)) {
9364 yyerror("Only one /x regex modifier is allowed");
9367 PL_lex_op = (OP*)pm;
9368 pl_yylval.ival = OP_MATCH;
9373 S_scan_subst(pTHX_ char *start)
9379 line_t linediff = 0;
9381 char charset = '\0'; /* character set modifier */
9382 unsigned int x_mod_count = 0;
9385 PERL_ARGS_ASSERT_SCAN_SUBST;
9387 pl_yylval.ival = OP_NULL;
9389 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9392 Perl_croak(aTHX_ "Substitution pattern not terminated");
9396 first_start = PL_multi_start;
9397 first_line = CopLINE(PL_curcop);
9398 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9400 SvREFCNT_dec_NN(PL_lex_stuff);
9401 PL_lex_stuff = NULL;
9402 Perl_croak(aTHX_ "Substitution replacement not terminated");
9404 PL_multi_start = first_start; /* so whole substitution is taken together */
9406 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9410 if (*s == EXEC_PAT_MOD) {
9414 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9415 &s, &charset, &x_mod_count))
9421 if (UNLIKELY((x_mod_count) > 1)) {
9422 yyerror("Only one /x regex modifier is allowed");
9425 if ((pm->op_pmflags & PMf_CONTINUE)) {
9426 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9430 SV * const repl = newSVpvs("");
9433 pm->op_pmflags |= PMf_EVAL;
9436 sv_catpvs(repl, "eval ");
9438 sv_catpvs(repl, "do ");
9440 sv_catpvs(repl, "{");
9441 sv_catsv(repl, PL_parser->lex_sub_repl);
9442 sv_catpvs(repl, "}");
9443 SvREFCNT_dec(PL_parser->lex_sub_repl);
9444 PL_parser->lex_sub_repl = repl;
9449 linediff = CopLINE(PL_curcop) - first_line;
9451 CopLINE_set(PL_curcop, first_line);
9453 if (linediff || es) {
9454 /* the IVX field indicates that the replacement string is a s///e;
9455 * the NVX field indicates how many src code lines the replacement
9457 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9458 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = 0;
9459 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen = es;
9462 PL_lex_op = (OP*)pm;
9463 pl_yylval.ival = OP_SUBST;
9468 S_scan_trans(pTHX_ char *start)
9475 bool nondestruct = 0;
9478 PERL_ARGS_ASSERT_SCAN_TRANS;
9480 pl_yylval.ival = OP_NULL;
9482 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9484 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9488 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9490 SvREFCNT_dec_NN(PL_lex_stuff);
9491 PL_lex_stuff = NULL;
9492 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9495 complement = del = squash = 0;
9499 complement = OPpTRANS_COMPLEMENT;
9502 del = OPpTRANS_DELETE;
9505 squash = OPpTRANS_SQUASH;
9517 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9518 o->op_private &= ~OPpTRANS_ALL;
9519 o->op_private |= del|squash|complement|
9520 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9521 (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF : 0);
9524 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9531 Takes a pointer to the first < in <<FOO.
9532 Returns a pointer to the byte following <<FOO.
9534 This function scans a heredoc, which involves different methods
9535 depending on whether we are in a string eval, quoted construct, etc.
9536 This is because PL_linestr could containing a single line of input, or
9537 a whole string being evalled, or the contents of the current quote-
9540 The two basic methods are:
9541 - Steal lines from the input stream
9542 - Scan the heredoc in PL_linestr and remove it therefrom
9544 In a file scope or filtered eval, the first method is used; in a
9545 string eval, the second.
9547 In a quote-like operator, we have to choose between the two,
9548 depending on where we can find a newline. We peek into outer lex-
9549 ing scopes until we find one with a newline in it. If we reach the
9550 outermost lexing scope and it is a file, we use the stream method.
9551 Otherwise it is treated as an eval.
9555 S_scan_heredoc(pTHX_ char *s)
9557 I32 op_type = OP_SCALAR;
9566 bool indented = FALSE;
9567 const bool infile = PL_rsfp || PL_parser->filtered;
9568 const line_t origline = CopLINE(PL_curcop);
9569 LEXSHARED *shared = PL_parser->lex_shared;
9571 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9574 d = PL_tokenbuf + 1;
9575 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9576 *PL_tokenbuf = '\n';
9582 while (SPACE_OR_TAB(*peek))
9584 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9587 s = delimcpy(d, e, s, PL_bufend, term, &len);
9589 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9595 /* <<\FOO is equivalent to <<'FOO' */
9599 if (!isWORDCHAR_lazy_if(s,UTF))
9600 deprecate("bare << to mean <<\"\"");
9602 while (isWORDCHAR_lazy_if(peek,UTF)) {
9603 peek += UTF ? UTF8SKIP(peek) : 1;
9605 len = (peek - s >= e - d) ? (e - d) : (peek - s);
9606 Copy(s, d, len, char);
9610 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9611 Perl_croak(aTHX_ "Delimiter for here document is too long");
9614 len = d - PL_tokenbuf;
9616 #ifndef PERL_STRICT_CR
9617 d = strchr(s, '\r');
9619 char * const olds = s;
9621 while (s < PL_bufend) {
9627 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9636 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9641 tmpstr = newSV_type(SVt_PVIV);
9645 SvIV_set(tmpstr, -1);
9647 else if (term == '`') {
9648 op_type = OP_BACKTICK;
9649 SvIV_set(tmpstr, '\\');
9652 PL_multi_start = origline + 1 + PL_parser->herelines;
9653 PL_multi_open = PL_multi_close = '<';
9654 /* inside a string eval or quote-like operator */
9655 if (!infile || PL_lex_inwhat) {
9658 char * const olds = s;
9659 PERL_CONTEXT * const cx = CX_CUR();
9660 /* These two fields are not set until an inner lexing scope is
9661 entered. But we need them set here. */
9662 shared->ls_bufptr = s;
9663 shared->ls_linestr = PL_linestr;
9665 /* Look for a newline. If the current buffer does not have one,
9666 peek into the line buffer of the parent lexing scope, going
9667 up as many levels as necessary to find one with a newline
9670 while (!(s = (char *)memchr(
9671 (void *)shared->ls_bufptr, '\n',
9672 SvEND(shared->ls_linestr)-shared->ls_bufptr
9674 shared = shared->ls_prev;
9675 /* shared is only null if we have gone beyond the outermost
9676 lexing scope. In a file, we will have broken out of the
9677 loop in the previous iteration. In an eval, the string buf-
9678 fer ends with "\n;", so the while condition above will have
9679 evaluated to false. So shared can never be null. Or so you
9680 might think. Odd syntax errors like s;@{<<; can gobble up
9681 the implicit semicolon at the end of a flie, causing the
9682 file handle to be closed even when we are not in a string
9683 eval. So shared may be null in that case.
9684 (Closing '}' here to balance the earlier open brace for
9685 editors that look for matched pairs.) */
9686 if (UNLIKELY(!shared))
9688 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9689 most lexing scope. In a file, shared->ls_linestr at that
9690 level is just one line, so there is no body to steal. */
9691 if (infile && !shared->ls_prev) {
9696 else { /* eval or we've already hit EOF */
9697 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9701 linestr = shared->ls_linestr;
9702 bufend = SvEND(linestr);
9707 while (s < bufend - len + 1) {
9709 ++PL_parser->herelines;
9711 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
9715 /* Only valid if it's preceded by whitespace only */
9716 while (backup != myolds && --backup >= myolds) {
9717 if (*backup != ' ' && *backup != '\t') {
9724 /* No whitespace or all! */
9725 if (backup == s || *backup == '\n') {
9726 Newxz(indent, indent_len + 1, char);
9727 memcpy(indent, backup + 1, indent_len);
9728 s--; /* before our delimiter */
9729 PL_parser->herelines--; /* this line doesn't count */
9735 while (s < bufend - len + 1
9736 && memNE(s,PL_tokenbuf,len) )
9739 ++PL_parser->herelines;
9743 if (s >= bufend - len + 1) {
9746 sv_setpvn(tmpstr,d+1,s-d);
9748 /* the preceding stmt passes a newline */
9749 PL_parser->herelines++;
9751 /* s now points to the newline after the heredoc terminator.
9752 d points to the newline before the body of the heredoc.
9755 /* We are going to modify linestr in place here, so set
9756 aside copies of the string if necessary for re-evals or
9758 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9759 check shared->re_eval_str. */
9760 if (shared->re_eval_start || shared->re_eval_str) {
9761 /* Set aside the rest of the regexp */
9762 if (!shared->re_eval_str)
9763 shared->re_eval_str =
9764 newSVpvn(shared->re_eval_start,
9765 bufend - shared->re_eval_start);
9766 shared->re_eval_start -= s-d;
9769 && CxTYPE(cx) == CXt_EVAL
9770 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9771 && cx->blk_eval.cur_text == linestr)
9773 cx->blk_eval.cur_text = newSVsv(linestr);
9774 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
9776 /* Copy everything from s onwards back to d. */
9777 Move(s,d,bufend-s + 1,char);
9778 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9779 /* Setting PL_bufend only applies when we have not dug deeper
9780 into other scopes, because sublex_done sets PL_bufend to
9781 SvEND(PL_linestr). */
9782 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9788 char *oldbufptr_save;
9789 char *oldoldbufptr_save;
9791 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
9792 term = PL_tokenbuf[1];
9794 linestr_save = PL_linestr; /* must restore this afterwards */
9795 d = s; /* and this */
9796 oldbufptr_save = PL_oldbufptr;
9797 oldoldbufptr_save = PL_oldoldbufptr;
9798 PL_linestr = newSVpvs("");
9799 PL_bufend = SvPVX(PL_linestr);
9801 PL_bufptr = PL_bufend;
9802 CopLINE_set(PL_curcop,
9803 origline + 1 + PL_parser->herelines);
9804 if (!lex_next_chunk(LEX_NO_TERM)
9805 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9806 /* Simply freeing linestr_save might seem simpler here, as it
9807 does not matter what PL_linestr points to, since we are
9808 about to croak; but in a quote-like op, linestr_save
9809 will have been prospectively freed already, via
9810 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9811 restore PL_linestr. */
9812 SvREFCNT_dec_NN(PL_linestr);
9813 PL_linestr = linestr_save;
9814 PL_oldbufptr = oldbufptr_save;
9815 PL_oldoldbufptr = oldoldbufptr_save;
9818 CopLINE_set(PL_curcop, origline);
9819 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9820 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9821 /* ^That should be enough to avoid this needing to grow: */
9822 sv_catpvs(PL_linestr, "\n\0");
9823 assert(s == SvPVX(PL_linestr));
9824 PL_bufend = SvEND(PL_linestr);
9827 PL_parser->herelines++;
9828 PL_last_lop = PL_last_uni = NULL;
9829 #ifndef PERL_STRICT_CR
9830 if (PL_bufend - PL_linestart >= 2) {
9831 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9832 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9834 PL_bufend[-2] = '\n';
9836 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9838 else if (PL_bufend[-1] == '\r')
9839 PL_bufend[-1] = '\n';
9841 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9842 PL_bufend[-1] = '\n';
9844 if (indented && (PL_bufend-s) >= len) {
9845 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
9848 char *backup = found;
9851 /* Only valid if it's preceded by whitespace only */
9852 while (backup != s && --backup >= s) {
9853 if (*backup != ' ' && *backup != '\t') {
9859 /* All whitespace or none! */
9860 if (backup == found || *backup == ' ' || *backup == '\t') {
9861 Newxz(indent, indent_len + 1, char);
9862 memcpy(indent, backup, indent_len);
9863 SvREFCNT_dec(PL_linestr);
9864 PL_linestr = linestr_save;
9865 PL_linestart = SvPVX(linestr_save);
9866 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9867 PL_oldbufptr = oldbufptr_save;
9868 PL_oldoldbufptr = oldoldbufptr_save;
9874 /* Didn't find it */
9875 sv_catsv(tmpstr,PL_linestr);
9877 if (*s == term && PL_bufend-s >= len
9878 && memEQ(s,PL_tokenbuf + 1,len))
9880 SvREFCNT_dec(PL_linestr);
9881 PL_linestr = linestr_save;
9882 PL_linestart = SvPVX(linestr_save);
9883 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9884 PL_oldbufptr = oldbufptr_save;
9885 PL_oldoldbufptr = oldoldbufptr_save;
9889 sv_catsv(tmpstr,PL_linestr);
9894 PL_multi_end = origline + PL_parser->herelines;
9895 if (indented && indent) {
9896 STRLEN linecount = 1;
9897 STRLEN herelen = SvCUR(tmpstr);
9898 char *ss = SvPVX(tmpstr);
9899 char *se = ss + herelen;
9900 SV *newstr = newSV(herelen+1);
9903 /* Trim leading whitespace */
9905 /* newline only? Copy and move on */
9907 sv_catpv(newstr,"\n");
9911 /* Found our indentation? Strip it */
9912 } else if (se - ss >= indent_len
9913 && memEQ(ss, indent, indent_len))
9919 while ((ss + le) < se && *(ss + le) != '\n')
9922 sv_catpvn(newstr, ss, le);
9926 /* Line doesn't begin with our indentation? Croak */
9929 "Indentation on line %d of here-doc doesn't match delimiter",
9934 /* avoid sv_setsv() as we dont wan't to COW here */
9935 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
9937 SvREFCNT_dec_NN(newstr);
9939 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9940 SvPV_shrink_to_cur(tmpstr);
9943 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9946 PL_lex_stuff = tmpstr;
9947 pl_yylval.ival = op_type;
9951 SvREFCNT_dec(tmpstr);
9952 CopLINE_set(PL_curcop, origline);
9953 missingterm(PL_tokenbuf + 1);
9957 takes: position of first '<' in input buffer
9958 returns: position of first char following the matching '>' in
9960 side-effects: pl_yylval and lex_op are set.
9965 <<>> read from ARGV without magic open
9966 <FH> read from filehandle
9967 <pkg::FH> read from package qualified filehandle
9968 <pkg'FH> read from package qualified filehandle
9969 <$fh> read from filehandle in $fh
9975 S_scan_inputsymbol(pTHX_ char *start)
9977 char *s = start; /* current position in buffer */
9980 bool nomagicopen = FALSE;
9981 char *d = PL_tokenbuf; /* start of temp holding space */
9982 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9984 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9986 end = strchr(s, '\n');
9989 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9996 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9998 /* die if we didn't have space for the contents of the <>,
9999 or if it didn't end, or if we see a newline
10002 if (len >= (I32)sizeof PL_tokenbuf)
10003 Perl_croak(aTHX_ "Excessively long <> operator");
10005 Perl_croak(aTHX_ "Unterminated <> operator");
10010 Remember, only scalar variables are interpreted as filehandles by
10011 this code. Anything more complex (e.g., <$fh{$num}>) will be
10012 treated as a glob() call.
10013 This code makes use of the fact that except for the $ at the front,
10014 a scalar variable and a filehandle look the same.
10016 if (*d == '$' && d[1]) d++;
10018 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10019 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10020 d += UTF ? UTF8SKIP(d) : 1;
10022 /* If we've tried to read what we allow filehandles to look like, and
10023 there's still text left, then it must be a glob() and not a getline.
10024 Use scan_str to pull out the stuff between the <> and treat it
10025 as nothing more than a string.
10028 if (d - PL_tokenbuf != len) {
10029 pl_yylval.ival = OP_GLOB;
10030 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10032 Perl_croak(aTHX_ "Glob not terminated");
10036 bool readline_overriden = FALSE;
10038 /* we're in a filehandle read situation */
10041 /* turn <> into <ARGV> */
10043 Copy("ARGV",d,5,char);
10045 /* Check whether readline() is overriden */
10046 if ((gv_readline = gv_override("readline",8)))
10047 readline_overriden = TRUE;
10049 /* if <$fh>, create the ops to turn the variable into a
10053 /* try to find it in the pad for this block, otherwise find
10054 add symbol table ops
10056 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10057 if (tmp != NOT_IN_PAD) {
10058 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10059 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10060 HEK * const stashname = HvNAME_HEK(stash);
10061 SV * const sym = sv_2mortal(newSVhek(stashname));
10062 sv_catpvs(sym, "::");
10063 sv_catpv(sym, d+1);
10068 OP * const o = newOP(OP_PADSV, 0);
10070 PL_lex_op = readline_overriden
10071 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10072 op_append_elem(OP_LIST, o,
10073 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10074 : newUNOP(OP_READLINE, 0, o);
10082 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10084 PL_lex_op = readline_overriden
10085 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10086 op_append_elem(OP_LIST,
10087 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10088 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10089 : newUNOP(OP_READLINE, 0,
10090 newUNOP(OP_RV2SV, 0,
10091 newGVOP(OP_GV, 0, gv)));
10093 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10094 pl_yylval.ival = OP_NULL;
10097 /* If it's none of the above, it must be a literal filehandle
10098 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10100 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10101 PL_lex_op = readline_overriden
10102 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10103 op_append_elem(OP_LIST,
10104 newGVOP(OP_GV, 0, gv),
10105 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10106 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10107 pl_yylval.ival = OP_NULL;
10117 start position in buffer
10118 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
10119 only if they are of the open/close form
10120 keep_delims preserve the delimiters around the string
10121 re_reparse compiling a run-time /(?{})/:
10122 collapse // to /, and skip encoding src
10123 delimp if non-null, this is set to the position of
10124 the closing delimiter, or just after it if
10125 the closing and opening delimiters differ
10126 (i.e., the opening delimiter of a substitu-
10128 returns: position to continue reading from buffer
10129 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10130 updates the read buffer.
10132 This subroutine pulls a string out of the input. It is called for:
10133 q single quotes q(literal text)
10134 ' single quotes 'literal text'
10135 qq double quotes qq(interpolate $here please)
10136 " double quotes "interpolate $here please"
10137 qx backticks qx(/bin/ls -l)
10138 ` backticks `/bin/ls -l`
10139 qw quote words @EXPORT_OK = qw( func() $spam )
10140 m// regexp match m/this/
10141 s/// regexp substitute s/this/that/
10142 tr/// string transliterate tr/this/that/
10143 y/// string transliterate y/this/that/
10144 ($*@) sub prototypes sub foo ($)
10145 (stuff) sub attr parameters sub foo : attr(stuff)
10146 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10148 In most of these cases (all but <>, patterns and transliterate)
10149 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10150 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10151 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10154 It skips whitespace before the string starts, and treats the first
10155 character as the delimiter. If the delimiter is one of ([{< then
10156 the corresponding "close" character )]}> is used as the closing
10157 delimiter. It allows quoting of delimiters, and if the string has
10158 balanced delimiters ([{<>}]) it allows nesting.
10160 On success, the SV with the resulting string is put into lex_stuff or,
10161 if that is already non-NULL, into lex_repl. The second case occurs only
10162 when parsing the RHS of the special constructs s/// and tr/// (y///).
10163 For convenience, the terminating delimiter character is stuffed into
10168 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10172 SV *sv; /* scalar value: string */
10173 const char *tmps; /* temp string, used for delimiter matching */
10174 char *s = start; /* current position in the buffer */
10175 char term; /* terminating character */
10176 char *to; /* current position in the sv's data */
10177 I32 brackets = 1; /* bracket nesting level */
10178 bool has_utf8 = FALSE; /* is there any utf8 content? */
10179 IV termcode; /* terminating char. code */
10180 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10181 STRLEN termlen; /* length of terminating string */
10184 PERL_ARGS_ASSERT_SCAN_STR;
10186 /* skip space before the delimiter */
10191 /* mark where we are, in case we need to report errors */
10194 /* after skipping whitespace, the next character is the terminator */
10197 termcode = termstr[0] = term;
10201 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10202 Copy(s, termstr, termlen, U8);
10203 if (!UTF8_IS_INVARIANT(term))
10207 /* mark where we are */
10208 PL_multi_start = CopLINE(PL_curcop);
10209 PL_multi_open = termcode;
10210 herelines = PL_parser->herelines;
10212 /* find corresponding closing delimiter */
10213 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10214 termcode = termstr[0] = term = tmps[5];
10216 PL_multi_close = termcode;
10218 if (PL_multi_open == PL_multi_close) {
10219 keep_bracketed_quoted = FALSE;
10222 /* create a new SV to hold the contents. 79 is the SV's initial length.
10223 What a random number. */
10224 sv = newSV_type(SVt_PVIV);
10226 SvIV_set(sv, termcode);
10227 (void)SvPOK_only(sv); /* validate pointer */
10229 /* move past delimiter and try to read a complete string */
10231 sv_catpvn(sv, s, termlen);
10234 /* extend sv if need be */
10235 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10236 /* set 'to' to the next character in the sv's string */
10237 to = SvPVX(sv)+SvCUR(sv);
10239 /* if open delimiter is the close delimiter read unbridle */
10240 if (PL_multi_open == PL_multi_close) {
10241 for (; s < PL_bufend; s++,to++) {
10242 /* embedded newlines increment the current line number */
10243 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10244 COPLINE_INC_WITH_HERELINES;
10245 /* handle quoted delimiters */
10246 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10247 if (!keep_bracketed_quoted
10249 || (re_reparse && s[1] == '\\'))
10252 else /* any other quotes are simply copied straight through */
10255 /* terminate when run out of buffer (the for() condition), or
10256 have found the terminator */
10257 else if (*s == term) {
10260 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10263 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10269 /* if the terminator isn't the same as the start character (e.g.,
10270 matched brackets), we have to allow more in the quoting, and
10271 be prepared for nested brackets.
10274 /* read until we run out of string, or we find the terminator */
10275 for (; s < PL_bufend; s++,to++) {
10276 /* embedded newlines increment the line count */
10277 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10278 COPLINE_INC_WITH_HERELINES;
10279 /* backslashes can escape the open or closing characters */
10280 if (*s == '\\' && s+1 < PL_bufend) {
10281 if (!keep_bracketed_quoted
10282 && ( ((UV)s[1] == PL_multi_open)
10283 || ((UV)s[1] == PL_multi_close) ))
10290 /* allow nested opens and closes */
10291 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10293 else if ((UV)*s == PL_multi_open)
10295 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10300 /* terminate the copied string and update the sv's end-of-string */
10302 SvCUR_set(sv, to - SvPVX_const(sv));
10305 * this next chunk reads more into the buffer if we're not done yet
10309 break; /* handle case where we are done yet :-) */
10311 #ifndef PERL_STRICT_CR
10312 if (to - SvPVX_const(sv) >= 2) {
10313 if ( (to[-2] == '\r' && to[-1] == '\n')
10314 || (to[-2] == '\n' && to[-1] == '\r'))
10318 SvCUR_set(sv, to - SvPVX_const(sv));
10320 else if (to[-1] == '\r')
10323 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10327 /* if we're out of file, or a read fails, bail and reset the current
10328 line marker so we can report where the unterminated string began
10330 COPLINE_INC_WITH_HERELINES;
10331 PL_bufptr = PL_bufend;
10332 if (!lex_next_chunk(0)) {
10334 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10340 /* at this point, we have successfully read the delimited string */
10343 sv_catpvn(sv, s, termlen);
10349 PL_multi_end = CopLINE(PL_curcop);
10350 CopLINE_set(PL_curcop, PL_multi_start);
10351 PL_parser->herelines = herelines;
10353 /* if we allocated too much space, give some back */
10354 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10355 SvLEN_set(sv, SvCUR(sv) + 1);
10356 SvPV_renew(sv, SvLEN(sv));
10359 /* decide whether this is the first or second quoted string we've read
10364 PL_parser->lex_sub_repl = sv;
10367 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10373 takes: pointer to position in buffer
10374 returns: pointer to new position in buffer
10375 side-effects: builds ops for the constant in pl_yylval.op
10377 Read a number in any of the formats that Perl accepts:
10379 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10380 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10381 0b[01](_?[01])* binary integers
10382 0[0-7](_?[0-7])* octal integers
10383 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
10384 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
10386 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10389 If it reads a number without a decimal point or an exponent, it will
10390 try converting the number to an integer and see if it can do so
10391 without loss of precision.
10395 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10397 const char *s = start; /* current position in buffer */
10398 char *d; /* destination in temp buffer */
10399 char *e; /* end of temp buffer */
10400 NV nv; /* number read, as a double */
10401 SV *sv = NULL; /* place to put the converted number */
10402 bool floatit; /* boolean: int or float? */
10403 const char *lastub = NULL; /* position of last underbar */
10404 static const char* const number_too_long = "Number too long";
10405 /* Hexadecimal floating point.
10407 * In many places (where we have quads and NV is IEEE 754 double)
10408 * we can fit the mantissa bits of a NV into an unsigned quad.
10409 * (Note that UVs might not be quads even when we have quads.)
10410 * This will not work everywhere, though (either no quads, or
10411 * using long doubles), in which case we have to resort to NV,
10412 * which will probably mean horrible loss of precision due to
10413 * multiple fp operations. */
10414 bool hexfp = FALSE;
10415 int total_bits = 0;
10416 int significant_bits = 0;
10417 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10418 # define HEXFP_UQUAD
10419 Uquad_t hexfp_uquad = 0;
10420 int hexfp_frac_bits = 0;
10425 NV hexfp_mult = 1.0;
10426 UV high_non_zero = 0; /* highest digit */
10427 int non_zero_integer_digits = 0;
10429 PERL_ARGS_ASSERT_SCAN_NUM;
10431 /* We use the first character to decide what type of number this is */
10435 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10437 /* if it starts with a 0, it could be an octal number, a decimal in
10438 0.13 disguise, or a hexadecimal number, or a binary number. */
10442 u holds the "number so far"
10443 shift the power of 2 of the base
10444 (hex == 4, octal == 3, binary == 1)
10445 overflowed was the number more than we can hold?
10447 Shift is used when we add a digit. It also serves as an "are
10448 we in octal/hex/binary?" indicator to disallow hex characters
10449 when in octal mode.
10454 bool overflowed = FALSE;
10455 bool just_zero = TRUE; /* just plain 0 or binary number? */
10456 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10457 static const char* const bases[5] =
10458 { "", "binary", "", "octal", "hexadecimal" };
10459 static const char* const Bases[5] =
10460 { "", "Binary", "", "Octal", "Hexadecimal" };
10461 static const char* const maxima[5] =
10463 "0b11111111111111111111111111111111",
10467 const char *base, *Base, *max;
10469 /* check for hex */
10470 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10474 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10479 /* check for a decimal in disguise */
10480 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10482 /* so it must be octal */
10489 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10490 "Misplaced _ in number");
10494 base = bases[shift];
10495 Base = Bases[shift];
10496 max = maxima[shift];
10498 /* read the rest of the number */
10500 /* x is used in the overflow test,
10501 b is the digit we're adding on. */
10506 /* if we don't mention it, we're done */
10510 /* _ are ignored -- but warned about if consecutive */
10512 if (lastub && s == lastub + 1)
10513 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10514 "Misplaced _ in number");
10518 /* 8 and 9 are not octal */
10519 case '8': case '9':
10521 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10525 case '2': case '3': case '4':
10526 case '5': case '6': case '7':
10528 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10531 case '0': case '1':
10532 b = *s++ & 15; /* ASCII digit -> value of digit */
10536 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10537 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10538 /* make sure they said 0x */
10541 b = (*s++ & 7) + 9;
10543 /* Prepare to put the digit we have onto the end
10544 of the number so far. We check for overflows.
10550 x = u << shift; /* make room for the digit */
10552 total_bits += shift;
10554 if ((x >> shift) != u
10555 && !(PL_hints & HINT_NEW_BINARY)) {
10558 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10559 "Integer overflow in %s number",
10562 u = x | b; /* add the digit to the end */
10565 n *= nvshift[shift];
10566 /* If an NV has not enough bits in its
10567 * mantissa to represent an UV this summing of
10568 * small low-order numbers is a waste of time
10569 * (because the NV cannot preserve the
10570 * low-order bits anyway): we could just
10571 * remember when did we overflow and in the
10572 * end just multiply n by the right
10577 if (high_non_zero == 0 && b > 0)
10581 non_zero_integer_digits++;
10583 /* this could be hexfp, but peek ahead
10584 * to avoid matching ".." */
10585 if (UNLIKELY(HEXFP_PEEK(s))) {
10593 /* if we get here, we had success: make a scalar value from
10598 /* final misplaced underbar check */
10599 if (s[-1] == '_') {
10600 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10603 if (UNLIKELY(HEXFP_PEEK(s))) {
10604 /* Do sloppy (on the underbars) but quick detection
10605 * (and value construction) for hexfp, the decimal
10606 * detection will shortly be more thorough with the
10607 * underbar checks. */
10609 significant_bits = non_zero_integer_digits * shift;
10612 #else /* HEXFP_NV */
10615 /* Ignore the leading zero bits of
10616 * the high (first) non-zero digit. */
10617 if (high_non_zero) {
10618 if (high_non_zero < 0x8)
10619 significant_bits--;
10620 if (high_non_zero < 0x4)
10621 significant_bits--;
10622 if (high_non_zero < 0x2)
10623 significant_bits--;
10630 bool accumulate = TRUE;
10631 for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10632 if (isXDIGIT(*h)) {
10633 U8 b = XDIGIT_VALUE(*h);
10634 significant_bits += shift;
10637 if (significant_bits < NV_MANT_DIG) {
10638 /* We are in the long "run" of xdigits,
10639 * accumulate the full four bits. */
10640 hexfp_uquad <<= shift;
10642 hexfp_frac_bits += shift;
10644 /* We are at a hexdigit either at,
10645 * or straddling, the edge of mantissa.
10646 * We will try grabbing as many as
10647 * possible bits. */
10649 significant_bits - NV_MANT_DIG;
10652 hexfp_uquad <<= tail;
10653 hexfp_uquad |= b >> (shift - tail);
10654 hexfp_frac_bits += tail;
10656 /* Ignore the trailing zero bits
10657 * of the last non-zero xdigit.
10659 * The assumption here is that if
10660 * one has input of e.g. the xdigit
10661 * eight (0x8), there is only one
10662 * bit being input, not the full
10663 * four bits. Conversely, if one
10664 * specifies a zero xdigit, the
10665 * assumption is that one really
10666 * wants all those bits to be zero. */
10668 if ((b & 0x1) == 0x0) {
10669 significant_bits--;
10670 if ((b & 0x2) == 0x0) {
10671 significant_bits--;
10672 if ((b & 0x4) == 0x0) {
10673 significant_bits--;
10679 accumulate = FALSE;
10682 /* Keep skipping the xdigits, and
10683 * accumulating the significant bits,
10684 * but do not shift the uquad
10685 * (which would catastrophically drop
10686 * high-order bits) or accumulate the
10687 * xdigits anymore. */
10689 #else /* HEXFP_NV */
10693 hexfp_nv += b * nv_mult;
10695 accumulate = FALSE;
10699 if (significant_bits >= NV_MANT_DIG)
10700 accumulate = FALSE;
10704 if ((total_bits > 0 || significant_bits > 0) &&
10705 isALPHA_FOLD_EQ(*h, 'p')) {
10706 bool negexp = FALSE;
10710 else if (*h == '-') {
10716 while (isDIGIT(*h) || *h == '_') {
10719 hexfp_exp += *h - '0';
10722 && -hexfp_exp < NV_MIN_EXP - 1) {
10723 /* NOTE: this means that the exponent
10724 * underflow warning happens for
10725 * the IEEE 754 subnormals (denormals),
10726 * because DBL_MIN_EXP etc are the lowest
10727 * possible binary (or, rather, DBL_RADIX-base)
10728 * exponent for normals, not subnormals.
10730 * This may or may not be a good thing. */
10731 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10732 "Hexadecimal float: exponent underflow");
10738 && hexfp_exp > NV_MAX_EXP - 1) {
10739 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10740 "Hexadecimal float: exponent overflow");
10748 hexfp_exp = -hexfp_exp;
10750 hexfp_exp -= hexfp_frac_bits;
10752 hexfp_mult = Perl_pow(2.0, hexfp_exp);
10760 if (n > 4294967295.0)
10761 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10762 "%s number > %s non-portable",
10768 if (u > 0xffffffff)
10769 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10770 "%s number > %s non-portable",
10775 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10776 sv = new_constant(start, s - start, "integer",
10777 sv, NULL, NULL, 0);
10778 else if (PL_hints & HINT_NEW_BINARY)
10779 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10784 handle decimal numbers.
10785 we're also sent here when we read a 0 as the first digit
10787 case '1': case '2': case '3': case '4': case '5':
10788 case '6': case '7': case '8': case '9': case '.':
10791 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10800 /* read next group of digits and _ and copy into d */
10803 || UNLIKELY(hexfp && isXDIGIT(*s)))
10805 /* skip underscores, checking for misplaced ones
10809 if (lastub && s == lastub + 1)
10810 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10811 "Misplaced _ in number");
10815 /* check for end of fixed-length buffer */
10817 Perl_croak(aTHX_ "%s", number_too_long);
10818 /* if we're ok, copy the character */
10823 /* final misplaced underbar check */
10824 if (lastub && s == lastub + 1) {
10825 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10828 /* read a decimal portion if there is one. avoid
10829 3..5 being interpreted as the number 3. followed
10832 if (*s == '.' && s[1] != '.') {
10837 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10838 "Misplaced _ in number");
10842 /* copy, ignoring underbars, until we run out of digits.
10846 || UNLIKELY(hexfp && isXDIGIT(*s));
10849 /* fixed length buffer check */
10851 Perl_croak(aTHX_ "%s", number_too_long);
10853 if (lastub && s == lastub + 1)
10854 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10855 "Misplaced _ in number");
10861 /* fractional part ending in underbar? */
10862 if (s[-1] == '_') {
10863 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10864 "Misplaced _ in number");
10866 if (*s == '.' && isDIGIT(s[1])) {
10867 /* oops, it's really a v-string, but without the "v" */
10873 /* read exponent part, if present */
10874 if ((isALPHA_FOLD_EQ(*s, 'e')
10875 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10876 && strchr("+-0123456789_", s[1]))
10880 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10881 ditto for p (hexfloats) */
10882 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10883 /* At least some Mach atof()s don't grok 'E' */
10886 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10893 /* stray preinitial _ */
10895 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10896 "Misplaced _ in number");
10900 /* allow positive or negative exponent */
10901 if (*s == '+' || *s == '-')
10904 /* stray initial _ */
10906 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10907 "Misplaced _ in number");
10911 /* read digits of exponent */
10912 while (isDIGIT(*s) || *s == '_') {
10915 Perl_croak(aTHX_ "%s", number_too_long);
10919 if (((lastub && s == lastub + 1)
10920 || (!isDIGIT(s[1]) && s[1] != '_')))
10921 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10922 "Misplaced _ in number");
10930 We try to do an integer conversion first if no characters
10931 indicating "float" have been found.
10936 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10938 if (flags == IS_NUMBER_IN_UV) {
10940 sv = newSViv(uv); /* Prefer IVs over UVs. */
10943 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10944 if (uv <= (UV) IV_MIN)
10945 sv = newSViv(-(IV)uv);
10952 STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
10953 /* terminate the string */
10955 if (UNLIKELY(hexfp)) {
10956 # ifdef NV_MANT_DIG
10957 if (significant_bits > NV_MANT_DIG)
10958 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10959 "Hexadecimal float: mantissa overflow");
10962 nv = hexfp_uquad * hexfp_mult;
10963 #else /* HEXFP_NV */
10964 nv = hexfp_nv * hexfp_mult;
10967 nv = Atof(PL_tokenbuf);
10969 RESTORE_LC_NUMERIC_UNDERLYING();
10974 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10975 const char *const key = floatit ? "float" : "integer";
10976 const STRLEN keylen = floatit ? 5 : 7;
10977 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10978 key, keylen, sv, NULL, NULL, 0);
10982 /* if it starts with a v, it could be a v-string */
10985 sv = newSV(5); /* preallocate storage space */
10986 ENTER_with_name("scan_vstring");
10988 s = scan_vstring(s, PL_bufend, sv);
10989 SvREFCNT_inc_simple_void_NN(sv);
10990 LEAVE_with_name("scan_vstring");
10994 /* make the op for the constant and return */
10997 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10999 lvalp->opval = NULL;
11005 S_scan_formline(pTHX_ char *s)
11009 SV * const stuff = newSVpvs("");
11010 bool needargs = FALSE;
11011 bool eofmt = FALSE;
11013 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11015 while (!needargs) {
11018 #ifdef PERL_STRICT_CR
11019 while (SPACE_OR_TAB(*t))
11022 while (SPACE_OR_TAB(*t) || *t == '\r')
11025 if (*t == '\n' || t == PL_bufend) {
11030 eol = (char *) memchr(s,'\n',PL_bufend-s);
11034 for (t = s; t < eol; t++) {
11035 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11037 goto enough; /* ~~ must be first line in formline */
11039 if (*t == '@' || *t == '^')
11043 sv_catpvn(stuff, s, eol-s);
11044 #ifndef PERL_STRICT_CR
11045 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11046 char *end = SvPVX(stuff) + SvCUR(stuff);
11049 SvCUR_set(stuff, SvCUR(stuff) - 1);
11057 if ((PL_rsfp || PL_parser->filtered)
11058 && PL_parser->form_lex_state == LEX_NORMAL) {
11060 PL_bufptr = PL_bufend;
11061 COPLINE_INC_WITH_HERELINES;
11062 got_some = lex_next_chunk(0);
11063 CopLINE_dec(PL_curcop);
11071 if (!SvCUR(stuff) || needargs)
11072 PL_lex_state = PL_parser->form_lex_state;
11073 if (SvCUR(stuff)) {
11074 PL_expect = XSTATE;
11076 const char *s2 = s;
11077 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
11081 PL_expect = XTERMBLOCK;
11082 NEXTVAL_NEXTTOKE.ival = 0;
11085 NEXTVAL_NEXTTOKE.ival = 0;
11086 force_next(FORMLBRACK);
11089 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11092 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11096 SvREFCNT_dec(stuff);
11098 PL_lex_formbrack = 0;
11104 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11106 const I32 oldsavestack_ix = PL_savestack_ix;
11107 CV* const outsidecv = PL_compcv;
11109 SAVEI32(PL_subline);
11110 save_item(PL_subname);
11111 SAVESPTR(PL_compcv);
11113 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11114 CvFLAGS(PL_compcv) |= flags;
11116 PL_subline = CopLINE(PL_curcop);
11117 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11118 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11119 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11120 if (outsidecv && CvPADLIST(outsidecv))
11121 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11123 return oldsavestack_ix;
11127 S_yywarn(pTHX_ const char *const s, U32 flags)
11129 PERL_ARGS_ASSERT_YYWARN;
11131 PL_in_eval |= EVAL_WARNONLY;
11132 yyerror_pv(s, flags);
11137 Perl_yyerror(pTHX_ const char *const s)
11139 PERL_ARGS_ASSERT_YYERROR;
11140 return yyerror_pvn(s, strlen(s), 0);
11144 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11146 PERL_ARGS_ASSERT_YYERROR_PV;
11147 return yyerror_pvn(s, strlen(s), flags);
11151 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11153 const char *context = NULL;
11156 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11157 int yychar = PL_parser->yychar;
11159 PERL_ARGS_ASSERT_YYERROR_PVN;
11161 if (!yychar || (yychar == ';' && !PL_rsfp))
11162 sv_catpvs(where_sv, "at EOF");
11163 else if ( PL_oldoldbufptr
11164 && PL_bufptr > PL_oldoldbufptr
11165 && PL_bufptr - PL_oldoldbufptr < 200
11166 && PL_oldoldbufptr != PL_oldbufptr
11167 && PL_oldbufptr != PL_bufptr)
11171 The code below is removed for NetWare because it abends/crashes on NetWare
11172 when the script has error such as not having the closing quotes like:
11173 if ($var eq "value)
11174 Checking of white spaces is anyway done in NetWare code.
11177 while (isSPACE(*PL_oldoldbufptr))
11180 context = PL_oldoldbufptr;
11181 contlen = PL_bufptr - PL_oldoldbufptr;
11183 else if ( PL_oldbufptr
11184 && PL_bufptr > PL_oldbufptr
11185 && PL_bufptr - PL_oldbufptr < 200
11186 && PL_oldbufptr != PL_bufptr) {
11189 The code below is removed for NetWare because it abends/crashes on NetWare
11190 when the script has error such as not having the closing quotes like:
11191 if ($var eq "value)
11192 Checking of white spaces is anyway done in NetWare code.
11195 while (isSPACE(*PL_oldbufptr))
11198 context = PL_oldbufptr;
11199 contlen = PL_bufptr - PL_oldbufptr;
11201 else if (yychar > 255)
11202 sv_catpvs(where_sv, "next token ???");
11203 else if (yychar == YYEMPTY) {
11204 if (PL_lex_state == LEX_NORMAL)
11205 sv_catpvs(where_sv, "at end of line");
11206 else if (PL_lex_inpat)
11207 sv_catpvs(where_sv, "within pattern");
11209 sv_catpvs(where_sv, "within string");
11212 sv_catpvs(where_sv, "next char ");
11214 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11215 else if (isPRINT_LC(yychar)) {
11216 const char string = yychar;
11217 sv_catpvn(where_sv, &string, 1);
11220 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11222 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11223 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11224 OutCopFILE(PL_curcop),
11225 (IV)(PL_parser->preambling == NOLINE
11226 ? CopLINE(PL_curcop)
11227 : PL_parser->preambling));
11229 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11230 UTF8fARG(UTF, contlen, context));
11232 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11233 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11234 Perl_sv_catpvf(aTHX_ msg,
11235 " (Might be a runaway multi-line %c%c string starting on line %" IVdf ")\n",
11236 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11239 if (PL_in_eval & EVAL_WARNONLY) {
11240 PL_in_eval &= ~EVAL_WARNONLY;
11241 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11245 if (PL_error_count >= 10) {
11247 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11248 Perl_croak(aTHX_ "%" SVf "%s has too many errors.\n",
11249 SVfARG(errsv), OutCopFILE(PL_curcop));
11251 Perl_croak(aTHX_ "%s has too many errors.\n",
11252 OutCopFILE(PL_curcop));
11255 PL_in_my_stash = NULL;
11260 S_swallow_bom(pTHX_ U8 *s)
11262 const STRLEN slen = SvCUR(PL_linestr);
11264 PERL_ARGS_ASSERT_SWALLOW_BOM;
11268 if (s[1] == 0xFE) {
11269 /* UTF-16 little-endian? (or UTF-32LE?) */
11270 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11271 /* diag_listed_as: Unsupported script encoding %s */
11272 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11273 #ifndef PERL_NO_UTF16_FILTER
11274 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11276 if (PL_bufend > (char*)s) {
11277 s = add_utf16_textfilter(s, TRUE);
11280 /* diag_listed_as: Unsupported script encoding %s */
11281 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11286 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11287 #ifndef PERL_NO_UTF16_FILTER
11288 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11290 if (PL_bufend > (char *)s) {
11291 s = add_utf16_textfilter(s, FALSE);
11294 /* diag_listed_as: Unsupported script encoding %s */
11295 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11299 case BOM_UTF8_FIRST_BYTE: {
11300 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11301 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11302 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11303 s += len + 1; /* UTF-8 */
11310 if (s[2] == 0xFE && s[3] == 0xFF) {
11311 /* UTF-32 big-endian */
11312 /* diag_listed_as: Unsupported script encoding %s */
11313 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11316 else if (s[2] == 0 && s[3] != 0) {
11319 * are a good indicator of UTF-16BE. */
11320 #ifndef PERL_NO_UTF16_FILTER
11321 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11322 s = add_utf16_textfilter(s, FALSE);
11324 /* diag_listed_as: Unsupported script encoding %s */
11325 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11332 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11335 * are a good indicator of UTF-16LE. */
11336 #ifndef PERL_NO_UTF16_FILTER
11337 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11338 s = add_utf16_textfilter(s, TRUE);
11340 /* diag_listed_as: Unsupported script encoding %s */
11341 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11349 #ifndef PERL_NO_UTF16_FILTER
11351 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11353 SV *const filter = FILTER_DATA(idx);
11354 /* We re-use this each time round, throwing the contents away before we
11356 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11357 SV *const utf8_buffer = filter;
11358 IV status = IoPAGE(filter);
11359 const bool reverse = cBOOL(IoLINES(filter));
11362 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11364 /* As we're automatically added, at the lowest level, and hence only called
11365 from this file, we can be sure that we're not called in block mode. Hence
11366 don't bother writing code to deal with block mode. */
11368 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11371 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11373 DEBUG_P(PerlIO_printf(Perl_debug_log,
11374 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11375 FPTR2DPTR(void *, S_utf16_textfilter),
11376 reverse ? 'l' : 'b', idx, maxlen, status,
11377 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11384 /* First, look in our buffer of existing UTF-8 data: */
11385 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11389 } else if (status == 0) {
11391 IoPAGE(filter) = 0;
11392 nl = SvEND(utf8_buffer);
11395 STRLEN got = nl - SvPVX(utf8_buffer);
11396 /* Did we have anything to append? */
11398 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11399 /* Everything else in this code works just fine if SVp_POK isn't
11400 set. This, however, needs it, and we need it to work, else
11401 we loop infinitely because the buffer is never consumed. */
11402 sv_chop(utf8_buffer, nl);
11406 /* OK, not a complete line there, so need to read some more UTF-16.
11407 Read an extra octect if the buffer currently has an odd number. */
11411 if (SvCUR(utf16_buffer) >= 2) {
11412 /* Location of the high octet of the last complete code point.
11413 Gosh, UTF-16 is a pain. All the benefits of variable length,
11414 *coupled* with all the benefits of partial reads and
11416 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11417 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11419 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11423 /* We have the first half of a surrogate. Read more. */
11424 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11427 status = FILTER_READ(idx + 1, utf16_buffer,
11428 160 + (SvCUR(utf16_buffer) & 1));
11429 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11430 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11433 IoPAGE(filter) = status;
11438 chars = SvCUR(utf16_buffer) >> 1;
11439 have = SvCUR(utf8_buffer);
11440 SvGROW(utf8_buffer, have + chars * 3 + 1);
11443 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11444 (U8*)SvPVX_const(utf8_buffer) + have,
11445 chars * 2, &newlen);
11447 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11448 (U8*)SvPVX_const(utf8_buffer) + have,
11449 chars * 2, &newlen);
11451 SvCUR_set(utf8_buffer, have + newlen);
11454 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11455 it's private to us, and utf16_to_utf8{,reversed} take a
11456 (pointer,length) pair, rather than a NUL-terminated string. */
11457 if(SvCUR(utf16_buffer) & 1) {
11458 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11459 SvCUR_set(utf16_buffer, 1);
11461 SvCUR_set(utf16_buffer, 0);
11464 DEBUG_P(PerlIO_printf(Perl_debug_log,
11465 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11467 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11468 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11473 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11475 SV *filter = filter_add(S_utf16_textfilter, NULL);
11477 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11479 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11481 IoLINES(filter) = reversed;
11482 IoPAGE(filter) = 1; /* Not EOF */
11484 /* Sadly, we have to return a valid pointer, come what may, so we have to
11485 ignore any error return from this. */
11486 SvCUR_set(PL_linestr, 0);
11487 if (FILTER_READ(0, PL_linestr, 0)) {
11488 SvUTF8_on(PL_linestr);
11490 SvUTF8_on(PL_linestr);
11492 PL_bufend = SvEND(PL_linestr);
11493 return (U8*)SvPVX(PL_linestr);
11498 Returns a pointer to the next character after the parsed
11499 vstring, as well as updating the passed in sv.
11501 Function must be called like
11503 sv = sv_2mortal(newSV(5));
11504 s = scan_vstring(s,e,sv);
11506 where s and e are the start and end of the string.
11507 The sv should already be large enough to store the vstring
11508 passed in, for performance reasons.
11510 This function may croak if fatal warnings are enabled in the
11511 calling scope, hence the sv_2mortal in the example (to prevent
11512 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11518 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11520 const char *pos = s;
11521 const char *start = s;
11523 PERL_ARGS_ASSERT_SCAN_VSTRING;
11525 if (*pos == 'v') pos++; /* get past 'v' */
11526 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11528 if ( *pos != '.') {
11529 /* this may not be a v-string if followed by => */
11530 const char *next = pos;
11531 while (next < e && isSPACE(*next))
11533 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11534 /* return string not v-string */
11535 sv_setpvn(sv,(char *)s,pos-s);
11536 return (char *)pos;
11540 if (!isALPHA(*pos)) {
11541 U8 tmpbuf[UTF8_MAXBYTES+1];
11544 s++; /* get past 'v' */
11549 /* this is atoi() that tolerates underscores */
11552 const char *end = pos;
11554 while (--end >= s) {
11556 const UV orev = rev;
11557 rev += (*end - '0') * mult;
11560 /* diag_listed_as: Integer overflow in %s number */
11561 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11562 "Integer overflow in decimal number");
11566 /* Append native character for the rev point */
11567 tmpend = uvchr_to_utf8(tmpbuf, rev);
11568 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11569 if (!UVCHR_IS_INVARIANT(rev))
11571 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11577 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11581 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11588 Perl_keyword_plugin_standard(pTHX_
11589 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11591 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11592 PERL_UNUSED_CONTEXT;
11593 PERL_UNUSED_ARG(keyword_ptr);
11594 PERL_UNUSED_ARG(keyword_len);
11595 PERL_UNUSED_ARG(op_ptr);
11596 return KEYWORD_PLUGIN_DECLINE;
11599 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11601 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11603 SAVEI32(PL_lex_brackets);
11604 if (PL_lex_brackets > 100)
11605 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11606 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11607 SAVEI32(PL_lex_allbrackets);
11608 PL_lex_allbrackets = 0;
11609 SAVEI8(PL_lex_fakeeof);
11610 PL_lex_fakeeof = (U8)fakeeof;
11611 if(yyparse(gramtype) && !PL_parser->error_count)
11612 qerror(Perl_mess(aTHX_ "Parse error"));
11615 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11617 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11621 SAVEVPTR(PL_eval_root);
11622 PL_eval_root = NULL;
11623 parse_recdescent(gramtype, fakeeof);
11629 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11631 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11634 if (flags & ~PARSE_OPTIONAL)
11635 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11636 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11637 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11638 if (!PL_parser->error_count)
11639 qerror(Perl_mess(aTHX_ "Parse error"));
11640 exprop = newOP(OP_NULL, 0);
11646 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11648 Parse a Perl arithmetic expression. This may contain operators of precedence
11649 down to the bit shift operators. The expression must be followed (and thus
11650 terminated) either by a comparison or lower-precedence operator or by
11651 something that would normally terminate an expression such as semicolon.
11652 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11653 otherwise it is mandatory. It is up to the caller to ensure that the
11654 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11655 the source of the code to be parsed and the lexical context for the
11658 The op tree representing the expression is returned. If an optional
11659 expression is absent, a null pointer is returned, otherwise the pointer
11662 If an error occurs in parsing or compilation, in most cases a valid op
11663 tree is returned anyway. The error is reflected in the parser state,
11664 normally resulting in a single exception at the top level of parsing
11665 which covers all the compilation errors that occurred. Some compilation
11666 errors, however, will throw an exception immediately.
11672 Perl_parse_arithexpr(pTHX_ U32 flags)
11674 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11678 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11680 Parse a Perl term expression. This may contain operators of precedence
11681 down to the assignment operators. The expression must be followed (and thus
11682 terminated) either by a comma or lower-precedence operator or by
11683 something that would normally terminate an expression such as semicolon.
11684 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11685 otherwise it is mandatory. It is up to the caller to ensure that the
11686 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11687 the source of the code to be parsed and the lexical context for the
11690 The op tree representing the expression is returned. If an optional
11691 expression is absent, a null pointer is returned, otherwise the pointer
11694 If an error occurs in parsing or compilation, in most cases a valid op
11695 tree is returned anyway. The error is reflected in the parser state,
11696 normally resulting in a single exception at the top level of parsing
11697 which covers all the compilation errors that occurred. Some compilation
11698 errors, however, will throw an exception immediately.
11704 Perl_parse_termexpr(pTHX_ U32 flags)
11706 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11710 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11712 Parse a Perl list expression. This may contain operators of precedence
11713 down to the comma operator. The expression must be followed (and thus
11714 terminated) either by a low-precedence logic operator such as C<or> or by
11715 something that would normally terminate an expression such as semicolon.
11716 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11717 otherwise it is mandatory. It is up to the caller to ensure that the
11718 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11719 the source of the code to be parsed and the lexical context for the
11722 The op tree representing the expression is returned. If an optional
11723 expression is absent, a null pointer is returned, otherwise the pointer
11726 If an error occurs in parsing or compilation, in most cases a valid op
11727 tree is returned anyway. The error is reflected in the parser state,
11728 normally resulting in a single exception at the top level of parsing
11729 which covers all the compilation errors that occurred. Some compilation
11730 errors, however, will throw an exception immediately.
11736 Perl_parse_listexpr(pTHX_ U32 flags)
11738 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11742 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11744 Parse a single complete Perl expression. This allows the full
11745 expression grammar, including the lowest-precedence operators such
11746 as C<or>. The expression must be followed (and thus terminated) by a
11747 token that an expression would normally be terminated by: end-of-file,
11748 closing bracketing punctuation, semicolon, or one of the keywords that
11749 signals a postfix expression-statement modifier. If C<flags> has the
11750 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11751 mandatory. It is up to the caller to ensure that the dynamic parser
11752 state (L</PL_parser> et al) is correctly set to reflect the source of
11753 the code to be parsed and the lexical context for the expression.
11755 The op tree representing the expression is returned. If an optional
11756 expression is absent, a null pointer is returned, otherwise the pointer
11759 If an error occurs in parsing or compilation, in most cases a valid op
11760 tree is returned anyway. The error is reflected in the parser state,
11761 normally resulting in a single exception at the top level of parsing
11762 which covers all the compilation errors that occurred. Some compilation
11763 errors, however, will throw an exception immediately.
11769 Perl_parse_fullexpr(pTHX_ U32 flags)
11771 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11775 =for apidoc Amx|OP *|parse_block|U32 flags
11777 Parse a single complete Perl code block. This consists of an opening
11778 brace, a sequence of statements, and a closing brace. The block
11779 constitutes a lexical scope, so C<my> variables and various compile-time
11780 effects can be contained within it. It is up to the caller to ensure
11781 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11782 reflect the source of the code to be parsed and the lexical context for
11785 The op tree representing the code block is returned. This is always a
11786 real op, never a null pointer. It will normally be a C<lineseq> list,
11787 including C<nextstate> or equivalent ops. No ops to construct any kind
11788 of runtime scope are included by virtue of it being a block.
11790 If an error occurs in parsing or compilation, in most cases a valid op
11791 tree (most likely null) is returned anyway. The error is reflected in
11792 the parser state, normally resulting in a single exception at the top
11793 level of parsing which covers all the compilation errors that occurred.
11794 Some compilation errors, however, will throw an exception immediately.
11796 The C<flags> parameter is reserved for future use, and must always
11803 Perl_parse_block(pTHX_ U32 flags)
11806 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11807 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11811 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11813 Parse a single unadorned Perl statement. This may be a normal imperative
11814 statement or a declaration that has compile-time effect. It does not
11815 include any label or other affixture. It is up to the caller to ensure
11816 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11817 reflect the source of the code to be parsed and the lexical context for
11820 The op tree representing the statement is returned. This may be a
11821 null pointer if the statement is null, for example if it was actually
11822 a subroutine definition (which has compile-time side effects). If not
11823 null, it will be ops directly implementing the statement, suitable to
11824 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11825 equivalent op (except for those embedded in a scope contained entirely
11826 within the statement).
11828 If an error occurs in parsing or compilation, in most cases a valid op
11829 tree (most likely null) is returned anyway. The error is reflected in
11830 the parser state, normally resulting in a single exception at the top
11831 level of parsing which covers all the compilation errors that occurred.
11832 Some compilation errors, however, will throw an exception immediately.
11834 The C<flags> parameter is reserved for future use, and must always
11841 Perl_parse_barestmt(pTHX_ U32 flags)
11844 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11845 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11849 =for apidoc Amx|SV *|parse_label|U32 flags
11851 Parse a single label, possibly optional, of the type that may prefix a
11852 Perl statement. It is up to the caller to ensure that the dynamic parser
11853 state (L</PL_parser> et al) is correctly set to reflect the source of
11854 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
11855 label is optional, otherwise it is mandatory.
11857 The name of the label is returned in the form of a fresh scalar. If an
11858 optional label is absent, a null pointer is returned.
11860 If an error occurs in parsing, which can only occur if the label is
11861 mandatory, a valid label is returned anyway. The error is reflected in
11862 the parser state, normally resulting in a single exception at the top
11863 level of parsing which covers all the compilation errors that occurred.
11869 Perl_parse_label(pTHX_ U32 flags)
11871 if (flags & ~PARSE_OPTIONAL)
11872 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11874 PL_parser->yychar = yylex();
11875 if (PL_parser->yychar == LABEL) {
11876 char * const lpv = pl_yylval.pval;
11877 STRLEN llen = strlen(lpv);
11878 PL_parser->yychar = YYEMPTY;
11879 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11886 STRLEN wlen, bufptr_pos;
11889 if (!isIDFIRST_lazy_if(s, UTF))
11891 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11892 if (word_takes_any_delimiter(s, wlen))
11894 bufptr_pos = s - SvPVX(PL_linestr);
11896 lex_read_space(LEX_KEEP_PREVIOUS);
11898 s = SvPVX(PL_linestr) + bufptr_pos;
11899 if (t[0] == ':' && t[1] != ':') {
11900 PL_oldoldbufptr = PL_oldbufptr;
11903 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11907 if (flags & PARSE_OPTIONAL) {
11910 qerror(Perl_mess(aTHX_ "Parse error"));
11911 return newSVpvs("x");
11918 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11920 Parse a single complete Perl statement. This may be a normal imperative
11921 statement or a declaration that has compile-time effect, and may include
11922 optional labels. It is up to the caller to ensure that the dynamic
11923 parser state (L</PL_parser> et al) is correctly set to reflect the source
11924 of the code to be parsed and the lexical context for the statement.
11926 The op tree representing the statement is returned. This may be a
11927 null pointer if the statement is null, for example if it was actually
11928 a subroutine definition (which has compile-time side effects). If not
11929 null, it will be the result of a L</newSTATEOP> call, normally including
11930 a C<nextstate> or equivalent op.
11932 If an error occurs in parsing or compilation, in most cases a valid op
11933 tree (most likely null) is returned anyway. The error is reflected in
11934 the parser state, normally resulting in a single exception at the top
11935 level of parsing which covers all the compilation errors that occurred.
11936 Some compilation errors, however, will throw an exception immediately.
11938 The C<flags> parameter is reserved for future use, and must always
11945 Perl_parse_fullstmt(pTHX_ U32 flags)
11948 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11949 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11953 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11955 Parse a sequence of zero or more Perl statements. These may be normal
11956 imperative statements, including optional labels, or declarations
11957 that have compile-time effect, or any mixture thereof. The statement
11958 sequence ends when a closing brace or end-of-file is encountered in a
11959 place where a new statement could have validly started. It is up to
11960 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11961 is correctly set to reflect the source of the code to be parsed and the
11962 lexical context for the statements.
11964 The op tree representing the statement sequence is returned. This may
11965 be a null pointer if the statements were all null, for example if there
11966 were no statements or if there were only subroutine definitions (which
11967 have compile-time side effects). If not null, it will be a C<lineseq>
11968 list, normally including C<nextstate> or equivalent ops.
11970 If an error occurs in parsing or compilation, in most cases a valid op
11971 tree is returned anyway. The error is reflected in the parser state,
11972 normally resulting in a single exception at the top level of parsing
11973 which covers all the compilation errors that occurred. Some compilation
11974 errors, however, will throw an exception immediately.
11976 The C<flags> parameter is reserved for future use, and must always
11983 Perl_parse_stmtseq(pTHX_ U32 flags)
11988 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11989 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11990 c = lex_peek_unichar(0);
11991 if (c != -1 && c != /*{*/'}')
11992 qerror(Perl_mess(aTHX_ "Parse error"));
11997 * ex: set ts=8 sts=4 sw=4 et: