3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_inline.h"
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
91 static const char* const ident_too_long = "Identifier too long";
93 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
95 #define XENUMMASK 0x3f
97 #define XFAKEBRACK 0x80
99 #ifdef USE_UTF8_SCRIPTS
100 # define UTF cBOOL(!IN_BYTES)
102 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
105 /* The maximum number of characters preceding the unrecognized one to display */
106 #define UNRECOGNIZED_PRECEDE_COUNT 10
108 /* In variables named $^X, these are the legal values for X.
109 * 1999-02-27 mjd-perl-patch@plover.com */
110 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
112 #define SPACE_OR_TAB(c) isBLANK_A(c)
114 #define HEXFP_PEEK(s) \
116 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
117 isALPHA_FOLD_EQ(s[0], 'p'))
119 /* LEX_* are values for PL_lex_state, the state of the lexer.
120 * They are arranged oddly so that the guard on the switch statement
121 * can get by with a single comparison (if the compiler is smart enough).
123 * These values refer to the various states within a sublex parse,
124 * i.e. within a double quotish string
127 /* #define LEX_NOTPARSING 11 is done in perl.h. */
129 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
130 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
131 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
132 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
133 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
135 /* at end of code, eg "$x" followed by: */
136 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
137 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
139 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
140 string or after \E, $foo, etc */
141 #define LEX_INTERPCONST 2 /* NOT USED */
142 #define LEX_FORMLINE 1 /* expecting a format line */
146 static const char* const lex_state_names[] = {
161 #include "keywords.h"
163 /* CLINE is a macro that ensures PL_copline has a sane value */
165 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
168 * Convenience functions to return different tokens and prime the
169 * lexer for the next token. They all take an argument.
171 * TOKEN : generic token (used for '(', DOLSHARP, etc)
172 * OPERATOR : generic operator
173 * AOPERATOR : assignment operator
174 * PREBLOCK : beginning the block after an if, while, foreach, ...
175 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
176 * PREREF : *EXPR where EXPR is not a simple identifier
177 * TERM : expression term
178 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
179 * LOOPX : loop exiting command (goto, last, dump, etc)
180 * FTST : file test operator
181 * FUN0 : zero-argument function
182 * FUN0OP : zero-argument function, with its op created in this file
183 * FUN1 : not used, except for not, which isn't a UNIOP
184 * BOop : bitwise or or xor
186 * BCop : bitwise complement
187 * SHop : shift operator
188 * PWop : power operator
189 * PMop : pattern-matching operator
190 * Aop : addition-level operator
191 * AopNOASSIGN : addition-level operator that is never part of .=
192 * Mop : multiplication-level operator
193 * Eop : equality-testing operator
194 * Rop : relational operator <= != gt
196 * Also see LOP and lop() below.
199 #ifdef DEBUGGING /* Serve -DT. */
200 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
202 # define REPORT(retval) (retval)
205 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
206 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
207 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
208 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
209 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
210 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
211 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
212 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
213 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
215 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
217 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
218 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
219 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
220 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
221 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
222 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
223 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
225 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
226 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
227 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
228 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
229 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
230 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
231 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
232 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
234 /* This bit of chicanery makes a unary function followed by
235 * a parenthesis into a function with one argument, highest precedence.
236 * The UNIDOR macro is for unary functions that can be followed by the //
237 * operator (such as C<shift // 0>).
239 #define UNI3(f,x,have_x) { \
240 pl_yylval.ival = f; \
241 if (have_x) PL_expect = x; \
243 PL_last_uni = PL_oldbufptr; \
244 PL_last_lop_op = f; \
246 return REPORT( (int)FUNC1 ); \
248 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
250 #define UNI(f) UNI3(f,XTERM,1)
251 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
252 #define UNIPROTO(f,optional) { \
253 if (optional) PL_last_uni = PL_oldbufptr; \
257 #define UNIBRACK(f) UNI3(f,0,0)
259 /* grandfather return to old style */
262 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
263 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
264 pl_yylval.ival = (f); \
270 #define COPLINE_INC_WITH_HERELINES \
272 CopLINE_inc(PL_curcop); \
273 if (PL_parser->herelines) \
274 CopLINE(PL_curcop) += PL_parser->herelines, \
275 PL_parser->herelines = 0; \
277 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
278 * is no sublex_push to follow. */
279 #define COPLINE_SET_FROM_MULTI_END \
281 CopLINE_set(PL_curcop, PL_multi_end); \
282 if (PL_multi_end != PL_multi_start) \
283 PL_parser->herelines = 0; \
289 /* how to interpret the pl_yylval associated with the token */
293 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
298 static struct debug_tokens {
300 enum token_type type;
302 } const debug_tokens[] =
304 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
305 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
306 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
307 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
308 { ARROW, TOKENTYPE_NONE, "ARROW" },
309 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
310 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
311 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
312 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
313 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
314 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
315 { DO, TOKENTYPE_NONE, "DO" },
316 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
317 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
318 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
319 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
320 { ELSE, TOKENTYPE_NONE, "ELSE" },
321 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
322 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
323 { FOR, TOKENTYPE_IVAL, "FOR" },
324 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
325 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
326 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
327 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
328 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
329 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
330 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
331 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
332 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
333 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
334 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
335 { IF, TOKENTYPE_IVAL, "IF" },
336 { LABEL, TOKENTYPE_PVAL, "LABEL" },
337 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
338 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
339 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
340 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
341 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
342 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
343 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
344 { MY, TOKENTYPE_IVAL, "MY" },
345 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
346 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
347 { OROP, TOKENTYPE_IVAL, "OROP" },
348 { OROR, TOKENTYPE_NONE, "OROR" },
349 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
350 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
351 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
352 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
353 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
354 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
355 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
356 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
357 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
358 { PREINC, TOKENTYPE_NONE, "PREINC" },
359 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
360 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
361 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
362 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
363 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
364 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
365 { SUB, TOKENTYPE_NONE, "SUB" },
366 { THING, TOKENTYPE_OPVAL, "THING" },
367 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
368 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
369 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
370 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
371 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
372 { USE, TOKENTYPE_IVAL, "USE" },
373 { WHEN, TOKENTYPE_IVAL, "WHEN" },
374 { WHILE, TOKENTYPE_IVAL, "WHILE" },
375 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
376 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
377 { 0, TOKENTYPE_NONE, NULL }
380 /* dump the returned token in rv, plus any optional arg in pl_yylval */
383 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
385 PERL_ARGS_ASSERT_TOKEREPORT;
388 const char *name = NULL;
389 enum token_type type = TOKENTYPE_NONE;
390 const struct debug_tokens *p;
391 SV* const report = newSVpvs("<== ");
393 for (p = debug_tokens; p->token; p++) {
394 if (p->token == (int)rv) {
401 Perl_sv_catpv(aTHX_ report, name);
402 else if (isGRAPH(rv))
404 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
406 sv_catpvs(report, " (pending identifier)");
409 sv_catpvs(report, "EOF");
411 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
416 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
418 case TOKENTYPE_OPNUM:
419 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
420 PL_op_name[lvalp->ival]);
423 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
425 case TOKENTYPE_OPVAL:
427 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
428 PL_op_name[lvalp->opval->op_type]);
429 if (lvalp->opval->op_type == OP_CONST) {
430 Perl_sv_catpvf(aTHX_ report, " %s",
431 SvPEEK(cSVOPx_sv(lvalp->opval)));
436 sv_catpvs(report, "(opval=null)");
439 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
445 /* print the buffer with suitable escapes */
448 S_printbuf(pTHX_ const char *const fmt, const char *const s)
450 SV* const tmp = newSVpvs("");
452 PERL_ARGS_ASSERT_PRINTBUF;
454 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
455 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
463 S_deprecate_commaless_var_list(pTHX) {
465 deprecate("comma-less variable list");
466 return REPORT(','); /* grandfather non-comma-format format */
472 * This subroutine looks for an '=' next to the operator that has just been
473 * parsed and turns it into an ASSIGNOP if it finds one.
477 S_ao(pTHX_ int toketype)
479 if (*PL_bufptr == '=') {
481 if (toketype == ANDAND)
482 pl_yylval.ival = OP_ANDASSIGN;
483 else if (toketype == OROR)
484 pl_yylval.ival = OP_ORASSIGN;
485 else if (toketype == DORDOR)
486 pl_yylval.ival = OP_DORASSIGN;
489 return REPORT(toketype);
494 * When Perl expects an operator and finds something else, no_op
495 * prints the warning. It always prints "<something> found where
496 * operator expected. It prints "Missing semicolon on previous line?"
497 * if the surprise occurs at the start of the line. "do you need to
498 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
499 * where the compiler doesn't know if foo is a method call or a function.
500 * It prints "Missing operator before end of line" if there's nothing
501 * after the missing operator, or "... before <...>" if there is something
502 * after the missing operator.
504 * PL_bufptr is expected to point to the start of the thing that was found,
505 * and s after the next token or partial token.
509 S_no_op(pTHX_ const char *const what, char *s)
511 char * const oldbp = PL_bufptr;
512 const bool is_first = (PL_oldbufptr == PL_linestart);
514 PERL_ARGS_ASSERT_NO_OP;
520 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
521 if (ckWARN_d(WARN_SYNTAX)) {
523 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
524 "\t(Missing semicolon on previous line?)\n");
525 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
527 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
528 t += UTF ? UTF8SKIP(t) : 1)
530 if (t < PL_bufptr && isSPACE(*t))
531 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
532 "\t(Do you need to predeclare %"UTF8f"?)\n",
533 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
537 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
538 "\t(Missing operator before %"UTF8f"?)\n",
539 UTF8fARG(UTF, s - oldbp, oldbp));
547 * Complain about missing quote/regexp/heredoc terminator.
548 * If it's called with NULL then it cauterizes the line buffer.
549 * If we're in a delimited string and the delimiter is a control
550 * character, it's reformatted into a two-char sequence like ^C.
555 S_missingterm(pTHX_ char *s)
557 char tmpbuf[UTF8_MAXBYTES];
562 char * const nl = strrchr(s,'\n');
567 else if (PL_multi_close < 32) {
569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
574 if (LIKELY(PL_multi_close < 256)) {
575 *tmpbuf = (char)PL_multi_close;
580 *uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close) = 0;
584 q = strchr(s,'"') ? '\'' : '"';
585 sv = sv_2mortal(newSVpv(s,0));
588 Perl_croak(aTHX_ "Can't find string terminator %c%"SVf
589 "%c anywhere before EOF",q,SVfARG(sv),q);
595 * Check whether the named feature is enabled.
598 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
600 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
602 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
604 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
606 if (namelen > MAX_FEATURE_LEN)
608 memcpy(&he_name[8], name, namelen);
610 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
611 REFCOUNTED_HE_EXISTS));
615 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
616 * utf16-to-utf8-reversed.
619 #ifdef PERL_CR_FILTER
623 const char *s = SvPVX_const(sv);
624 const char * const e = s + SvCUR(sv);
626 PERL_ARGS_ASSERT_STRIP_RETURN;
628 /* outer loop optimized to do nothing if there are no CR-LFs */
630 if (*s++ == '\r' && *s == '\n') {
631 /* hit a CR-LF, need to copy the rest */
635 if (*s == '\r' && s[1] == '\n')
646 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
648 const I32 count = FILTER_READ(idx+1, sv, maxlen);
649 if (count > 0 && !maxlen)
656 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
658 Creates and initialises a new lexer/parser state object, supplying
659 a context in which to lex and parse from a new source of Perl code.
660 A pointer to the new state object is placed in L</PL_parser>. An entry
661 is made on the save stack so that upon unwinding the new state object
662 will be destroyed and the former value of L</PL_parser> will be restored.
663 Nothing else need be done to clean up the parsing context.
665 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
666 non-null, provides a string (in SV form) containing code to be parsed.
667 A copy of the string is made, so subsequent modification of C<line>
668 does not affect parsing. C<rsfp>, if non-null, provides an input stream
669 from which code will be read to be parsed. If both are non-null, the
670 code in C<line> comes first and must consist of complete lines of input,
671 and C<rsfp> supplies the remainder of the source.
673 The C<flags> parameter is reserved for future use. Currently it is only
674 used by perl internally, so extensions should always pass zero.
679 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
680 can share filters with the current parser.
681 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
682 caller, hence isn't owned by the parser, so shouldn't be closed on parser
683 destruction. This is used to handle the case of defaulting to reading the
684 script from the standard input because no filename was given on the command
685 line (without getting confused by situation where STDIN has been closed, so
686 the script handle is opened on fd 0) */
689 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
691 const char *s = NULL;
692 yy_parser *parser, *oparser;
693 if (flags && flags & ~LEX_START_FLAGS)
694 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
696 /* create and initialise a parser */
698 Newxz(parser, 1, yy_parser);
699 parser->old_parser = oparser = PL_parser;
702 parser->stack = NULL;
704 parser->stack_size = 0;
706 /* on scope exit, free this parser and restore any outer one */
708 parser->saved_curcop = PL_curcop;
710 /* initialise lexer state */
712 parser->nexttoke = 0;
713 parser->error_count = oparser ? oparser->error_count : 0;
714 parser->copline = parser->preambling = NOLINE;
715 parser->lex_state = LEX_NORMAL;
716 parser->expect = XSTATE;
718 parser->rsfp_filters =
719 !(flags & LEX_START_SAME_FILTER) || !oparser
721 : MUTABLE_AV(SvREFCNT_inc(
722 oparser->rsfp_filters
723 ? oparser->rsfp_filters
724 : (oparser->rsfp_filters = newAV())
727 Newx(parser->lex_brackstack, 120, char);
728 Newx(parser->lex_casestack, 12, char);
729 *parser->lex_casestack = '\0';
730 Newxz(parser->lex_shared, 1, LEXSHARED);
734 s = SvPV_const(line, len);
735 parser->linestr = flags & LEX_START_COPIED
736 ? SvREFCNT_inc_simple_NN(line)
737 : newSVpvn_flags(s, len, SvUTF8(line));
739 sv_catpvs(parser->linestr, "\n;");
741 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
743 parser->oldoldbufptr =
746 parser->linestart = SvPVX(parser->linestr);
747 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
748 parser->last_lop = parser->last_uni = NULL;
750 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
751 |LEX_DONT_CLOSE_RSFP));
752 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
753 |LEX_DONT_CLOSE_RSFP));
755 parser->in_pod = parser->filtered = 0;
759 /* delete a parser object */
762 Perl_parser_free(pTHX_ const yy_parser *parser)
764 PERL_ARGS_ASSERT_PARSER_FREE;
766 PL_curcop = parser->saved_curcop;
767 SvREFCNT_dec(parser->linestr);
769 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
770 PerlIO_clearerr(parser->rsfp);
771 else if (parser->rsfp && (!parser->old_parser
772 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
773 PerlIO_close(parser->rsfp);
774 SvREFCNT_dec(parser->rsfp_filters);
775 SvREFCNT_dec(parser->lex_stuff);
776 SvREFCNT_dec(parser->lex_sub_repl);
778 Safefree(parser->lex_brackstack);
779 Safefree(parser->lex_casestack);
780 Safefree(parser->lex_shared);
781 PL_parser = parser->old_parser;
786 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
788 I32 nexttoke = parser->nexttoke;
789 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
791 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
792 && parser->nextval[nexttoke].opval
793 && parser->nextval[nexttoke].opval->op_slabbed
794 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
795 op_free(parser->nextval[nexttoke].opval);
796 parser->nextval[nexttoke].opval = NULL;
803 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
805 Buffer scalar containing the chunk currently under consideration of the
806 text currently being lexed. This is always a plain string scalar (for
807 which C<SvPOK> is true). It is not intended to be used as a scalar by
808 normal scalar means; instead refer to the buffer directly by the pointer
809 variables described below.
811 The lexer maintains various C<char*> pointers to things in the
812 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
813 reallocated, all of these pointers must be updated. Don't attempt to
814 do this manually, but rather use L</lex_grow_linestr> if you need to
815 reallocate the buffer.
817 The content of the text chunk in the buffer is commonly exactly one
818 complete line of input, up to and including a newline terminator,
819 but there are situations where it is otherwise. The octets of the
820 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
821 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
822 flag on this scalar, which may disagree with it.
824 For direct examination of the buffer, the variable
825 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
826 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
827 of these pointers is usually preferable to examination of the scalar
828 through normal scalar means.
830 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
832 Direct pointer to the end of the chunk of text currently being lexed, the
833 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
834 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
835 always located at the end of the buffer, and does not count as part of
836 the buffer's contents.
838 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
840 Points to the current position of lexing inside the lexer buffer.
841 Characters around this point may be freely examined, within
842 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
843 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
844 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
846 Lexing code (whether in the Perl core or not) moves this pointer past
847 the characters that it consumes. It is also expected to perform some
848 bookkeeping whenever a newline character is consumed. This movement
849 can be more conveniently performed by the function L</lex_read_to>,
850 which handles newlines appropriately.
852 Interpretation of the buffer's octets can be abstracted out by
853 using the slightly higher-level functions L</lex_peek_unichar> and
854 L</lex_read_unichar>.
856 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
858 Points to the start of the current line inside the lexer buffer.
859 This is useful for indicating at which column an error occurred, and
860 not much else. This must be updated by any lexing code that consumes
861 a newline; the function L</lex_read_to> handles this detail.
867 =for apidoc Amx|bool|lex_bufutf8
869 Indicates whether the octets in the lexer buffer
870 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
871 of Unicode characters. If not, they should be interpreted as Latin-1
872 characters. This is analogous to the C<SvUTF8> flag for scalars.
874 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
875 contains valid UTF-8. Lexing code must be robust in the face of invalid
878 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
879 is significant, but not the whole story regarding the input character
880 encoding. Normally, when a file is being read, the scalar contains octets
881 and its C<SvUTF8> flag is off, but the octets should be interpreted as
882 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
883 however, the scalar may have the C<SvUTF8> flag on, and in this case its
884 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
885 is in effect. This logic may change in the future; use this function
886 instead of implementing the logic yourself.
892 Perl_lex_bufutf8(pTHX)
898 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
900 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
901 at least C<len> octets (including terminating C<NUL>). Returns a
902 pointer to the reallocated buffer. This is necessary before making
903 any direct modification of the buffer that would increase its length.
904 L</lex_stuff_pvn> provides a more convenient way to insert text into
907 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
908 this function updates all of the lexer's variables that point directly
915 Perl_lex_grow_linestr(pTHX_ STRLEN len)
919 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
920 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
921 linestr = PL_parser->linestr;
922 buf = SvPVX(linestr);
923 if (len <= SvLEN(linestr))
925 bufend_pos = PL_parser->bufend - buf;
926 bufptr_pos = PL_parser->bufptr - buf;
927 oldbufptr_pos = PL_parser->oldbufptr - buf;
928 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
929 linestart_pos = PL_parser->linestart - buf;
930 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
931 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
932 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
933 PL_parser->lex_shared->re_eval_start - buf : 0;
935 buf = sv_grow(linestr, len);
937 PL_parser->bufend = buf + bufend_pos;
938 PL_parser->bufptr = buf + bufptr_pos;
939 PL_parser->oldbufptr = buf + oldbufptr_pos;
940 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
941 PL_parser->linestart = buf + linestart_pos;
942 if (PL_parser->last_uni)
943 PL_parser->last_uni = buf + last_uni_pos;
944 if (PL_parser->last_lop)
945 PL_parser->last_lop = buf + last_lop_pos;
946 if (PL_parser->lex_shared->re_eval_start)
947 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
952 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
954 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
955 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
956 reallocating the buffer if necessary. This means that lexing code that
957 runs later will see the characters as if they had appeared in the input.
958 It is not recommended to do this as part of normal parsing, and most
959 uses of this facility run the risk of the inserted characters being
960 interpreted in an unintended manner.
962 The string to be inserted is represented by C<len> octets starting
963 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
964 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
965 The characters are recoded for the lexer buffer, according to how the
966 buffer is currently being interpreted (L</lex_bufutf8>). If a string
967 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
968 function is more convenient.
974 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
978 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
979 if (flags & ~(LEX_STUFF_UTF8))
980 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
982 if (flags & LEX_STUFF_UTF8) {
985 STRLEN highhalf = 0; /* Count of variants */
986 const char *p, *e = pv+len;
987 for (p = pv; p != e; p++) {
988 if (! UTF8_IS_INVARIANT(*p)) {
994 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
995 bufptr = PL_parser->bufptr;
996 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
997 SvCUR_set(PL_parser->linestr,
998 SvCUR(PL_parser->linestr) + len+highhalf);
999 PL_parser->bufend += len+highhalf;
1000 for (p = pv; p != e; p++) {
1002 if (! UTF8_IS_INVARIANT(c)) {
1003 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1004 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1006 *bufptr++ = (char)c;
1011 if (flags & LEX_STUFF_UTF8) {
1012 STRLEN highhalf = 0;
1013 const char *p, *e = pv+len;
1014 for (p = pv; p != e; p++) {
1016 if (UTF8_IS_ABOVE_LATIN1(c)) {
1017 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1018 "non-Latin-1 character into Latin-1 input");
1019 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1022 } else if (! UTF8_IS_INVARIANT(c)) {
1023 /* malformed UTF-8 */
1025 SAVESPTR(PL_warnhook);
1026 PL_warnhook = PERL_WARNHOOK_FATAL;
1027 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1033 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1034 bufptr = PL_parser->bufptr;
1035 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1036 SvCUR_set(PL_parser->linestr,
1037 SvCUR(PL_parser->linestr) + len-highhalf);
1038 PL_parser->bufend += len-highhalf;
1041 if (UTF8_IS_INVARIANT(*p)) {
1047 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1053 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1054 bufptr = PL_parser->bufptr;
1055 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1056 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1057 PL_parser->bufend += len;
1058 Copy(pv, bufptr, len, char);
1064 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1066 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1067 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1068 reallocating the buffer if necessary. This means that lexing code that
1069 runs later will see the characters as if they had appeared in the input.
1070 It is not recommended to do this as part of normal parsing, and most
1071 uses of this facility run the risk of the inserted characters being
1072 interpreted in an unintended manner.
1074 The string to be inserted is represented by octets starting at C<pv>
1075 and continuing to the first nul. These octets are interpreted as either
1076 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1077 in C<flags>. The characters are recoded for the lexer buffer, according
1078 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1079 If it is not convenient to nul-terminate a string to be inserted, the
1080 L</lex_stuff_pvn> function is more appropriate.
1086 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1088 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1089 lex_stuff_pvn(pv, strlen(pv), flags);
1093 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1095 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1096 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1097 reallocating the buffer if necessary. This means that lexing code that
1098 runs later will see the characters as if they had appeared in the input.
1099 It is not recommended to do this as part of normal parsing, and most
1100 uses of this facility run the risk of the inserted characters being
1101 interpreted in an unintended manner.
1103 The string to be inserted is the string value of C<sv>. The characters
1104 are recoded for the lexer buffer, according to how the buffer is currently
1105 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1106 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1107 need to construct a scalar.
1113 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1117 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1119 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1121 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1125 =for apidoc Amx|void|lex_unstuff|char *ptr
1127 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1128 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1129 This hides the discarded text from any lexing code that runs later,
1130 as if the text had never appeared.
1132 This is not the normal way to consume lexed text. For that, use
1139 Perl_lex_unstuff(pTHX_ char *ptr)
1143 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1144 buf = PL_parser->bufptr;
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1149 bufend = PL_parser->bufend;
1151 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1152 unstuff_len = ptr - buf;
1153 Move(ptr, buf, bufend+1-ptr, char);
1154 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1155 PL_parser->bufend = bufend - unstuff_len;
1159 =for apidoc Amx|void|lex_read_to|char *ptr
1161 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1162 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1163 performing the correct bookkeeping whenever a newline character is passed.
1164 This is the normal way to consume lexed text.
1166 Interpretation of the buffer's octets can be abstracted out by
1167 using the slightly higher-level functions L</lex_peek_unichar> and
1168 L</lex_read_unichar>.
1174 Perl_lex_read_to(pTHX_ char *ptr)
1177 PERL_ARGS_ASSERT_LEX_READ_TO;
1178 s = PL_parser->bufptr;
1179 if (ptr < s || ptr > PL_parser->bufend)
1180 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1181 for (; s != ptr; s++)
1183 COPLINE_INC_WITH_HERELINES;
1184 PL_parser->linestart = s+1;
1186 PL_parser->bufptr = ptr;
1190 =for apidoc Amx|void|lex_discard_to|char *ptr
1192 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1193 up to C<ptr>. The remaining content of the buffer will be moved, and
1194 all pointers into the buffer updated appropriately. C<ptr> must not
1195 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1196 it is not permitted to discard text that has yet to be lexed.
1198 Normally it is not necessarily to do this directly, because it suffices to
1199 use the implicit discarding behaviour of L</lex_next_chunk> and things
1200 based on it. However, if a token stretches across multiple lines,
1201 and the lexing code has kept multiple lines of text in the buffer for
1202 that purpose, then after completion of the token it would be wise to
1203 explicitly discard the now-unneeded earlier lines, to avoid future
1204 multi-line tokens growing the buffer without bound.
1210 Perl_lex_discard_to(pTHX_ char *ptr)
1214 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1215 buf = SvPVX(PL_parser->linestr);
1217 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1220 if (ptr > PL_parser->bufptr)
1221 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1222 discard_len = ptr - buf;
1223 if (PL_parser->oldbufptr < ptr)
1224 PL_parser->oldbufptr = ptr;
1225 if (PL_parser->oldoldbufptr < ptr)
1226 PL_parser->oldoldbufptr = ptr;
1227 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1228 PL_parser->last_uni = NULL;
1229 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1230 PL_parser->last_lop = NULL;
1231 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1232 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1233 PL_parser->bufend -= discard_len;
1234 PL_parser->bufptr -= discard_len;
1235 PL_parser->oldbufptr -= discard_len;
1236 PL_parser->oldoldbufptr -= discard_len;
1237 if (PL_parser->last_uni)
1238 PL_parser->last_uni -= discard_len;
1239 if (PL_parser->last_lop)
1240 PL_parser->last_lop -= discard_len;
1244 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1246 Reads in the next chunk of text to be lexed, appending it to
1247 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1248 looked to the end of the current chunk and wants to know more. It is
1249 usual, but not necessary, for lexing to have consumed the entirety of
1250 the current chunk at this time.
1252 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1253 chunk (i.e., the current chunk has been entirely consumed), normally the
1254 current chunk will be discarded at the same time that the new chunk is
1255 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1256 will not be discarded. If the current chunk has not been entirely
1257 consumed, then it will not be discarded regardless of the flag.
1259 Returns true if some new text was added to the buffer, or false if the
1260 buffer has reached the end of the input text.
1265 #define LEX_FAKE_EOF 0x80000000
1266 #define LEX_NO_TERM 0x40000000 /* here-doc */
1269 Perl_lex_next_chunk(pTHX_ U32 flags)
1273 STRLEN old_bufend_pos, new_bufend_pos;
1274 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1275 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1276 bool got_some_for_debugger = 0;
1278 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1279 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1280 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1282 linestr = PL_parser->linestr;
1283 buf = SvPVX(linestr);
1284 if (!(flags & LEX_KEEP_PREVIOUS)
1285 && PL_parser->bufptr == PL_parser->bufend)
1287 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1289 if (PL_parser->last_uni != PL_parser->bufend)
1290 PL_parser->last_uni = NULL;
1291 if (PL_parser->last_lop != PL_parser->bufend)
1292 PL_parser->last_lop = NULL;
1293 last_uni_pos = last_lop_pos = 0;
1297 old_bufend_pos = PL_parser->bufend - buf;
1298 bufptr_pos = PL_parser->bufptr - buf;
1299 oldbufptr_pos = PL_parser->oldbufptr - buf;
1300 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1301 linestart_pos = PL_parser->linestart - buf;
1302 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1303 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1305 if (flags & LEX_FAKE_EOF) {
1307 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1309 } else if (filter_gets(linestr, old_bufend_pos)) {
1311 got_some_for_debugger = 1;
1312 } else if (flags & LEX_NO_TERM) {
1315 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1316 sv_setpvs(linestr, "");
1318 /* End of real input. Close filehandle (unless it was STDIN),
1319 * then add implicit termination.
1321 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1322 PerlIO_clearerr(PL_parser->rsfp);
1323 else if (PL_parser->rsfp)
1324 (void)PerlIO_close(PL_parser->rsfp);
1325 PL_parser->rsfp = NULL;
1326 PL_parser->in_pod = PL_parser->filtered = 0;
1327 if (!PL_in_eval && PL_minus_p) {
1329 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1330 PL_minus_n = PL_minus_p = 0;
1331 } else if (!PL_in_eval && PL_minus_n) {
1332 sv_catpvs(linestr, /*{*/";}");
1335 sv_catpvs(linestr, ";");
1338 buf = SvPVX(linestr);
1339 new_bufend_pos = SvCUR(linestr);
1340 PL_parser->bufend = buf + new_bufend_pos;
1341 PL_parser->bufptr = buf + bufptr_pos;
1342 PL_parser->oldbufptr = buf + oldbufptr_pos;
1343 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1344 PL_parser->linestart = buf + linestart_pos;
1345 if (PL_parser->last_uni)
1346 PL_parser->last_uni = buf + last_uni_pos;
1347 if (PL_parser->last_lop)
1348 PL_parser->last_lop = buf + last_lop_pos;
1349 if (PL_parser->preambling != NOLINE) {
1350 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1351 PL_parser->preambling = NOLINE;
1353 if ( got_some_for_debugger
1354 && PERLDB_LINE_OR_SAVESRC
1355 && PL_curstash != PL_debstash)
1357 /* debugger active and we're not compiling the debugger code,
1358 * so store the line into the debugger's array of lines
1360 update_debugger_info(NULL, buf+old_bufend_pos,
1361 new_bufend_pos-old_bufend_pos);
1367 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1369 Looks ahead one (Unicode) character in the text currently being lexed.
1370 Returns the codepoint (unsigned integer value) of the next character,
1371 or -1 if lexing has reached the end of the input text. To consume the
1372 peeked character, use L</lex_read_unichar>.
1374 If the next character is in (or extends into) the next chunk of input
1375 text, the next chunk will be read in. Normally the current chunk will be
1376 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1377 bit set, then the current chunk will not be discarded.
1379 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1380 is encountered, an exception is generated.
1386 Perl_lex_peek_unichar(pTHX_ U32 flags)
1390 if (flags & ~(LEX_KEEP_PREVIOUS))
1391 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1392 s = PL_parser->bufptr;
1393 bufend = PL_parser->bufend;
1399 if (!lex_next_chunk(flags))
1401 s = PL_parser->bufptr;
1402 bufend = PL_parser->bufend;
1405 if (UTF8_IS_INVARIANT(head))
1407 if (UTF8_IS_START(head)) {
1408 len = UTF8SKIP(&head);
1409 while ((STRLEN)(bufend-s) < len) {
1410 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1412 s = PL_parser->bufptr;
1413 bufend = PL_parser->bufend;
1416 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1417 if (retlen == (STRLEN)-1) {
1418 /* malformed UTF-8 */
1420 SAVESPTR(PL_warnhook);
1421 PL_warnhook = PERL_WARNHOOK_FATAL;
1422 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1428 if (!lex_next_chunk(flags))
1430 s = PL_parser->bufptr;
1437 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1439 Reads the next (Unicode) character in the text currently being lexed.
1440 Returns the codepoint (unsigned integer value) of the character read,
1441 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1442 if lexing has reached the end of the input text. To non-destructively
1443 examine the next character, use L</lex_peek_unichar> instead.
1445 If the next character is in (or extends into) the next chunk of input
1446 text, the next chunk will be read in. Normally the current chunk will be
1447 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1448 bit set, then the current chunk will not be discarded.
1450 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1451 is encountered, an exception is generated.
1457 Perl_lex_read_unichar(pTHX_ U32 flags)
1460 if (flags & ~(LEX_KEEP_PREVIOUS))
1461 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1462 c = lex_peek_unichar(flags);
1465 COPLINE_INC_WITH_HERELINES;
1467 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1469 ++(PL_parser->bufptr);
1475 =for apidoc Amx|void|lex_read_space|U32 flags
1477 Reads optional spaces, in Perl style, in the text currently being
1478 lexed. The spaces may include ordinary whitespace characters and
1479 Perl-style comments. C<#line> directives are processed if encountered.
1480 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1481 at a non-space character (or the end of the input text).
1483 If spaces extend into the next chunk of input text, the next chunk will
1484 be read in. Normally the current chunk will be discarded at the same
1485 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1486 chunk will not be discarded.
1491 #define LEX_NO_INCLINE 0x40000000
1492 #define LEX_NO_NEXT_CHUNK 0x80000000
1495 Perl_lex_read_space(pTHX_ U32 flags)
1498 const bool can_incline = !(flags & LEX_NO_INCLINE);
1499 bool need_incline = 0;
1500 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1501 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1502 s = PL_parser->bufptr;
1503 bufend = PL_parser->bufend;
1509 } while (!(c == '\n' || (c == 0 && s == bufend)));
1510 } else if (c == '\n') {
1513 PL_parser->linestart = s;
1519 } else if (isSPACE(c)) {
1521 } else if (c == 0 && s == bufend) {
1524 if (flags & LEX_NO_NEXT_CHUNK)
1526 PL_parser->bufptr = s;
1527 l = CopLINE(PL_curcop);
1528 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1529 got_more = lex_next_chunk(flags);
1530 CopLINE_set(PL_curcop, l);
1531 s = PL_parser->bufptr;
1532 bufend = PL_parser->bufend;
1535 if (can_incline && need_incline && PL_parser->rsfp) {
1545 PL_parser->bufptr = s;
1550 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1552 This function performs syntax checking on a prototype, C<proto>.
1553 If C<warn> is true, any illegal characters or mismatched brackets
1554 will trigger illegalproto warnings, declaring that they were
1555 detected in the prototype for C<name>.
1557 The return value is C<true> if this is a valid prototype, and
1558 C<false> if it is not, regardless of whether C<warn> was C<true> or
1561 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1568 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1570 STRLEN len, origlen;
1571 char *p = proto ? SvPV(proto, len) : NULL;
1572 bool bad_proto = FALSE;
1573 bool in_brackets = FALSE;
1574 bool after_slash = FALSE;
1575 char greedy_proto = ' ';
1576 bool proto_after_greedy_proto = FALSE;
1577 bool must_be_last = FALSE;
1578 bool underscore = FALSE;
1579 bool bad_proto_after_underscore = FALSE;
1581 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1587 for (; len--; p++) {
1590 proto_after_greedy_proto = TRUE;
1592 if (!strchr(";@%", *p))
1593 bad_proto_after_underscore = TRUE;
1596 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1603 in_brackets = FALSE;
1604 else if ((*p == '@' || *p == '%')
1608 must_be_last = TRUE;
1617 after_slash = FALSE;
1622 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1625 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1626 origlen, UNI_DISPLAY_ISPRINT)
1627 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1629 if (proto_after_greedy_proto)
1630 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1631 "Prototype after '%c' for %"SVf" : %s",
1632 greedy_proto, SVfARG(name), p);
1634 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1635 "Missing ']' in prototype for %"SVf" : %s",
1638 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1639 "Illegal character in prototype for %"SVf" : %s",
1641 if (bad_proto_after_underscore)
1642 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1643 "Illegal character after '_' in prototype for %"SVf" : %s",
1647 return (! (proto_after_greedy_proto || bad_proto) );
1652 * This subroutine has nothing to do with tilting, whether at windmills
1653 * or pinball tables. Its name is short for "increment line". It
1654 * increments the current line number in CopLINE(PL_curcop) and checks
1655 * to see whether the line starts with a comment of the form
1656 * # line 500 "foo.pm"
1657 * If so, it sets the current line number and file to the values in the comment.
1661 S_incline(pTHX_ const char *s)
1669 PERL_ARGS_ASSERT_INCLINE;
1671 COPLINE_INC_WITH_HERELINES;
1672 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1673 && s+1 == PL_bufend && *s == ';') {
1674 /* fake newline in string eval */
1675 CopLINE_dec(PL_curcop);
1680 while (SPACE_OR_TAB(*s))
1682 if (strnEQ(s, "line", 4))
1686 if (SPACE_OR_TAB(*s))
1690 while (SPACE_OR_TAB(*s))
1698 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1700 while (SPACE_OR_TAB(*s))
1702 if (*s == '"' && (t = strchr(s+1, '"'))) {
1708 while (*t && !isSPACE(*t))
1712 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1714 if (*e != '\n' && *e != '\0')
1715 return; /* false alarm */
1717 if (!grok_atoUV(n, &uv, &e))
1719 line_num = ((line_t)uv) - 1;
1722 const STRLEN len = t - s;
1724 if (!PL_rsfp && !PL_parser->filtered) {
1725 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1726 * to *{"::_<newfilename"} */
1727 /* However, the long form of evals is only turned on by the
1728 debugger - usually they're "(eval %lu)" */
1729 GV * const cfgv = CopFILEGV(PL_curcop);
1732 STRLEN tmplen2 = len;
1736 if (tmplen2 + 2 <= sizeof smallbuf)
1739 Newx(tmpbuf2, tmplen2 + 2, char);
1744 memcpy(tmpbuf2 + 2, s, tmplen2);
1747 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1749 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1750 /* adjust ${"::_<newfilename"} to store the new file name */
1751 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1752 /* The line number may differ. If that is the case,
1753 alias the saved lines that are in the array.
1754 Otherwise alias the whole array. */
1755 if (CopLINE(PL_curcop) == line_num) {
1756 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1757 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1759 else if (GvAV(cfgv)) {
1760 AV * const av = GvAV(cfgv);
1761 const I32 start = CopLINE(PL_curcop)+1;
1762 I32 items = AvFILLp(av) - start;
1764 AV * const av2 = GvAVn(gv2);
1765 SV **svp = AvARRAY(av) + start;
1766 I32 l = (I32)line_num+1;
1768 av_store(av2, l++, SvREFCNT_inc(*svp++));
1773 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1776 CopFILE_free(PL_curcop);
1777 CopFILE_setn(PL_curcop, s, len);
1779 CopLINE_set(PL_curcop, line_num);
1782 #define skipspace(s) skipspace_flags(s, 0)
1786 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1788 AV *av = CopFILEAVx(PL_curcop);
1791 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1793 sv = *av_fetch(av, 0, 1);
1794 SvUPGRADE(sv, SVt_PVMG);
1796 if (!SvPOK(sv)) sv_setpvs(sv,"");
1798 sv_catsv(sv, orig_sv);
1800 sv_catpvn(sv, buf, len);
1805 if (PL_parser->preambling == NOLINE)
1806 av_store(av, CopLINE(PL_curcop), sv);
1812 * Called to gobble the appropriate amount and type of whitespace.
1813 * Skips comments as well.
1817 S_skipspace_flags(pTHX_ char *s, U32 flags)
1819 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1820 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1821 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1824 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1826 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1827 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1828 LEX_NO_NEXT_CHUNK : 0));
1830 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1831 if (PL_linestart > PL_bufptr)
1832 PL_bufptr = PL_linestart;
1840 * Check the unary operators to ensure there's no ambiguity in how they're
1841 * used. An ambiguous piece of code would be:
1843 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1844 * the +5 is its argument.
1853 if (PL_oldoldbufptr != PL_last_uni)
1855 while (isSPACE(*PL_last_uni))
1858 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1859 s += UTF ? UTF8SKIP(s) : 1;
1860 if ((t = strchr(s, '(')) && t < PL_bufptr)
1863 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1864 "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
1865 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1869 * LOP : macro to build a list operator. Its behaviour has been replaced
1870 * with a subroutine, S_lop() for which LOP is just another name.
1873 #define LOP(f,x) return lop(f,x,s)
1877 * Build a list operator (or something that might be one). The rules:
1878 * - if we have a next token, then it's a list operator (no parens) for
1879 * which the next token has already been parsed; e.g.,
1882 * - if the next thing is an opening paren, then it's a function
1883 * - else it's a list operator
1887 S_lop(pTHX_ I32 f, int x, char *s)
1889 PERL_ARGS_ASSERT_LOP;
1894 PL_last_lop = PL_oldbufptr;
1895 PL_last_lop_op = (OPCODE)f;
1900 return REPORT(FUNC);
1903 return REPORT(FUNC);
1906 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1907 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1908 return REPORT(LSTOP);
1914 * When the lexer realizes it knows the next token (for instance,
1915 * it is reordering tokens for the parser) then it can call S_force_next
1916 * to know what token to return the next time the lexer is called. Caller
1917 * will need to set PL_nextval[] and possibly PL_expect to ensure
1918 * the lexer handles the token correctly.
1922 S_force_next(pTHX_ I32 type)
1926 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1927 tokereport(type, &NEXTVAL_NEXTTOKE);
1930 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1931 PL_nexttype[PL_nexttoke] = type;
1938 * This subroutine handles postfix deref syntax after the arrow has already
1939 * been emitted. @* $* etc. are emitted as two separate token right here.
1940 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1941 * only the first, leaving yylex to find the next.
1945 S_postderef(pTHX_ int const funny, char const next)
1947 assert(funny == DOLSHARP || strchr("$@%&*", funny));
1949 PL_expect = XOPERATOR;
1950 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1951 assert('@' == funny || '$' == funny || DOLSHARP == funny);
1952 PL_lex_state = LEX_INTERPEND;
1954 force_next(POSTJOIN);
1960 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1961 && !PL_lex_brackets)
1963 PL_expect = XOPERATOR;
1972 int yyc = PL_parser->yychar;
1973 if (yyc != YYEMPTY) {
1975 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1976 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1977 PL_lex_allbrackets--;
1979 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1980 } else if (yyc == '('/*)*/) {
1981 PL_lex_allbrackets--;
1986 PL_parser->yychar = YYEMPTY;
1991 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1993 SV * const sv = newSVpvn_utf8(start, len,
1996 && !is_invariant_string((const U8*)start, len)
1997 && is_utf8_string((const U8*)start, len));
2003 * When the lexer knows the next thing is a word (for instance, it has
2004 * just seen -> and it knows that the next char is a word char, then
2005 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2009 * char *start : buffer position (must be within PL_linestr)
2010 * int token : PL_next* will be this type of bare word
2011 * (e.g., METHOD,BAREWORD)
2012 * int check_keyword : if true, Perl checks to make sure the word isn't
2013 * a keyword (do this if the word is a label, e.g. goto FOO)
2014 * int allow_pack : if true, : characters will also be allowed (require,
2015 * use, etc. do this)
2019 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2024 PERL_ARGS_ASSERT_FORCE_WORD;
2026 start = skipspace(start);
2028 if (isIDFIRST_lazy_if(s,UTF)
2029 || (allow_pack && *s == ':' && s[1] == ':') )
2031 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2032 if (check_keyword) {
2033 char *s2 = PL_tokenbuf;
2035 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2037 if (keyword(s2, len2, 0))
2040 if (token == METHOD) {
2045 PL_expect = XOPERATOR;
2048 NEXTVAL_NEXTTOKE.opval
2049 = (OP*)newSVOP(OP_CONST,0,
2050 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2051 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2059 * Called when the lexer wants $foo *foo &foo etc, but the program
2060 * text only contains the "foo" portion. The first argument is a pointer
2061 * to the "foo", and the second argument is the type symbol to prefix.
2062 * Forces the next token to be a "BAREWORD".
2063 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2067 S_force_ident(pTHX_ const char *s, int kind)
2069 PERL_ARGS_ASSERT_FORCE_IDENT;
2072 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2073 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2074 UTF ? SVf_UTF8 : 0));
2075 NEXTVAL_NEXTTOKE.opval = o;
2076 force_next(BAREWORD);
2078 o->op_private = OPpCONST_ENTERED;
2079 /* XXX see note in pp_entereval() for why we forgo typo
2080 warnings if the symbol must be introduced in an eval.
2082 gv_fetchpvn_flags(s, len,
2083 (PL_in_eval ? GV_ADDMULTI
2084 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2085 kind == '$' ? SVt_PV :
2086 kind == '@' ? SVt_PVAV :
2087 kind == '%' ? SVt_PVHV :
2095 S_force_ident_maybe_lex(pTHX_ char pit)
2097 NEXTVAL_NEXTTOKE.ival = pit;
2102 Perl_str_to_version(pTHX_ SV *sv)
2107 const char *start = SvPV_const(sv,len);
2108 const char * const end = start + len;
2109 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2111 PERL_ARGS_ASSERT_STR_TO_VERSION;
2113 while (start < end) {
2117 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2122 retval += ((NV)n)/nshift;
2131 * Forces the next token to be a version number.
2132 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2133 * and if "guessing" is TRUE, then no new token is created (and the caller
2134 * must use an alternative parsing method).
2138 S_force_version(pTHX_ char *s, int guessing)
2143 PERL_ARGS_ASSERT_FORCE_VERSION;
2151 while (isDIGIT(*d) || *d == '_' || *d == '.')
2153 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2155 s = scan_num(s, &pl_yylval);
2156 version = pl_yylval.opval;
2157 ver = cSVOPx(version)->op_sv;
2158 if (SvPOK(ver) && !SvNIOK(ver)) {
2159 SvUPGRADE(ver, SVt_PVNV);
2160 SvNV_set(ver, str_to_version(ver));
2161 SvNOK_on(ver); /* hint that it is a version */
2164 else if (guessing) {
2169 /* NOTE: The parser sees the package name and the VERSION swapped */
2170 NEXTVAL_NEXTTOKE.opval = version;
2171 force_next(BAREWORD);
2177 * S_force_strict_version
2178 * Forces the next token to be a version number using strict syntax rules.
2182 S_force_strict_version(pTHX_ char *s)
2185 const char *errstr = NULL;
2187 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2189 while (isSPACE(*s)) /* leading whitespace */
2192 if (is_STRICT_VERSION(s,&errstr)) {
2194 s = (char *)scan_version(s, ver, 0);
2195 version = newSVOP(OP_CONST, 0, ver);
2197 else if ((*s != ';' && *s != '{' && *s != '}' )
2198 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2202 yyerror(errstr); /* version required */
2206 /* NOTE: The parser sees the package name and the VERSION swapped */
2207 NEXTVAL_NEXTTOKE.opval = version;
2208 force_next(BAREWORD);
2215 * Tokenize a quoted string passed in as an SV. It finds the next
2216 * chunk, up to end of string or a backslash. It may make a new
2217 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2222 S_tokeq(pTHX_ SV *sv)
2229 PERL_ARGS_ASSERT_TOKEQ;
2233 assert (!SvIsCOW(sv));
2234 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2238 /* This is relying on the SV being "well formed" with a trailing '\0' */
2239 while (s < send && !(*s == '\\' && s[1] == '\\'))
2244 if ( PL_hints & HINT_NEW_STRING ) {
2245 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2246 SVs_TEMP | SvUTF8(sv));
2250 if (s + 1 < send && (s[1] == '\\'))
2251 s++; /* all that, just for this */
2256 SvCUR_set(sv, d - SvPVX_const(sv));
2258 if ( PL_hints & HINT_NEW_STRING )
2259 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2264 * Now come three functions related to double-quote context,
2265 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2266 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2267 * interact with PL_lex_state, and create fake ( ... ) argument lists
2268 * to handle functions and concatenation.
2272 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2277 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2279 * Pattern matching will set PL_lex_op to the pattern-matching op to
2280 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2282 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2284 * Everything else becomes a FUNC.
2286 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2287 * had an OP_CONST or OP_READLINE). This just sets us up for a
2288 * call to S_sublex_push().
2292 S_sublex_start(pTHX)
2294 const I32 op_type = pl_yylval.ival;
2296 if (op_type == OP_NULL) {
2297 pl_yylval.opval = PL_lex_op;
2301 if (op_type == OP_CONST) {
2302 SV *sv = PL_lex_stuff;
2303 PL_lex_stuff = NULL;
2306 if (SvTYPE(sv) == SVt_PVIV) {
2307 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2309 const char * const p = SvPV_const(sv, len);
2310 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2314 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2318 PL_parser->lex_super_state = PL_lex_state;
2319 PL_parser->lex_sub_inwhat = (U16)op_type;
2320 PL_parser->lex_sub_op = PL_lex_op;
2321 PL_lex_state = LEX_INTERPPUSH;
2325 pl_yylval.opval = PL_lex_op;
2335 * Create a new scope to save the lexing state. The scope will be
2336 * ended in S_sublex_done. Returns a '(', starting the function arguments
2337 * to the uc, lc, etc. found before.
2338 * Sets PL_lex_state to LEX_INTERPCONCAT.
2345 const bool is_heredoc = PL_multi_close == '<';
2348 PL_lex_state = PL_parser->lex_super_state;
2349 SAVEI8(PL_lex_dojoin);
2350 SAVEI32(PL_lex_brackets);
2351 SAVEI32(PL_lex_allbrackets);
2352 SAVEI32(PL_lex_formbrack);
2353 SAVEI8(PL_lex_fakeeof);
2354 SAVEI32(PL_lex_casemods);
2355 SAVEI32(PL_lex_starts);
2356 SAVEI8(PL_lex_state);
2357 SAVESPTR(PL_lex_repl);
2358 SAVEVPTR(PL_lex_inpat);
2359 SAVEI16(PL_lex_inwhat);
2362 SAVECOPLINE(PL_curcop);
2363 SAVEI32(PL_multi_end);
2364 SAVEI32(PL_parser->herelines);
2365 PL_parser->herelines = 0;
2367 SAVEIV(PL_multi_close);
2368 SAVEPPTR(PL_bufptr);
2369 SAVEPPTR(PL_bufend);
2370 SAVEPPTR(PL_oldbufptr);
2371 SAVEPPTR(PL_oldoldbufptr);
2372 SAVEPPTR(PL_last_lop);
2373 SAVEPPTR(PL_last_uni);
2374 SAVEPPTR(PL_linestart);
2375 SAVESPTR(PL_linestr);
2376 SAVEGENERICPV(PL_lex_brackstack);
2377 SAVEGENERICPV(PL_lex_casestack);
2378 SAVEGENERICPV(PL_parser->lex_shared);
2379 SAVEBOOL(PL_parser->lex_re_reparsing);
2380 SAVEI32(PL_copline);
2382 /* The here-doc parser needs to be able to peek into outer lexing
2383 scopes to find the body of the here-doc. So we put PL_linestr and
2384 PL_bufptr into lex_shared, to ‘share’ those values.
2386 PL_parser->lex_shared->ls_linestr = PL_linestr;
2387 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2389 PL_linestr = PL_lex_stuff;
2390 PL_lex_repl = PL_parser->lex_sub_repl;
2391 PL_lex_stuff = NULL;
2392 PL_parser->lex_sub_repl = NULL;
2394 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2395 set for an inner quote-like operator and then an error causes scope-
2396 popping. We must not have a PL_lex_stuff value left dangling, as
2397 that breaks assumptions elsewhere. See bug #123617. */
2398 SAVEGENERICSV(PL_lex_stuff);
2399 SAVEGENERICSV(PL_parser->lex_sub_repl);
2401 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2402 = SvPVX(PL_linestr);
2403 PL_bufend += SvCUR(PL_linestr);
2404 PL_last_lop = PL_last_uni = NULL;
2405 SAVEFREESV(PL_linestr);
2406 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2408 PL_lex_dojoin = FALSE;
2409 PL_lex_brackets = PL_lex_formbrack = 0;
2410 PL_lex_allbrackets = 0;
2411 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2412 Newx(PL_lex_brackstack, 120, char);
2413 Newx(PL_lex_casestack, 12, char);
2414 PL_lex_casemods = 0;
2415 *PL_lex_casestack = '\0';
2417 PL_lex_state = LEX_INTERPCONCAT;
2419 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2420 PL_copline = NOLINE;
2422 Newxz(shared, 1, LEXSHARED);
2423 shared->ls_prev = PL_parser->lex_shared;
2424 PL_parser->lex_shared = shared;
2426 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2427 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2428 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2429 PL_lex_inpat = PL_parser->lex_sub_op;
2431 PL_lex_inpat = NULL;
2433 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2434 PL_in_eval &= ~EVAL_RE_REPARSING;
2441 * Restores lexer state after a S_sublex_push.
2447 if (!PL_lex_starts++) {
2448 SV * const sv = newSVpvs("");
2449 if (SvUTF8(PL_linestr))
2451 PL_expect = XOPERATOR;
2452 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2456 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2457 PL_lex_state = LEX_INTERPCASEMOD;
2461 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2462 assert(PL_lex_inwhat != OP_TRANSR);
2464 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2465 PL_linestr = PL_lex_repl;
2467 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2468 PL_bufend += SvCUR(PL_linestr);
2469 PL_last_lop = PL_last_uni = NULL;
2470 PL_lex_dojoin = FALSE;
2471 PL_lex_brackets = 0;
2472 PL_lex_allbrackets = 0;
2473 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2474 PL_lex_casemods = 0;
2475 *PL_lex_casestack = '\0';
2477 if (SvEVALED(PL_lex_repl)) {
2478 PL_lex_state = LEX_INTERPNORMAL;
2480 /* we don't clear PL_lex_repl here, so that we can check later
2481 whether this is an evalled subst; that means we rely on the
2482 logic to ensure sublex_done() is called again only via the
2483 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2486 PL_lex_state = LEX_INTERPCONCAT;
2489 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2490 CopLINE(PL_curcop) +=
2491 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2492 + PL_parser->herelines;
2493 PL_parser->herelines = 0;
2498 const line_t l = CopLINE(PL_curcop);
2500 if (PL_multi_close == '<')
2501 PL_parser->herelines += l - PL_multi_end;
2502 PL_bufend = SvPVX(PL_linestr);
2503 PL_bufend += SvCUR(PL_linestr);
2504 PL_expect = XOPERATOR;
2509 PERL_STATIC_INLINE SV*
2510 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2512 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2513 * interior, hence to the "}". Finds what the name resolves to, returning
2514 * an SV* containing it; NULL if no valid one found */
2516 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2523 const U8* first_bad_char_loc;
2524 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2526 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2529 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2530 "Unknown charname '' is deprecated");
2534 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2536 &first_bad_char_loc))
2538 /* If warnings are on, this will print a more detailed analysis of what
2539 * is wrong than the error message below */
2540 utf8n_to_uvchr(first_bad_char_loc,
2541 e - ((char *) first_bad_char_loc),
2544 /* We deliberately don't try to print the malformed character, which
2545 * might not print very well; it also may be just the first of many
2546 * malformations, so don't print what comes after it */
2547 yyerror_pv(Perl_form(aTHX_
2548 "Malformed UTF-8 character immediately after '%.*s'",
2549 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2554 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2555 /* include the <}> */
2556 e - backslash_ptr + 1);
2558 SvREFCNT_dec_NN(res);
2562 /* See if the charnames handler is the Perl core's, and if so, we can skip
2563 * the validation needed for a user-supplied one, as Perl's does its own
2565 table = GvHV(PL_hintgv); /* ^H */
2566 cvp = hv_fetchs(table, "charnames", FALSE);
2567 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2568 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2570 const char * const name = HvNAME(stash);
2571 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2572 && strEQ(name, "_charnames")) {
2577 /* Here, it isn't Perl's charname handler. We can't rely on a
2578 * user-supplied handler to validate the input name. For non-ut8 input,
2579 * look to see that the first character is legal. Then loop through the
2580 * rest checking that each is a continuation */
2582 /* This code makes the reasonable assumption that the only Latin1-range
2583 * characters that begin a character name alias are alphabetic, otherwise
2584 * would have to create a isCHARNAME_BEGIN macro */
2587 if (! isALPHAU(*s)) {
2592 if (! isCHARNAME_CONT(*s)) {
2595 if (*s == ' ' && *(s-1) == ' ') {
2602 /* Similarly for utf8. For invariants can check directly; for other
2603 * Latin1, can calculate their code point and check; otherwise use a
2605 if (UTF8_IS_INVARIANT(*s)) {
2606 if (! isALPHAU(*s)) {
2610 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2611 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2617 if (! PL_utf8_charname_begin) {
2618 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2619 PL_utf8_charname_begin = _core_swash_init("utf8",
2620 "_Perl_Charname_Begin",
2622 1, 0, NULL, &flags);
2624 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2631 if (UTF8_IS_INVARIANT(*s)) {
2632 if (! isCHARNAME_CONT(*s)) {
2635 if (*s == ' ' && *(s-1) == ' ') {
2640 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2641 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2648 if (! PL_utf8_charname_continue) {
2649 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2650 PL_utf8_charname_continue = _core_swash_init("utf8",
2651 "_Perl_Charname_Continue",
2653 1, 0, NULL, &flags);
2655 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2662 if (*(s-1) == ' ') {
2665 "charnames alias definitions may not contain trailing "
2666 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2667 (int)(s - backslash_ptr + 1), backslash_ptr,
2668 (int)(e - s + 1), s + 1
2670 UTF ? SVf_UTF8 : 0);
2674 if (SvUTF8(res)) { /* Don't accept malformed input */
2675 const U8* first_bad_char_loc;
2677 const char* const str = SvPV_const(res, len);
2678 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2679 /* If warnings are on, this will print a more detailed analysis of
2680 * what is wrong than the error message below */
2681 utf8n_to_uvchr(first_bad_char_loc,
2682 (char *) first_bad_char_loc - str,
2685 /* We deliberately don't try to print the malformed character,
2686 * which might not print very well; it also may be just the first
2687 * of many malformations, so don't print what comes after it */
2690 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2691 (int) (e - backslash_ptr + 1), backslash_ptr,
2692 (int) ((char *) first_bad_char_loc - str), str
2703 /* The final %.*s makes sure that should the trailing NUL be missing
2704 * that this print won't run off the end of the string */
2707 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2708 (int)(s - backslash_ptr + 1), backslash_ptr,
2709 (int)(e - s + 1), s + 1
2711 UTF ? SVf_UTF8 : 0);
2718 "charnames alias definitions may not contain a sequence of "
2719 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2720 (int)(s - backslash_ptr + 1), backslash_ptr,
2721 (int)(e - s + 1), s + 1
2723 UTF ? SVf_UTF8 : 0);
2730 Extracts the next constant part of a pattern, double-quoted string,
2731 or transliteration. This is terrifying code.
2733 For example, in parsing the double-quoted string "ab\x63$d", it would
2734 stop at the '$' and return an OP_CONST containing 'abc'.
2736 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2737 processing a pattern (PL_lex_inpat is true), a transliteration
2738 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2740 Returns a pointer to the character scanned up to. If this is
2741 advanced from the start pointer supplied (i.e. if anything was
2742 successfully parsed), will leave an OP_CONST for the substring scanned
2743 in pl_yylval. Caller must intuit reason for not parsing further
2744 by looking at the next characters herself.
2748 \N{FOO} => \N{U+hex_for_character_FOO}
2749 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2752 all other \-char, including \N and \N{ apart from \N{ABC}
2755 @ and $ where it appears to be a var, but not for $ as tail anchor
2759 In transliterations:
2760 characters are VERY literal, except for - not at the start or end
2761 of the string, which indicates a range. If the range is in bytes,
2762 scan_const expands the range to the full set of intermediate
2763 characters. If the range is in utf8, the hyphen is replaced with
2764 a certain range mark which will be handled by pmtrans() in op.c.
2766 In double-quoted strings:
2768 double-quoted style: \r and \n
2769 constants: \x31, etc.
2770 deprecated backrefs: \1 (in substitution replacements)
2771 case and quoting: \U \Q \E
2774 scan_const does *not* construct ops to handle interpolated strings.
2775 It stops processing as soon as it finds an embedded $ or @ variable
2776 and leaves it to the caller to work out what's going on.
2778 embedded arrays (whether in pattern or not) could be:
2779 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2781 $ in double-quoted strings must be the symbol of an embedded scalar.
2783 $ in pattern could be $foo or could be tail anchor. Assumption:
2784 it's a tail anchor if $ is the last thing in the string, or if it's
2785 followed by one of "()| \r\n\t"
2787 \1 (backreferences) are turned into $1 in substitutions
2789 The structure of the code is
2790 while (there's a character to process) {
2791 handle transliteration ranges
2792 skip regexp comments /(?#comment)/ and codes /(?{code})/
2793 skip #-initiated comments in //x patterns
2794 check for embedded arrays
2795 check for embedded scalars
2797 deprecate \1 in substitution replacements
2798 handle string-changing backslashes \l \U \Q \E, etc.
2799 switch (what was escaped) {
2800 handle \- in a transliteration (becomes a literal -)
2801 if a pattern and not \N{, go treat as regular character
2802 handle \132 (octal characters)
2803 handle \x15 and \x{1234} (hex characters)
2804 handle \N{name} (named characters, also \N{3,5} in a pattern)
2805 handle \cV (control characters)
2806 handle printf-style backslashes (\f, \r, \n, etc)
2809 } (end if backslash)
2810 handle regular character
2811 } (end while character to read)
2816 S_scan_const(pTHX_ char *start)
2818 char *send = PL_bufend; /* end of the constant */
2819 SV *sv = newSV(send - start); /* sv for the constant. See note below
2821 char *s = start; /* start of the constant */
2822 char *d = SvPVX(sv); /* destination for copies */
2823 bool dorange = FALSE; /* are we in a translit range? */
2824 bool didrange = FALSE; /* did we just finish a range? */
2825 bool in_charclass = FALSE; /* within /[...]/ */
2826 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2827 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2828 UTF8? But, this can show as true
2829 when the source isn't utf8, as for
2830 example when it is entirely composed
2832 SV *res; /* result from charnames */
2833 STRLEN offset_to_max; /* The offset in the output to where the range
2834 high-end character is temporarily placed */
2836 /* Note on sizing: The scanned constant is placed into sv, which is
2837 * initialized by newSV() assuming one byte of output for every byte of
2838 * input. This routine expects newSV() to allocate an extra byte for a
2839 * trailing NUL, which this routine will append if it gets to the end of
2840 * the input. There may be more bytes of input than output (eg., \N{LATIN
2841 * CAPITAL LETTER A}), or more output than input if the constant ends up
2842 * recoded to utf8, but each time a construct is found that might increase
2843 * the needed size, SvGROW() is called. Its size parameter each time is
2844 * based on the best guess estimate at the time, namely the length used so
2845 * far, plus the length the current construct will occupy, plus room for
2846 * the trailing NUL, plus one byte for every input byte still unscanned */
2848 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2851 int backslash_N = 0; /* ? was the character from \N{} */
2852 int non_portable_endpoint = 0; /* ? In a range is an endpoint
2853 platform-specific like \x65 */
2856 PERL_ARGS_ASSERT_SCAN_CONST;
2858 assert(PL_lex_inwhat != OP_TRANSR);
2859 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2860 /* If we are doing a trans and we know we want UTF8 set expectation */
2861 has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2862 this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2865 /* Protect sv from errors and fatal warnings. */
2866 ENTER_with_name("scan_const");
2870 || dorange /* Handle tr/// range at right edge of input */
2873 /* get transliterations out of the way (they're most literal) */
2874 if (PL_lex_inwhat == OP_TRANS) {
2876 /* But there isn't any special handling necessary unless there is a
2877 * range, so for most cases we just drop down and handle the value
2878 * as any other. There are two exceptions.
2880 * 1. A minus sign indicates that we are actually going to have
2881 * a range. In this case, skip the '-', set a flag, then drop
2882 * down to handle what should be the end range value.
2883 * 2. After we've handled that value, the next time through, that
2884 * flag is set and we fix up the range.
2886 * Ranges entirely within Latin1 are expanded out entirely, in
2887 * order to avoid the significant overhead of making a swash.
2888 * Ranges that extend above Latin1 have to have a swash, so there
2889 * is no advantage to abbreviate them here, so they are stored here
2890 * as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies a
2891 * hyphen without any possible ambiguity. On EBCDIC machines, if
2892 * the range is expressed as Unicode, the Latin1 portion is
2893 * expanded out even if the entire range extends above Latin1.
2894 * This is because each code point in it has to be processed here
2895 * individually to get its native translation */
2899 /* Here, we don't think we're in a range. If we've processed
2900 * at least one character, then see if this next one is a '-',
2901 * indicating the previous one was the start of a range. But
2902 * don't bother if we're too close to the end for the minus to
2904 if (*s != '-' || s >= send - 1 || s == start) {
2906 /* A regular character. Process like any other, but first
2907 * clear any flags */
2911 non_portable_endpoint = 0;
2914 /* Drops down to generic code to process current byte */
2917 if (didrange) { /* Something like y/A-C-Z// */
2918 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2923 s++; /* Skip past the minus */
2925 /* d now points to where the end-range character will be
2926 * placed. Save it so won't have to go finding it later,
2927 * and drop down to get that character. (Actually we
2928 * instead save the offset, to handle the case where a
2929 * realloc in the meantime could change the actual
2930 * pointer). We'll finish processing the range the next
2931 * time through the loop */
2932 offset_to_max = d - SvPVX_const(sv);
2934 } /* End of not a range */
2936 /* Here we have parsed a range. Now must handle it. At this
2938 * 'sv' is a SV* that contains the output string we are
2939 * constructing. The final two characters in that string
2940 * are the range start and range end, in order.
2941 * 'd' points to just beyond the range end in the 'sv' string,
2942 * where we would next place something
2943 * 'offset_to_max' is the offset in 'sv' at which the character
2944 * before 'd' begins.
2946 const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2947 const char * min_ptr;
2949 IV range_max; /* last character in range */
2952 #ifndef EBCDIC /* Not meaningful except in EBCDIC, so initialize to false */
2953 const bool convert_unicode = FALSE;
2954 const IV real_range_max = 0;
2956 bool convert_unicode;
2957 IV real_range_max = 0;
2960 /* Get the range-ends code point values. */
2962 /* We know the utf8 is valid, because we just constructed
2963 * it ourselves in previous loop iterations */
2964 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
2965 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
2966 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
2969 min_ptr = max_ptr - 1;
2970 range_min = * (U8*) min_ptr;
2971 range_max = * (U8*) max_ptr;
2975 /* On EBCDIC platforms, we may have to deal with portable
2976 * ranges. These happen if at least one range endpoint is a
2977 * Unicode value (\N{...}), or if the range is a subset of
2978 * [A-Z] or [a-z], and both ends are literal characters,
2979 * like 'A', and not like \x{C1} */
2980 if ((convert_unicode
2981 = cBOOL(backslash_N) /* \N{} forces Unicode, hence
2983 || ( ! non_portable_endpoint
2984 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
2985 || (isUPPER_A(range_min) && isUPPER_A(range_max))))
2988 /* Special handling is needed for these portable ranges.
2989 * They are defined to all be in Unicode terms, which
2990 * include all Unicode code points between the end points.
2991 * Convert to Unicode to get the Unicode range. Later we
2992 * will convert each code point in the range back to
2994 range_min = NATIVE_TO_UNI(range_min);
2995 range_max = NATIVE_TO_UNI(range_max);
2999 if (range_min > range_max) {
3000 if (convert_unicode) {
3001 /* Need to convert back to native for meaningful
3002 * messages for this platform */
3003 range_min = UNI_TO_NATIVE(range_min);
3004 range_max = UNI_TO_NATIVE(range_max);
3007 /* Use the characters themselves for the error message if
3008 * ASCII printables; otherwise some visible representation
3010 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3012 "Invalid range \"%c-%c\" in transliteration operator",
3013 (char)range_min, (char)range_max);
3015 else if (convert_unicode) {
3016 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3018 "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
3019 " in transliteration operator",
3020 range_min, range_max);
3023 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3025 "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
3026 " in transliteration operator",
3027 range_min, range_max);
3033 /* We try to avoid creating a swash. If the upper end of
3034 * this range is below 256, this range won't force a swash;
3035 * otherwise it does force a swash, and as long as we have
3036 * to have one, we might as well not expand things out.
3037 * But if it's EBCDIC, we may have to look at each
3038 * character below 256 if we have to convert to/from
3042 && (range_min > 255 || ! convert_unicode)
3045 /* Move the high character one byte to the right; then
3046 * insert between it and the range begin, an illegal
3047 * byte which serves to indicate this is a range (using
3048 * a '-' could be ambiguous). */
3050 while (e-- > max_ptr) {
3053 *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3057 /* Here, we're going to expand out the range. For EBCDIC
3058 * the range can extend above 255 (not so in ASCII), so
3059 * for EBCDIC, split it into the parts above and below
3062 if (range_max > 255) {
3063 real_range_max = range_max;
3069 /* Here we need to expand out the string to contain each
3070 * character in the range. Grow the output to handle this */
3072 save_offset = min_ptr - SvPVX_const(sv);
3074 /* The base growth is the number of code points in the range */
3075 grow = range_max - range_min + 1;
3078 /* But if the output is UTF-8, some of those characters may
3079 * need two bytes (since the maximum range value here is
3080 * 255, the max bytes per character is two). On ASCII
3081 * platforms, it's not much trouble to get an accurate
3082 * count of what's needed. But on EBCDIC, the ones that
3083 * need 2 bytes are scattered around, so just use a worst
3084 * case value instead of calculating for that platform. */
3088 /* Only those above 127 require 2 bytes. This may be
3089 * everything in the range, or not */
3090 if (range_min > 127) {
3093 else if (range_max > 127) {
3094 grow += range_max - 127;
3099 /* Subtract 3 for the bytes that were already accounted for
3100 * (min, max, and the hyphen) */
3101 SvGROW(sv, SvLEN(sv) + grow - 3);
3102 d = SvPVX(sv) + save_offset; /* refresh d after realloc */
3104 /* Here, we expand out the range. On ASCII platforms, the
3105 * compiler should optimize out the 'convert_unicode==TRUE'
3106 * portion of this */
3107 if (convert_unicode) {
3110 /* Recall that the min and max are now in Unicode terms, so
3111 * we have to convert each character to its native
3114 for (i = range_min; i <= range_max; i++) {
3115 append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3120 for (i = range_min; i <= range_max; i++) {
3121 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3128 /* Here, no conversions are necessary, which means that the
3129 * first character in the range is already in 'd' and
3130 * valid, so we can skip overwriting it */
3133 for (i = range_min + 1; i <= range_max; i++) {
3134 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3139 for (i = range_min + 1; i <= range_max; i++) {
3145 /* (Compilers should optimize this out for non-EBCDIC). If the
3146 * original range extended above 255, add in that portion */
3147 if (real_range_max) {
3148 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3149 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3150 if (real_range_max > 0x101)
3151 *d++ = (char) ILLEGAL_UTF8_BYTE;
3152 if (real_range_max > 0x100)
3153 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3157 /* mark the range as done, and continue */
3161 non_portable_endpoint = 0;
3165 } /* End of is a range */
3166 } /* End of transliteration. Joins main code after these else's */
3167 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3170 while (s1 >= start && *s1-- == '\\')
3173 in_charclass = TRUE;
3176 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3179 while (s1 >= start && *s1-- == '\\')
3182 in_charclass = FALSE;
3185 /* skip for regexp comments /(?#comment)/, except for the last
3186 * char, which will be done separately.
3187 * Stop on (?{..}) and friends */
3189 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3191 while (s+1 < send && *s != ')')
3194 else if (!PL_lex_casemods
3195 && ( s[2] == '{' /* This should match regcomp.c */
3196 || (s[2] == '?' && s[3] == '{')))
3202 /* likewise skip #-initiated comments in //x patterns */
3206 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3208 while (s+1 < send && *s != '\n')
3212 /* no further processing of single-quoted regex */
3213 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3214 goto default_action;
3216 /* check for embedded arrays
3217 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3219 else if (*s == '@' && s[1]) {
3220 if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
3222 if (strchr(":'{$", s[1]))
3224 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3225 break; /* in regexp, neither @+ nor @- are interpolated */
3228 /* check for embedded scalars. only stop if we're sure it's a
3231 else if (*s == '$') {
3232 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3234 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3236 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3237 "Possible unintended interpolation of $\\ in regex");
3239 break; /* in regexp, $ might be tail anchor */
3243 /* End of else if chain - OP_TRANS rejoin rest */
3246 if (*s == '\\' && s+1 < send) {
3247 char* e; /* Can be used for ending '}', etc. */
3251 /* warn on \1 - \9 in substitution replacements, but note that \11
3252 * is an octal; and \19 is \1 followed by '9' */
3253 if (PL_lex_inwhat == OP_SUBST
3259 /* diag_listed_as: \%d better written as $%d */
3260 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3265 /* string-change backslash escapes */
3266 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3270 /* In a pattern, process \N, but skip any other backslash escapes.
3271 * This is because we don't want to translate an escape sequence
3272 * into a meta symbol and have the regex compiler use the meta
3273 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3274 * in spite of this, we do have to process \N here while the proper
3275 * charnames handler is in scope. See bugs #56444 and #62056.
3277 * There is a complication because \N in a pattern may also stand
3278 * for 'match a non-nl', and not mean a charname, in which case its
3279 * processing should be deferred to the regex compiler. To be a
3280 * charname it must be followed immediately by a '{', and not look
3281 * like \N followed by a curly quantifier, i.e., not something like
3282 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3284 else if (PL_lex_inpat
3287 || regcurly(s + 1)))
3290 goto default_action;
3296 if ((isALPHANUMERIC(*s)))
3297 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3298 "Unrecognized escape \\%c passed through",
3300 /* default action is to copy the quoted character */
3301 goto default_action;
3304 /* eg. \132 indicates the octal constant 0132 */
3305 case '0': case '1': case '2': case '3':
3306 case '4': case '5': case '6': case '7':
3308 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3310 uv = grok_oct(s, &len, &flags, NULL);
3312 if (len < 3 && s < send && isDIGIT(*s)
3313 && ckWARN(WARN_MISC))
3315 Perl_warner(aTHX_ packWARN(WARN_MISC),
3316 "%s", form_short_octal_warning(s, len));
3319 goto NUM_ESCAPE_INSERT;
3321 /* eg. \o{24} indicates the octal constant \024 */
3326 bool valid = grok_bslash_o(&s, &uv, &error,
3327 TRUE, /* Output warning */
3328 FALSE, /* Not strict */
3329 TRUE, /* Output warnings for
3336 goto NUM_ESCAPE_INSERT;
3339 /* eg. \x24 indicates the hex constant 0x24 */
3344 bool valid = grok_bslash_x(&s, &uv, &error,
3345 TRUE, /* Output warning */
3346 FALSE, /* Not strict */
3347 TRUE, /* Output warnings for
3357 /* Insert oct or hex escaped character. */
3359 /* Here uv is the ordinal of the next character being added */
3360 if (UVCHR_IS_INVARIANT(uv)) {
3364 if (!has_utf8 && uv > 255) {
3365 /* Might need to recode whatever we have accumulated so
3366 * far if it contains any chars variant in utf8 or
3369 SvCUR_set(sv, d - SvPVX_const(sv));
3372 /* See Note on sizing above. */
3373 sv_utf8_upgrade_flags_grow(
3375 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3376 /* Above-latin1 in string
3377 * implies no encoding */
3378 |SV_UTF8_NO_ENCODING,
3379 UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
3380 d = SvPVX(sv) + SvCUR(sv);
3385 /* Usually, there will already be enough room in 'sv'
3386 * since such escapes are likely longer than any UTF-8
3387 * sequence they can end up as. This isn't the case on
3388 * EBCDIC where \x{40000000} contains 12 bytes, and the
3389 * UTF-8 for it contains 14. And, we have to allow for
3390 * a trailing NUL. It probably can't happen on ASCII
3391 * platforms, but be safe */
3392 const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
3394 if (UNLIKELY(needed > SvLEN(sv))) {
3395 SvCUR_set(sv, d - SvPVX_const(sv));
3396 d = sv_grow(sv, needed) + SvCUR(sv);
3399 d = (char*)uvchr_to_utf8((U8*)d, uv);
3400 if (PL_lex_inwhat == OP_TRANS
3401 && PL_parser->lex_sub_op)
3403 PL_parser->lex_sub_op->op_private |=
3404 (PL_lex_repl ? OPpTRANS_FROM_UTF
3413 non_portable_endpoint++;
3418 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3419 * named character, like \N{LATIN SMALL LETTER A}, or a named
3420 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3421 * GRAVE} (except y/// can't handle the latter, croaking). For
3422 * convenience all three forms are referred to as "named
3423 * characters" below.
3425 * For patterns, \N also can mean to match a non-newline. Code
3426 * before this 'switch' statement should already have handled
3427 * this situation, and hence this code only has to deal with
3428 * the named character cases.
3430 * For non-patterns, the named characters are converted to
3431 * their string equivalents. In patterns, named characters are
3432 * not converted to their ultimate forms for the same reasons
3433 * that other escapes aren't. Instead, they are converted to
3434 * the \N{U+...} form to get the value from the charnames that
3435 * is in effect right now, while preserving the fact that it
3436 * was a named character, so that the regex compiler knows
3439 * The structure of this section of code (besides checking for
3440 * errors and upgrading to utf8) is:
3441 * If the named character is of the form \N{U+...}, pass it
3442 * through if a pattern; otherwise convert the code point
3444 * Otherwise must be some \N{NAME}: convert to
3445 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3447 * Transliteration is an exception. The conversion to utf8 is
3448 * only done if the code point requires it to be representable.
3450 * Here, 's' points to the 'N'; the test below is guaranteed to
3451 * succeed if we are being called on a pattern, as we already
3452 * know from a test above that the next character is a '{'. A
3453 * non-pattern \N must mean 'named character', which requires
3457 yyerror("Missing braces on \\N{}");
3462 /* If there is no matching '}', it is an error. */
3463 if (! (e = strchr(s, '}'))) {
3464 if (! PL_lex_inpat) {
3465 yyerror("Missing right brace on \\N{}");
3467 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3472 /* Here it looks like a named character */
3474 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3475 s += 2; /* Skip to next char after the 'U+' */
3478 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3479 /* Check the syntax. */
3482 if (!isXDIGIT(*s)) {
3485 "Invalid hexadecimal number in \\N{U+...}"
3493 else if ((*s == '.' || *s == '_')
3499 /* Pass everything through unchanged.
3500 * +1 is for the '}' */
3501 Copy(orig_s, d, e - orig_s + 1, char);
3502 d += e - orig_s + 1;
3504 else { /* Not a pattern: convert the hex to string */
3505 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3506 | PERL_SCAN_SILENT_ILLDIGIT
3507 | PERL_SCAN_DISALLOW_PREFIX;
3509 uv = grok_hex(s, &len, &flags, NULL);
3510 if (len == 0 || (len != (STRLEN)(e - s)))
3513 /* For non-tr///, if the destination is not in utf8,
3514 * unconditionally recode it to be so. This is
3515 * because \N{} implies Unicode semantics, and scalars
3516 * have to be in utf8 to guarantee those semantics.
3517 * tr/// doesn't care about Unicode rules, so no need
3518 * there to upgrade to UTF-8 for small enough code
3520 if (! has_utf8 && ( uv > 0xFF
3521 || PL_lex_inwhat != OP_TRANS))
3523 SvCUR_set(sv, d - SvPVX_const(sv));
3526 /* See Note on sizing above. */
3527 sv_utf8_upgrade_flags_grow(
3529 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3530 UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
3531 d = SvPVX(sv) + SvCUR(sv);
3535 /* Add the (Unicode) code point to the output. */
3536 if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3537 *d++ = (char) LATIN1_TO_NATIVE(uv);
3540 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3544 else /* Here is \N{NAME} but not \N{U+...}. */
3545 if ((res = get_and_check_backslash_N_name(s, e)))
3548 const char *str = SvPV_const(res, len);
3551 if (! len) { /* The name resolved to an empty string */
3552 Copy("\\N{}", d, 4, char);
3556 /* In order to not lose information for the regex
3557 * compiler, pass the result in the specially made
3558 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3559 * the code points in hex of each character
3560 * returned by charnames */
3562 const char *str_end = str + len;
3563 const STRLEN off = d - SvPVX_const(sv);
3565 if (! SvUTF8(res)) {
3566 /* For the non-UTF-8 case, we can determine the
3567 * exact length needed without having to parse
3568 * through the string. Each character takes up
3569 * 2 hex digits plus either a trailing dot or
3571 const char initial_text[] = "\\N{U+";
3572 const STRLEN initial_len = sizeof(initial_text)
3574 d = off + SvGROW(sv, off
3577 /* +1 for trailing NUL */
3580 + (STRLEN)(send - e));
3581 Copy(initial_text, d, initial_len, char);
3583 while (str < str_end) {
3586 my_snprintf(hex_string,
3590 /* The regex compiler is
3591 * expecting Unicode, not
3593 NATIVE_TO_LATIN1(*str));
3594 PERL_MY_SNPRINTF_POST_GUARD(len,
3595 sizeof(hex_string));
3596 Copy(hex_string, d, 3, char);
3600 d--; /* Below, we will overwrite the final
3601 dot with a right brace */
3604 STRLEN char_length; /* cur char's byte length */
3606 /* and the number of bytes after this is
3607 * translated into hex digits */
3608 STRLEN output_length;
3610 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3611 * for max('U+', '.'); and 1 for NUL */
3612 char hex_string[2 * UTF8_MAXBYTES + 5];
3614 /* Get the first character of the result. */
3615 U32 uv = utf8n_to_uvchr((U8 *) str,
3619 /* Convert first code point to Unicode hex,
3620 * including the boiler plate before it. */
3622 my_snprintf(hex_string, sizeof(hex_string),
3624 (unsigned int) NATIVE_TO_UNI(uv));
3626 /* Make sure there is enough space to hold it */
3627 d = off + SvGROW(sv, off
3629 + (STRLEN)(send - e)
3630 + 2); /* '}' + NUL */
3632 Copy(hex_string, d, output_length, char);
3635 /* For each subsequent character, append dot and
3636 * its Unicode code point in hex */
3637 while ((str += char_length) < str_end) {
3638 const STRLEN off = d - SvPVX_const(sv);
3639 U32 uv = utf8n_to_uvchr((U8 *) str,
3644 my_snprintf(hex_string,
3647 (unsigned int) NATIVE_TO_UNI(uv));
3649 d = off + SvGROW(sv, off
3651 + (STRLEN)(send - e)
3652 + 2); /* '}' + NUL */
3653 Copy(hex_string, d, output_length, char);
3658 *d++ = '}'; /* Done. Add the trailing brace */
3661 else { /* Here, not in a pattern. Convert the name to a
3664 if (PL_lex_inwhat == OP_TRANS) {
3665 str = SvPV_const(res, len);
3666 if (len > ((SvUTF8(res))
3670 yyerror(Perl_form(aTHX_
3671 "%.*s must not be a named sequence"
3672 " in transliteration operator",
3673 /* +1 to include the "}" */
3674 (int) (e + 1 - start), start));
3675 goto end_backslash_N;
3678 else if (! SvUTF8(res)) {
3679 /* Make sure \N{} return is UTF-8. This is because
3680 * \N{} implies Unicode semantics, and scalars have
3681 * to be in utf8 to guarantee those semantics; but
3682 * not needed in tr/// */
3683 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3684 str = SvPV_const(res, len);
3687 /* Upgrade destination to be utf8 if this new
3689 if (! has_utf8 && SvUTF8(res)) {
3690 SvCUR_set(sv, d - SvPVX_const(sv));
3693 /* See Note on sizing above. */
3694 sv_utf8_upgrade_flags_grow(sv,
3695 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3696 len + (STRLEN)(send - s) + 1);
3697 d = SvPVX(sv) + SvCUR(sv);
3699 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3701 /* See Note on sizing above. (NOTE: SvCUR() is not
3702 * set correctly here). */
3703 const STRLEN off = d - SvPVX_const(sv);
3704 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3706 Copy(str, d, len, char);
3712 } /* End \N{NAME} */
3716 backslash_N++; /* \N{} is defined to be Unicode */
3718 s = e + 1; /* Point to just after the '}' */
3721 /* \c is a control character */
3725 *d++ = grok_bslash_c(*s++, 1);
3728 yyerror("Missing control char name in \\c");
3731 non_portable_endpoint++;
3735 /* printf-style backslashes, formfeeds, newlines, etc */
3761 } /* end if (backslash) */
3764 /* If we started with encoded form, or already know we want it,
3765 then encode the next character */
3766 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3769 /* One might think that it is wasted effort in the case of the
3770 * source being utf8 (this_utf8 == TRUE) to take the next character
3771 * in the source, convert it to an unsigned value, and then convert
3772 * it back again. But the source has not been validated here. The
3773 * routine that does the conversion checks for errors like
3776 const UV nextuv = (this_utf8)
3777 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3779 const STRLEN need = UVCHR_SKIP(nextuv);
3781 SvCUR_set(sv, d - SvPVX_const(sv));
3784 /* See Note on sizing above. */
3785 sv_utf8_upgrade_flags_grow(sv,
3786 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3787 need + (STRLEN)(send - s) + 1);
3788 d = SvPVX(sv) + SvCUR(sv);
3790 } else if (need > len) {
3791 /* encoded value larger than old, may need extra space (NOTE:
3792 * SvCUR() is not set correctly here). See Note on sizing
3794 const STRLEN off = d - SvPVX_const(sv);
3795 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3799 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3804 } /* while loop to process each character */
3806 /* terminate the string and set up the sv */
3808 SvCUR_set(sv, d - SvPVX_const(sv));
3809 if (SvCUR(sv) >= SvLEN(sv))
3810 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3811 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3816 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
3817 PL_parser->lex_sub_op->op_private |=
3818 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3822 /* shrink the sv if we allocated more than we used */
3823 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3824 SvPV_shrink_to_cur(sv);
3827 /* return the substring (via pl_yylval) only if we parsed anything */
3830 for (; s2 < s; s2++) {
3832 COPLINE_INC_WITH_HERELINES;
3834 SvREFCNT_inc_simple_void_NN(sv);
3835 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3836 && ! PL_parser->lex_re_reparsing)
3838 const char *const key = PL_lex_inpat ? "qr" : "q";
3839 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3843 if (PL_lex_inwhat == OP_TRANS) {
3846 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3849 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3857 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3860 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3862 LEAVE_with_name("scan_const");
3867 * Returns TRUE if there's more to the expression (e.g., a subscript),
3870 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3872 * ->[ and ->{ return TRUE
3873 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3874 * { and [ outside a pattern are always subscripts, so return TRUE
3875 * if we're outside a pattern and it's not { or [, then return FALSE
3876 * if we're in a pattern and the first char is a {
3877 * {4,5} (any digits around the comma) returns FALSE
3878 * if we're in a pattern and the first char is a [
3880 * [SOMETHING] has a funky algorithm to decide whether it's a
3881 * character class or not. It has to deal with things like
3882 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3883 * anything else returns TRUE
3886 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3889 S_intuit_more(pTHX_ char *s)
3891 PERL_ARGS_ASSERT_INTUIT_MORE;
3893 if (PL_lex_brackets)
3895 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3897 if (*s == '-' && s[1] == '>'
3898 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3899 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3900 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3902 if (*s != '{' && *s != '[')
3907 /* In a pattern, so maybe we have {n,m}. */
3915 /* On the other hand, maybe we have a character class */
3918 if (*s == ']' || *s == '^')
3921 /* this is terrifying, and it works */
3924 const char * const send = strchr(s,']');
3925 unsigned char un_char, last_un_char;
3926 char tmpbuf[sizeof PL_tokenbuf * 4];
3928 if (!send) /* has to be an expression */
3930 weight = 2; /* let's weigh the evidence */
3934 else if (isDIGIT(*s)) {
3936 if (isDIGIT(s[1]) && s[2] == ']')
3942 Zero(seen,256,char);
3944 for (; s < send; s++) {
3945 last_un_char = un_char;
3946 un_char = (unsigned char)*s;
3951 weight -= seen[un_char] * 10;
3952 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3954 char *tmp = PL_bufend;
3955 PL_bufend = (char*)send;
3956 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3958 len = (int)strlen(tmpbuf);
3959 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3960 UTF ? SVf_UTF8 : 0, SVt_PV))
3967 && strchr("[#!%*<>()-=",s[1]))
3969 if (/*{*/ strchr("])} =",s[2]))
3978 if (strchr("wds]",s[1]))
3980 else if (seen[(U8)'\''] || seen[(U8)'"'])
3982 else if (strchr("rnftbxcav",s[1]))
3984 else if (isDIGIT(s[1])) {
3986 while (s[1] && isDIGIT(s[1]))
3996 if (strchr("aA01! ",last_un_char))
3998 if (strchr("zZ79~",s[1]))
4000 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4001 weight -= 5; /* cope with negative subscript */
4004 if (!isWORDCHAR(last_un_char)
4005 && !(last_un_char == '$' || last_un_char == '@'
4006 || last_un_char == '&')
4007 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4011 if (keyword(d, s - d, 0))
4014 if (un_char == last_un_char + 1)
4016 weight -= seen[un_char];
4021 if (weight >= 0) /* probably a character class */
4031 * Does all the checking to disambiguate
4033 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4034 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4036 * First argument is the stuff after the first token, e.g. "bar".
4038 * Not a method if foo is a filehandle.
4039 * Not a method if foo is a subroutine prototyped to take a filehandle.
4040 * Not a method if it's really "Foo $bar"
4041 * Method if it's "foo $bar"
4042 * Not a method if it's really "print foo $bar"
4043 * Method if it's really "foo package::" (interpreted as package->foo)
4044 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4045 * Not a method if bar is a filehandle or package, but is quoted with
4050 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4052 char *s = start + (*start == '$');
4053 char tmpbuf[sizeof PL_tokenbuf];
4056 /* Mustn't actually add anything to a symbol table.
4057 But also don't want to "initialise" any placeholder
4058 constants that might already be there into full
4059 blown PVGVs with attached PVCV. */
4061 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4063 PERL_ARGS_ASSERT_INTUIT_METHOD;
4065 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4067 if (cv && SvPOK(cv)) {
4068 const char *proto = CvPROTO(cv);
4070 while (*proto && (isSPACE(*proto) || *proto == ';'))
4077 if (*start == '$') {
4078 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4079 || isUPPER(*PL_tokenbuf))
4084 return *s == '(' ? FUNCMETH : METHOD;
4087 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4088 /* start is the beginning of the possible filehandle/object,
4089 * and s is the end of it
4090 * tmpbuf is a copy of it (but with single quotes as double colons)
4093 if (!keyword(tmpbuf, len, 0)) {
4094 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4099 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4100 if (indirgv && GvCVu(indirgv))
4102 /* filehandle or package name makes it a method */
4103 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4105 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4106 return 0; /* no assumptions -- "=>" quotes bareword */
4108 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4109 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4110 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4112 force_next(BAREWORD);
4114 return *s == '(' ? FUNCMETH : METHOD;
4120 /* Encoded script support. filter_add() effectively inserts a
4121 * 'pre-processing' function into the current source input stream.
4122 * Note that the filter function only applies to the current source file
4123 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4125 * The datasv parameter (which may be NULL) can be used to pass
4126 * private data to this instance of the filter. The filter function
4127 * can recover the SV using the FILTER_DATA macro and use it to
4128 * store private buffers and state information.
4130 * The supplied datasv parameter is upgraded to a PVIO type
4131 * and the IoDIRP/IoANY field is used to store the function pointer,
4132 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4133 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4134 * private use must be set using malloc'd pointers.
4138 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4146 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4147 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4149 if (!PL_rsfp_filters)
4150 PL_rsfp_filters = newAV();
4153 SvUPGRADE(datasv, SVt_PVIO);
4154 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4155 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4156 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4157 FPTR2DPTR(void *, IoANY(datasv)),
4158 SvPV_nolen(datasv)));
4159 av_unshift(PL_rsfp_filters, 1);
4160 av_store(PL_rsfp_filters, 0, datasv) ;
4162 !PL_parser->filtered
4163 && PL_parser->lex_flags & LEX_EVALBYTES
4164 && PL_bufptr < PL_bufend
4166 const char *s = PL_bufptr;
4167 while (s < PL_bufend) {
4169 SV *linestr = PL_parser->linestr;
4170 char *buf = SvPVX(linestr);
4171 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4172 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4173 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4174 STRLEN const linestart_pos = PL_parser->linestart - buf;
4175 STRLEN const last_uni_pos =
4176 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4177 STRLEN const last_lop_pos =
4178 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4179 av_push(PL_rsfp_filters, linestr);
4180 PL_parser->linestr =
4181 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4182 buf = SvPVX(PL_parser->linestr);
4183 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4184 PL_parser->bufptr = buf + bufptr_pos;
4185 PL_parser->oldbufptr = buf + oldbufptr_pos;
4186 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4187 PL_parser->linestart = buf + linestart_pos;
4188 if (PL_parser->last_uni)
4189 PL_parser->last_uni = buf + last_uni_pos;
4190 if (PL_parser->last_lop)
4191 PL_parser->last_lop = buf + last_lop_pos;
4192 SvLEN(linestr) = SvCUR(linestr);
4193 SvCUR(linestr) = s-SvPVX(linestr);
4194 PL_parser->filtered = 1;
4204 /* Delete most recently added instance of this filter function. */
4206 Perl_filter_del(pTHX_ filter_t funcp)
4210 PERL_ARGS_ASSERT_FILTER_DEL;
4213 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4214 FPTR2DPTR(void*, funcp)));
4216 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4218 /* if filter is on top of stack (usual case) just pop it off */
4219 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4220 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4221 sv_free(av_pop(PL_rsfp_filters));
4225 /* we need to search for the correct entry and clear it */
4226 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4230 /* Invoke the idxth filter function for the current rsfp. */
4231 /* maxlen 0 = read one text line */
4233 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4237 /* This API is bad. It should have been using unsigned int for maxlen.
4238 Not sure if we want to change the API, but if not we should sanity
4239 check the value here. */
4240 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4242 PERL_ARGS_ASSERT_FILTER_READ;
4244 if (!PL_parser || !PL_rsfp_filters)
4246 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4247 /* Provide a default input filter to make life easy. */
4248 /* Note that we append to the line. This is handy. */
4249 DEBUG_P(PerlIO_printf(Perl_debug_log,
4250 "filter_read %d: from rsfp\n", idx));
4251 if (correct_length) {
4254 const int old_len = SvCUR(buf_sv);
4256 /* ensure buf_sv is large enough */
4257 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4258 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4259 correct_length)) <= 0) {
4260 if (PerlIO_error(PL_rsfp))
4261 return -1; /* error */
4263 return 0 ; /* end of file */
4265 SvCUR_set(buf_sv, old_len + len) ;
4266 SvPVX(buf_sv)[old_len + len] = '\0';
4269 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4270 if (PerlIO_error(PL_rsfp))
4271 return -1; /* error */
4273 return 0 ; /* end of file */
4276 return SvCUR(buf_sv);
4278 /* Skip this filter slot if filter has been deleted */
4279 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4280 DEBUG_P(PerlIO_printf(Perl_debug_log,
4281 "filter_read %d: skipped (filter deleted)\n",
4283 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4285 if (SvTYPE(datasv) != SVt_PVIO) {
4286 if (correct_length) {
4288 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4289 if (!remainder) return 0; /* eof */
4290 if (correct_length > remainder) correct_length = remainder;
4291 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4292 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4295 const char *s = SvEND(datasv);
4296 const char *send = SvPVX(datasv) + SvLEN(datasv);
4304 if (s == send) return 0; /* eof */
4305 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4306 SvCUR_set(datasv, s-SvPVX(datasv));
4308 return SvCUR(buf_sv);
4310 /* Get function pointer hidden within datasv */
4311 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4312 DEBUG_P(PerlIO_printf(Perl_debug_log,
4313 "filter_read %d: via function %p (%s)\n",
4314 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4315 /* Call function. The function is expected to */
4316 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4317 /* Return: <0:error, =0:eof, >0:not eof */
4318 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4322 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4324 PERL_ARGS_ASSERT_FILTER_GETS;
4326 #ifdef PERL_CR_FILTER
4327 if (!PL_rsfp_filters) {
4328 filter_add(S_cr_textfilter,NULL);
4331 if (PL_rsfp_filters) {
4333 SvCUR_set(sv, 0); /* start with empty line */
4334 if (FILTER_READ(0, sv, 0) > 0)
4335 return ( SvPVX(sv) ) ;
4340 return (sv_gets(sv, PL_rsfp, append));
4344 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4348 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4350 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4354 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4355 && (gv = gv_fetchpvn_flags(pkgname,
4357 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4359 return GvHV(gv); /* Foo:: */
4362 /* use constant CLASS => 'MyClass' */
4363 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4364 if (gv && GvCV(gv)) {
4365 SV * const sv = cv_const_sv(GvCV(gv));
4367 return gv_stashsv(sv, 0);
4370 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4375 S_tokenize_use(pTHX_ int is_use, char *s) {
4376 PERL_ARGS_ASSERT_TOKENIZE_USE;
4378 if (PL_expect != XSTATE)
4379 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4380 is_use ? "use" : "no"));
4383 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4384 s = force_version(s, TRUE);
4385 if (*s == ';' || *s == '}'
4386 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4387 NEXTVAL_NEXTTOKE.opval = NULL;
4388 force_next(BAREWORD);
4390 else if (*s == 'v') {
4391 s = force_word(s,BAREWORD,FALSE,TRUE);
4392 s = force_version(s, FALSE);
4396 s = force_word(s,BAREWORD,FALSE,TRUE);
4397 s = force_version(s, FALSE);
4399 pl_yylval.ival = is_use;
4403 static const char* const exp_name[] =
4404 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4405 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4410 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4412 S_word_takes_any_delimiter(char *p, STRLEN len)
4414 return (len == 1 && strchr("msyq", p[0]))
4416 && ((p[0] == 't' && p[1] == 'r')
4417 || (p[0] == 'q' && strchr("qwxr", p[1]))));
4421 S_check_scalar_slice(pTHX_ char *s)
4424 while (*s == ' ' || *s == '\t') s++;
4425 if (*s == 'q' && s[1] == 'w'
4426 && !isWORDCHAR_lazy_if(s+2,UTF))
4428 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4429 s += UTF ? UTF8SKIP(s) : 1;
4430 if (*s == '}' || *s == ']')
4431 pl_yylval.ival = OPpSLICEWARNING;
4434 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4436 S_lex_token_boundary(pTHX)
4438 PL_oldoldbufptr = PL_oldbufptr;
4439 PL_oldbufptr = PL_bufptr;
4442 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4444 S_vcs_conflict_marker(pTHX_ char *s)
4446 lex_token_boundary();
4448 yyerror("Version control conflict marker");
4449 while (s < PL_bufend && *s != '\n')
4457 Works out what to call the token just pulled out of the input
4458 stream. The yacc parser takes care of taking the ops we return and
4459 stitching them into a tree.
4462 The type of the next token
4465 Check if we have already built the token; if so, use it.
4466 Switch based on the current state:
4467 - if we have a case modifier in a string, deal with that
4468 - handle other cases of interpolation inside a string
4469 - scan the next line if we are inside a format
4470 In the normal state, switch on the next character:
4472 if alphabetic, go to key lookup
4473 unrecognized character - croak
4474 - 0/4/26: handle end-of-line or EOF
4475 - cases for whitespace
4476 - \n and #: handle comments and line numbers
4477 - various operators, brackets and sigils
4480 - 'v': vstrings (or go to key lookup)
4481 - 'x' repetition operator (or go to key lookup)
4482 - other ASCII alphanumerics (key lookup begins here):
4485 scan built-in keyword (but do nothing with it yet)
4486 check for statement label
4487 check for lexical subs
4488 goto just_a_word if there is one
4489 see whether built-in keyword is overridden
4490 switch on keyword number:
4491 - default: just_a_word:
4492 not a built-in keyword; handle bareword lookup
4493 disambiguate between method and sub call
4494 fall back to bareword
4495 - cases for built-in keywords
4503 char *s = PL_bufptr;
4507 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4511 /* orig_keyword, gvp, and gv are initialized here because
4512 * jump to the label just_a_word_zero can bypass their
4513 * initialization later. */
4514 I32 orig_keyword = 0;
4519 SV* tmp = newSVpvs("");
4520 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4521 (IV)CopLINE(PL_curcop),
4522 lex_state_names[PL_lex_state],
4523 exp_name[PL_expect],
4524 pv_display(tmp, s, strlen(s), 0, 60));
4528 /* when we've already built the next token, just pull it out of the queue */
4531 pl_yylval = PL_nextval[PL_nexttoke];
4534 next_type = PL_nexttype[PL_nexttoke];
4535 if (next_type & (7<<24)) {
4536 if (next_type & (1<<24)) {
4537 if (PL_lex_brackets > 100)
4538 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4539 PL_lex_brackstack[PL_lex_brackets++] =
4540 (char) ((next_type >> 16) & 0xff);
4542 if (next_type & (2<<24))
4543 PL_lex_allbrackets++;
4544 if (next_type & (4<<24))
4545 PL_lex_allbrackets--;
4546 next_type &= 0xffff;
4548 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4552 switch (PL_lex_state) {
4554 case LEX_INTERPNORMAL:
4557 /* interpolated case modifiers like \L \U, including \Q and \E.
4558 when we get here, PL_bufptr is at the \
4560 case LEX_INTERPCASEMOD:
4562 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4564 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4565 PL_bufptr, PL_bufend, *PL_bufptr);
4567 /* handle \E or end of string */
4568 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4570 if (PL_lex_casemods) {
4571 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4572 PL_lex_casestack[PL_lex_casemods] = '\0';
4574 if (PL_bufptr != PL_bufend
4575 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4576 || oldmod == 'F')) {
4578 PL_lex_state = LEX_INTERPCONCAT;
4580 PL_lex_allbrackets--;
4583 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4584 /* Got an unpaired \E */
4585 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4586 "Useless use of \\E");
4588 if (PL_bufptr != PL_bufend)
4590 PL_lex_state = LEX_INTERPCONCAT;
4594 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4595 "### Saw case modifier\n"); });
4597 if (s[1] == '\\' && s[2] == 'E') {
4599 PL_lex_state = LEX_INTERPCONCAT;
4604 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4605 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4606 if ((*s == 'L' || *s == 'U' || *s == 'F')
4607 && (strchr(PL_lex_casestack, 'L')
4608 || strchr(PL_lex_casestack, 'U')
4609 || strchr(PL_lex_casestack, 'F')))
4611 PL_lex_casestack[--PL_lex_casemods] = '\0';
4612 PL_lex_allbrackets--;
4615 if (PL_lex_casemods > 10)
4616 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4617 PL_lex_casestack[PL_lex_casemods++] = *s;
4618 PL_lex_casestack[PL_lex_casemods] = '\0';
4619 PL_lex_state = LEX_INTERPCONCAT;
4620 NEXTVAL_NEXTTOKE.ival = 0;
4621 force_next((2<<24)|'(');
4623 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4625 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4627 NEXTVAL_NEXTTOKE.ival = OP_LC;
4629 NEXTVAL_NEXTTOKE.ival = OP_UC;
4631 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4633 NEXTVAL_NEXTTOKE.ival = OP_FC;
4635 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4639 if (PL_lex_starts) {
4642 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4643 if (PL_lex_casemods == 1 && PL_lex_inpat)
4646 AopNOASSIGN(OP_CONCAT);
4652 case LEX_INTERPPUSH:
4653 return REPORT(sublex_push());
4655 case LEX_INTERPSTART:
4656 if (PL_bufptr == PL_bufend)
4657 return REPORT(sublex_done());
4658 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4659 "### Interpolated variable\n"); });
4661 /* for /@a/, we leave the joining for the regex engine to do
4662 * (unless we're within \Q etc) */
4663 PL_lex_dojoin = (*PL_bufptr == '@'
4664 && (!PL_lex_inpat || PL_lex_casemods));
4665 PL_lex_state = LEX_INTERPNORMAL;
4666 if (PL_lex_dojoin) {
4667 NEXTVAL_NEXTTOKE.ival = 0;
4669 force_ident("\"", '$');
4670 NEXTVAL_NEXTTOKE.ival = 0;
4672 NEXTVAL_NEXTTOKE.ival = 0;
4673 force_next((2<<24)|'(');
4674 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4677 /* Convert (?{...}) and friends to 'do {...}' */
4678 if (PL_lex_inpat && *PL_bufptr == '(') {
4679 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4681 if (*PL_bufptr != '{')
4683 PL_expect = XTERMBLOCK;
4687 if (PL_lex_starts++) {
4689 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4690 if (!PL_lex_casemods && PL_lex_inpat)
4693 AopNOASSIGN(OP_CONCAT);
4697 case LEX_INTERPENDMAYBE:
4698 if (intuit_more(PL_bufptr)) {
4699 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4705 if (PL_lex_dojoin) {
4706 const U8 dojoin_was = PL_lex_dojoin;
4707 PL_lex_dojoin = FALSE;
4708 PL_lex_state = LEX_INTERPCONCAT;
4709 PL_lex_allbrackets--;
4710 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
4712 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4713 && SvEVALED(PL_lex_repl))
4715 if (PL_bufptr != PL_bufend)
4716 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4719 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4720 re_eval_str. If the here-doc body’s length equals the previous
4721 value of re_eval_start, re_eval_start will now be null. So
4722 check re_eval_str as well. */
4723 if (PL_parser->lex_shared->re_eval_start
4724 || PL_parser->lex_shared->re_eval_str) {
4726 if (*PL_bufptr != ')')
4727 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4729 /* having compiled a (?{..}) expression, return the original
4730 * text too, as a const */
4731 if (PL_parser->lex_shared->re_eval_str) {
4732 sv = PL_parser->lex_shared->re_eval_str;
4733 PL_parser->lex_shared->re_eval_str = NULL;
4735 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4736 SvPV_shrink_to_cur(sv);
4738 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4739 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4740 NEXTVAL_NEXTTOKE.opval =
4741 (OP*)newSVOP(OP_CONST, 0,
4744 PL_parser->lex_shared->re_eval_start = NULL;
4750 case LEX_INTERPCONCAT:
4752 if (PL_lex_brackets)
4753 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4754 (long) PL_lex_brackets);
4756 if (PL_bufptr == PL_bufend)
4757 return REPORT(sublex_done());
4759 /* m'foo' still needs to be parsed for possible (?{...}) */
4760 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4761 SV *sv = newSVsv(PL_linestr);
4763 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4767 s = scan_const(PL_bufptr);
4769 PL_lex_state = LEX_INTERPCASEMOD;
4771 PL_lex_state = LEX_INTERPSTART;
4774 if (s != PL_bufptr) {
4775 NEXTVAL_NEXTTOKE = pl_yylval;
4778 if (PL_lex_starts++) {
4779 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4780 if (!PL_lex_casemods && PL_lex_inpat)
4783 AopNOASSIGN(OP_CONCAT);
4793 s = scan_formline(PL_bufptr);
4794 if (!PL_lex_formbrack)
4803 /* We really do *not* want PL_linestr ever becoming a COW. */
4804 assert (!SvIsCOW(PL_linestr));
4806 PL_oldoldbufptr = PL_oldbufptr;
4808 PL_parser->saw_infix_sigil = 0;
4814 if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
4816 SAVESPTR(PL_warnhook);
4817 PL_warnhook = PERL_WARNHOOK_FATAL;
4818 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
4821 if (isIDFIRST_utf8((U8*)s)) {
4825 else if (isALNUMC(*s)) {
4829 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4830 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4832 SVs_TEMP | SVf_UTF8),
4833 10, UNI_DISPLAY_ISPRINT)
4834 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4835 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4836 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4837 d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4841 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4842 UTF8fARG(UTF, (s - d), d),
4847 goto fake_eof; /* emulate EOF on ^D or ^Z */
4849 if ((!PL_rsfp || PL_lex_inwhat)
4850 && (!PL_parser->filtered || s+1 < PL_bufend)) {
4854 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
4856 yyerror((const char *)
4858 ? "Format not terminated"
4859 : "Missing right curly or square bracket"));
4861 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4862 "### Tokener got EOF\n");
4866 if (s++ < PL_bufend)
4867 goto retry; /* ignore stray nulls */
4870 if (!PL_in_eval && !PL_preambled) {
4871 PL_preambled = TRUE;
4873 /* Generate a string of Perl code to load the debugger.
4874 * If PERL5DB is set, it will return the contents of that,
4875 * otherwise a compile-time require of perl5db.pl. */
4877 const char * const pdb = PerlEnv_getenv("PERL5DB");
4880 sv_setpv(PL_linestr, pdb);
4881 sv_catpvs(PL_linestr,";");
4883 SETERRNO(0,SS_NORMAL);
4884 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4886 PL_parser->preambling = CopLINE(PL_curcop);
4888 sv_setpvs(PL_linestr,"");
4889 if (PL_preambleav) {
4890 SV **svp = AvARRAY(PL_preambleav);
4891 SV **const end = svp + AvFILLp(PL_preambleav);
4893 sv_catsv(PL_linestr, *svp);
4895 sv_catpvs(PL_linestr, ";");
4897 sv_free(MUTABLE_SV(PL_preambleav));
4898 PL_preambleav = NULL;
4901 sv_catpvs(PL_linestr,
4902 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4903 if (PL_minus_n || PL_minus_p) {
4904 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4906 sv_catpvs(PL_linestr,"chomp;");
4909 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4910 || *PL_splitstr == '"')
4911 && strchr(PL_splitstr + 1, *PL_splitstr))
4912 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4914 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4915 bytes can be used as quoting characters. :-) */
4916 const char *splits = PL_splitstr;
4917 sv_catpvs(PL_linestr, "our @F=split(q\0");
4920 if (*splits == '\\')
4921 sv_catpvn(PL_linestr, splits, 1);
4922 sv_catpvn(PL_linestr, splits, 1);
4923 } while (*splits++);
4924 /* This loop will embed the trailing NUL of
4925 PL_linestr as the last thing it does before
4927 sv_catpvs(PL_linestr, ");");
4931 sv_catpvs(PL_linestr,"our @F=split(' ');");
4934 sv_catpvs(PL_linestr, "\n");
4935 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4936 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4937 PL_last_lop = PL_last_uni = NULL;
4938 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
4939 update_debugger_info(PL_linestr, NULL, 0);
4944 bof = PL_rsfp ? TRUE : FALSE;
4947 fake_eof = LEX_FAKE_EOF;
4949 PL_bufptr = PL_bufend;
4950 COPLINE_INC_WITH_HERELINES;
4951 if (!lex_next_chunk(fake_eof)) {
4952 CopLINE_dec(PL_curcop);
4954 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4956 CopLINE_dec(PL_curcop);
4958 /* If it looks like the start of a BOM or raw UTF-16,
4959 * check if it in fact is. */
4962 || *(U8*)s == BOM_UTF8_FIRST_BYTE
4966 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4967 bof = (offset == (Off_t)SvCUR(PL_linestr));
4968 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4969 /* offset may include swallowed CR */
4971 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4974 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4975 s = swallow_bom((U8*)s);
4978 if (PL_parser->in_pod) {
4979 /* Incest with pod. */
4980 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4981 sv_setpvs(PL_linestr, "");
4982 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4983 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4984 PL_last_lop = PL_last_uni = NULL;
4985 PL_parser->in_pod = 0;
4988 if (PL_rsfp || PL_parser->filtered)
4990 } while (PL_parser->in_pod);
4991 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
4992 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4993 PL_last_lop = PL_last_uni = NULL;
4994 if (CopLINE(PL_curcop) == 1) {
4995 while (s < PL_bufend && isSPACE(*s))
4997 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5001 if (*s == '#' && *(s+1) == '!')
5003 #ifdef ALTERNATE_SHEBANG
5005 static char const as[] = ALTERNATE_SHEBANG;
5006 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5007 d = s + (sizeof(as) - 1);
5009 #endif /* ALTERNATE_SHEBANG */
5018 while (*d && !isSPACE(*d))
5022 #ifdef ARG_ZERO_IS_SCRIPT
5023 if (ipathend > ipath) {
5025 * HP-UX (at least) sets argv[0] to the script name,
5026 * which makes $^X incorrect. And Digital UNIX and Linux,
5027 * at least, set argv[0] to the basename of the Perl
5028 * interpreter. So, having found "#!", we'll set it right.
5030 SV* copfilesv = CopFILESV(PL_curcop);
5033 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5035 assert(SvPOK(x) || SvGMAGICAL(x));
5036 if (sv_eq(x, copfilesv)) {
5037 sv_setpvn(x, ipath, ipathend - ipath);
5043 const char *bstart = SvPV_const(copfilesv, blen);
5044 const char * const lstart = SvPV_const(x, llen);
5046 bstart += blen - llen;
5047 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5048 sv_setpvn(x, ipath, ipathend - ipath);
5055 /* Anything to do if no copfilesv? */
5057 TAINT_NOT; /* $^X is always tainted, but that's OK */
5059 #endif /* ARG_ZERO_IS_SCRIPT */
5064 d = instr(s,"perl -");
5066 d = instr(s,"perl");
5067 if (d && d[4] == '6')
5070 /* avoid getting into infinite loops when shebang
5071 * line contains "Perl" rather than "perl" */
5073 for (d = ipathend-4; d >= ipath; --d) {
5074 if (isALPHA_FOLD_EQ(*d, 'p')
5075 && !ibcmp(d, "perl", 4))
5085 #ifdef ALTERNATE_SHEBANG
5087 * If the ALTERNATE_SHEBANG on this system starts with a
5088 * character that can be part of a Perl expression, then if
5089 * we see it but not "perl", we're probably looking at the
5090 * start of Perl code, not a request to hand off to some
5091 * other interpreter. Similarly, if "perl" is there, but
5092 * not in the first 'word' of the line, we assume the line
5093 * contains the start of the Perl program.
5095 if (d && *s != '#') {
5096 const char *c = ipath;
5097 while (*c && !strchr("; \t\r\n\f\v#", *c))
5100 d = NULL; /* "perl" not in first word; ignore */
5102 *s = '#'; /* Don't try to parse shebang line */
5104 #endif /* ALTERNATE_SHEBANG */
5109 && !instr(s,"indir")
5110 && instr(PL_origargv[0],"perl"))
5117 while (s < PL_bufend && isSPACE(*s))
5119 if (s < PL_bufend) {
5120 Newx(newargv,PL_origargc+3,char*);
5122 while (s < PL_bufend && !isSPACE(*s))
5125 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5128 newargv = PL_origargv;
5131 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5133 Perl_croak(aTHX_ "Can't exec %s", ipath);
5136 while (*d && !isSPACE(*d))
5138 while (SPACE_OR_TAB(*d))
5142 const bool switches_done = PL_doswitches;
5143 const U32 oldpdb = PL_perldb;
5144 const bool oldn = PL_minus_n;
5145 const bool oldp = PL_minus_p;
5149 bool baduni = FALSE;
5151 const char *d2 = d1 + 1;
5152 if (parse_unicode_opts((const char **)&d2)
5156 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5157 const char * const m = d1;
5158 while (*d1 && !isSPACE(*d1))
5160 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5163 d1 = moreswitches(d1);
5165 if (PL_doswitches && !switches_done) {
5166 int argc = PL_origargc;
5167 char **argv = PL_origargv;
5170 } while (argc && argv[0][0] == '-' && argv[0][1]);
5171 init_argv_symbols(argc,argv);
5173 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5174 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5175 /* if we have already added "LINE: while (<>) {",
5176 we must not do it again */
5178 sv_setpvs(PL_linestr, "");
5179 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5180 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5181 PL_last_lop = PL_last_uni = NULL;
5182 PL_preambled = FALSE;
5183 if (PERLDB_LINE_OR_SAVESRC)
5184 (void)gv_fetchfile(PL_origfilename);
5191 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5192 PL_lex_state = LEX_FORMLINE;
5193 force_next(FORMRBRACK);
5198 #ifdef PERL_STRICT_CR
5199 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5201 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5203 case ' ': case '\t': case '\f': case '\v':
5208 if (PL_lex_state != LEX_NORMAL
5209 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5211 const bool in_comment = *s == '#';
5212 if (*s == '#' && s == PL_linestart && PL_in_eval
5213 && !PL_rsfp && !PL_parser->filtered) {
5214 /* handle eval qq[#line 1 "foo"\n ...] */
5215 CopLINE_dec(PL_curcop);
5219 while (d < PL_bufend && *d != '\n')
5223 else if (d > PL_bufend)
5224 /* Found by Ilya: feed random input to Perl. */
5225 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5228 if (in_comment && d == PL_bufend
5229 && PL_lex_state == LEX_INTERPNORMAL
5230 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5231 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5234 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5235 PL_lex_state = LEX_FORMLINE;
5236 force_next(FORMRBRACK);
5241 while (s < PL_bufend && *s != '\n')
5249 else if (s > PL_bufend)
5250 /* Found by Ilya: feed random input to Perl. */
5251 Perl_croak(aTHX_ "panic: input overflow");
5255 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5263 while (s < PL_bufend && SPACE_OR_TAB(*s))
5266 if (strnEQ(s,"=>",2)) {
5267 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5268 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5269 OPERATOR('-'); /* unary minus */
5272 case 'r': ftst = OP_FTEREAD; break;
5273 case 'w': ftst = OP_FTEWRITE; break;
5274 case 'x': ftst = OP_FTEEXEC; break;
5275 case 'o': ftst = OP_FTEOWNED; break;
5276 case 'R': ftst = OP_FTRREAD; break;
5277 case 'W': ftst = OP_FTRWRITE; break;
5278 case 'X': ftst = OP_FTREXEC; break;
5279 case 'O': ftst = OP_FTROWNED; break;
5280 case 'e': ftst = OP_FTIS; break;
5281 case 'z': ftst = OP_FTZERO; break;
5282 case 's': ftst = OP_FTSIZE; break;
5283 case 'f': ftst = OP_FTFILE; break;
5284 case 'd': ftst = OP_FTDIR; break;
5285 case 'l': ftst = OP_FTLINK; break;
5286 case 'p': ftst = OP_FTPIPE; break;
5287 case 'S': ftst = OP_FTSOCK; break;
5288 case 'u': ftst = OP_FTSUID; break;
5289 case 'g': ftst = OP_FTSGID; break;
5290 case 'k': ftst = OP_FTSVTX; break;
5291 case 'b': ftst = OP_FTBLK; break;
5292 case 'c': ftst = OP_FTCHR; break;
5293 case 't': ftst = OP_FTTTY; break;
5294 case 'T': ftst = OP_FTTEXT; break;
5295 case 'B': ftst = OP_FTBINARY; break;
5296 case 'M': case 'A': case 'C':
5297 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5299 case 'M': ftst = OP_FTMTIME; break;
5300 case 'A': ftst = OP_FTATIME; break;
5301 case 'C': ftst = OP_FTCTIME; break;
5309 PL_last_uni = PL_oldbufptr;
5310 PL_last_lop_op = (OPCODE)ftst;
5311 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5312 "### Saw file test %c\n", (int)tmp);
5317 /* Assume it was a minus followed by a one-letter named
5318 * subroutine call (or a -bareword), then. */
5319 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5320 "### '-%c' looked like a file test but was not\n",
5327 const char tmp = *s++;
5330 if (PL_expect == XOPERATOR)
5335 else if (*s == '>') {
5338 if (((*s == '$' || *s == '&') && s[1] == '*')
5339 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5340 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5341 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5344 PL_expect = XPOSTDEREF;
5347 if (isIDFIRST_lazy_if(s,UTF)) {
5348 s = force_word(s,METHOD,FALSE,TRUE);
5356 if (PL_expect == XOPERATOR) {
5358 && !PL_lex_allbrackets
5359 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5367 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5369 OPERATOR('-'); /* unary minus */
5375 const char tmp = *s++;
5378 if (PL_expect == XOPERATOR)
5383 if (PL_expect == XOPERATOR) {
5385 && !PL_lex_allbrackets
5386 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5394 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5401 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5402 if (PL_expect != XOPERATOR) {
5403 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5404 PL_expect = XOPERATOR;
5405 force_ident(PL_tokenbuf, '*');
5413 if (*s == '=' && !PL_lex_allbrackets
5414 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5422 && !PL_lex_allbrackets
5423 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5428 PL_parser->saw_infix_sigil = 1;
5433 if (PL_expect == XOPERATOR) {
5435 && !PL_lex_allbrackets
5436 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5441 PL_parser->saw_infix_sigil = 1;
5444 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5445 PL_tokenbuf[0] = '%';
5446 s = scan_ident(s, PL_tokenbuf + 1,
5447 sizeof PL_tokenbuf - 1, FALSE);
5449 if (!PL_tokenbuf[1]) {
5452 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5454 PL_tokenbuf[0] = '@';
5456 PL_expect = XOPERATOR;
5457 force_ident_maybe_lex('%');
5462 bof = FEATURE_BITWISE_IS_ENABLED;
5463 if (bof && s[1] == '.')
5465 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5466 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5472 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5474 if (PL_lex_brackets > 100)
5475 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5476 PL_lex_brackstack[PL_lex_brackets++] = 0;
5477 PL_lex_allbrackets++;
5479 const char tmp = *s++;
5484 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5486 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5489 Perl_ck_warner_d(aTHX_
5490 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5491 "Smartmatch is experimental");
5495 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5497 BCop(OP_SCOMPLEMENT);
5499 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5501 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5508 goto just_a_word_zero_gv;
5514 switch (PL_expect) {
5516 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5518 PL_bufptr = s; /* update in case we back off */
5521 "Use of := for an empty attribute list is not allowed");
5528 PL_expect = XTERMBLOCK;
5532 while (isIDFIRST_lazy_if(s,UTF)) {
5535 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5536 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5537 if (tmp < 0) tmp = -tmp;
5552 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5554 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5555 COPLINE_SET_FROM_MULTI_END;
5557 /* MUST advance bufptr here to avoid bogus
5558 "at end of line" context messages from yyerror().
5560 PL_bufptr = s + len;
5561 yyerror("Unterminated attribute parameter in attribute list");
5565 return REPORT(0); /* EOF indicator */
5569 sv_catsv(sv, PL_lex_stuff);
5570 attrs = op_append_elem(OP_LIST, attrs,
5571 newSVOP(OP_CONST, 0, sv));
5572 SvREFCNT_dec_NN(PL_lex_stuff);
5573 PL_lex_stuff = NULL;
5576 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5578 if (PL_in_my == KEY_our) {
5579 deprecate(":unique");
5582 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5585 /* NOTE: any CV attrs applied here need to be part of
5586 the CVf_BUILTIN_ATTRS define in cv.h! */
5587 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5589 CvLVALUE_on(PL_compcv);
5591 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5593 deprecate(":locked");
5595 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5597 CvMETHOD_on(PL_compcv);
5599 else if (!PL_in_my && len == 5
5600 && strnEQ(SvPVX(sv), "const", len))
5603 Perl_ck_warner_d(aTHX_
5604 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5605 ":const is experimental"
5607 CvANONCONST_on(PL_compcv);
5608 if (!CvANON(PL_compcv))
5609 yyerror(":const is not permitted on named "
5612 /* After we've set the flags, it could be argued that
5613 we don't need to do the attributes.pm-based setting
5614 process, and shouldn't bother appending recognized
5615 flags. To experiment with that, uncomment the
5616 following "else". (Note that's already been
5617 uncommented. That keeps the above-applied built-in
5618 attributes from being intercepted (and possibly
5619 rejected) by a package's attribute routines, but is
5620 justified by the performance win for the common case
5621 of applying only built-in attributes.) */
5623 attrs = op_append_elem(OP_LIST, attrs,
5624 newSVOP(OP_CONST, 0,
5628 if (*s == ':' && s[1] != ':')
5631 break; /* require real whitespace or :'s */
5632 /* XXX losing whitespace on sequential attributes here */
5637 && !(PL_expect == XOPERATOR
5638 ? (*s == '=' || *s == ')')
5639 : (*s == '{' || *s == '(')))
5641 const char q = ((*s == '\'') ? '"' : '\'');
5642 /* If here for an expression, and parsed no attrs, back
5644 if (PL_expect == XOPERATOR && !attrs) {
5648 /* MUST advance bufptr here to avoid bogus "at end of line"
5649 context messages from yyerror().
5652 yyerror( (const char *)
5654 ? Perl_form(aTHX_ "Invalid separator character "
5655 "%c%c%c in attribute list", q, *s, q)
5656 : "Unterminated attribute list" ) );
5664 NEXTVAL_NEXTTOKE.opval = attrs;
5670 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5674 PL_lex_allbrackets--;
5678 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5679 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5683 PL_lex_allbrackets++;
5686 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5693 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5696 PL_lex_allbrackets--;
5702 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5705 if (PL_lex_brackets <= 0)
5706 /* diag_listed_as: Unmatched right %s bracket */
5707 yyerror("Unmatched right square bracket");
5710 PL_lex_allbrackets--;
5711 if (PL_lex_state == LEX_INTERPNORMAL) {
5712 if (PL_lex_brackets == 0) {
5713 if (*s == '-' && s[1] == '>')
5714 PL_lex_state = LEX_INTERPENDMAYBE;
5715 else if (*s != '[' && *s != '{')
5716 PL_lex_state = LEX_INTERPEND;
5723 if (PL_lex_brackets > 100) {
5724 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5726 switch (PL_expect) {
5729 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5730 PL_lex_allbrackets++;
5731 OPERATOR(HASHBRACK);
5733 while (s < PL_bufend && SPACE_OR_TAB(*s))
5736 PL_tokenbuf[0] = '\0';
5737 if (d < PL_bufend && *d == '-') {
5738 PL_tokenbuf[0] = '-';
5740 while (d < PL_bufend && SPACE_OR_TAB(*d))
5743 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5744 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5746 while (d < PL_bufend && SPACE_OR_TAB(*d))
5749 const char minus = (PL_tokenbuf[0] == '-');
5750 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
5758 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5759 PL_lex_allbrackets++;
5764 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5765 PL_lex_allbrackets++;
5769 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5770 PL_lex_allbrackets++;
5775 if (PL_oldoldbufptr == PL_last_lop)
5776 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5778 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5779 PL_lex_allbrackets++;
5782 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5784 /* This hack is to get the ${} in the message. */
5786 yyerror("syntax error");
5789 OPERATOR(HASHBRACK);
5791 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5792 /* ${...} or @{...} etc., but not print {...}
5793 * Skip the disambiguation and treat this as a block.
5795 goto block_expectation;
5797 /* This hack serves to disambiguate a pair of curlies
5798 * as being a block or an anon hash. Normally, expectation
5799 * determines that, but in cases where we're not in a
5800 * position to expect anything in particular (like inside
5801 * eval"") we have to resolve the ambiguity. This code
5802 * covers the case where the first term in the curlies is a
5803 * quoted string. Most other cases need to be explicitly
5804 * disambiguated by prepending a "+" before the opening
5805 * curly in order to force resolution as an anon hash.
5807 * XXX should probably propagate the outer expectation
5808 * into eval"" to rely less on this hack, but that could
5809 * potentially break current behavior of eval"".
5813 if (*s == '\'' || *s == '"' || *s == '`') {
5814 /* common case: get past first string, handling escapes */
5815 for (t++; t < PL_bufend && *t != *s;)
5820 else if (*s == 'q') {
5823 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5824 && !isWORDCHAR(*t))))
5826 /* skip q//-like construct */
5828 char open, close, term;
5831 while (t < PL_bufend && isSPACE(*t))
5833 /* check for q => */
5834 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5835 OPERATOR(HASHBRACK);
5839 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5843 for (t++; t < PL_bufend; t++) {
5844 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5846 else if (*t == open)
5850 for (t++; t < PL_bufend; t++) {
5851 if (*t == '\\' && t+1 < PL_bufend)
5853 else if (*t == close && --brackets <= 0)
5855 else if (*t == open)
5862 /* skip plain q word */
5863 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5864 t += UTF ? UTF8SKIP(t) : 1;
5866 else if (isWORDCHAR_lazy_if(t,UTF)) {
5867 t += UTF ? UTF8SKIP(t) : 1;
5868 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5869 t += UTF ? UTF8SKIP(t) : 1;
5871 while (t < PL_bufend && isSPACE(*t))
5873 /* if comma follows first term, call it an anon hash */
5874 /* XXX it could be a comma expression with loop modifiers */
5875 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5876 || (*t == '=' && t[1] == '>')))
5877 OPERATOR(HASHBRACK);
5878 if (PL_expect == XREF)
5881 /* If there is an opening brace or 'sub:', treat it
5882 as a term to make ${{...}}{k} and &{sub:attr...}
5883 dwim. Otherwise, treat it as a statement, so
5884 map {no strict; ...} works.
5891 if (strnEQ(s, "sub", 3)) {
5902 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5908 pl_yylval.ival = CopLINE(PL_curcop);
5909 PL_copline = NOLINE; /* invalidate current command line number */
5910 TOKEN(formbrack ? '=' : '{');
5912 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5916 if (PL_lex_brackets <= 0)
5917 /* diag_listed_as: Unmatched right %s bracket */
5918 yyerror("Unmatched right curly bracket");
5920 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5921 PL_lex_allbrackets--;
5922 if (PL_lex_state == LEX_INTERPNORMAL) {
5923 if (PL_lex_brackets == 0) {
5924 if (PL_expect & XFAKEBRACK) {
5925 PL_expect &= XENUMMASK;
5926 PL_lex_state = LEX_INTERPEND;
5928 return yylex(); /* ignore fake brackets */
5930 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5931 && SvEVALED(PL_lex_repl))
5932 PL_lex_state = LEX_INTERPEND;
5933 else if (*s == '-' && s[1] == '>')
5934 PL_lex_state = LEX_INTERPENDMAYBE;
5935 else if (*s != '[' && *s != '{')
5936 PL_lex_state = LEX_INTERPEND;
5939 if (PL_expect & XFAKEBRACK) {
5940 PL_expect &= XENUMMASK;
5942 return yylex(); /* ignore fake brackets */
5944 force_next(formbrack ? '.' : '}');
5945 if (formbrack) LEAVE;
5946 if (formbrack == 2) { /* means . where arguments were expected */
5952 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5955 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5956 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5963 if (PL_expect == XOPERATOR) {
5964 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5965 && isIDFIRST_lazy_if(s,UTF))
5967 CopLINE_dec(PL_curcop);
5968 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5969 CopLINE_inc(PL_curcop);
5972 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5974 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5975 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5981 PL_parser->saw_infix_sigil = 1;
5982 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5988 PL_tokenbuf[0] = '&';
5989 s = scan_ident(s - 1, PL_tokenbuf + 1,
5990 sizeof PL_tokenbuf - 1, TRUE);
5991 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
5992 if (PL_tokenbuf[1]) {
5993 force_ident_maybe_lex('&');
6002 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6003 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6011 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6013 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6014 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6018 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6022 const char tmp = *s++;
6024 if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "=====", 5)) {
6025 s = vcs_conflict_marker(s + 5);
6028 if (!PL_lex_allbrackets
6029 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6037 if (!PL_lex_allbrackets
6038 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6047 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6048 && strchr("+-*/%.^&|<",tmp))
6049 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6050 "Reversed %c= operator",(int)tmp);
6052 if (PL_expect == XSTATE
6054 && (s == PL_linestart+1 || s[-2] == '\n') )
6056 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6057 || PL_lex_state != LEX_NORMAL) {
6062 if (strnEQ(s,"=cut",4)) {
6076 PL_parser->in_pod = 1;
6080 if (PL_expect == XBLOCK) {
6082 #ifdef PERL_STRICT_CR
6083 while (SPACE_OR_TAB(*t))
6085 while (SPACE_OR_TAB(*t) || *t == '\r')
6088 if (*t == '\n' || *t == '#') {
6091 SAVEI8(PL_parser->form_lex_state);
6092 SAVEI32(PL_lex_formbrack);
6093 PL_parser->form_lex_state = PL_lex_state;
6094 PL_lex_formbrack = PL_lex_brackets + 1;
6098 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6107 const char tmp = *s++;
6109 /* was this !=~ where !~ was meant?
6110 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6112 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6113 const char *t = s+1;
6115 while (t < PL_bufend && isSPACE(*t))
6118 if (*t == '/' || *t == '?'
6119 || ((*t == 'm' || *t == 's' || *t == 'y')
6120 && !isWORDCHAR(t[1]))
6121 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6122 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6123 "!=~ should be !~");
6125 if (!PL_lex_allbrackets
6126 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6139 if (PL_expect != XOPERATOR) {
6140 if (s[1] != '<' && !strchr(s,'>'))
6142 if (s[1] == '<' && s[2] != '>') {
6143 if ((s == PL_linestart || s[-1] == '\n') && strnEQ(s+2, "<<<<<", 5)) {
6144 s = vcs_conflict_marker(s + 7);
6147 s = scan_heredoc(s);
6150 s = scan_inputsymbol(s);
6151 PL_expect = XOPERATOR;
6152 TOKEN(sublex_start());
6158 if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, "<<<<<", 5)) {
6159 s = vcs_conflict_marker(s + 5);
6162 if (*s == '=' && !PL_lex_allbrackets
6163 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6168 SHop(OP_LEFT_SHIFT);
6173 if (!PL_lex_allbrackets
6174 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6182 if (!PL_lex_allbrackets
6183 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6192 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6200 const char tmp = *s++;
6202 if ((s == PL_linestart+2 || s[-3] == '\n') && strnEQ(s, ">>>>>", 5)) {
6203 s = vcs_conflict_marker(s + 5);
6206 if (*s == '=' && !PL_lex_allbrackets
6207 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6212 SHop(OP_RIGHT_SHIFT);
6214 else if (tmp == '=') {
6215 if (!PL_lex_allbrackets
6216 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6225 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6234 if (PL_expect == XOPERATOR) {
6235 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6236 return deprecate_commaless_var_list();
6239 else if (PL_expect == XPOSTDEREF) {
6242 POSTDEREF(DOLSHARP);
6247 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6248 PL_tokenbuf[0] = '@';
6249 s = scan_ident(s + 1, PL_tokenbuf + 1,
6250 sizeof PL_tokenbuf - 1, FALSE);
6251 if (PL_expect == XOPERATOR) {
6253 if (PL_bufptr > s) {
6255 PL_bufptr = PL_oldbufptr;
6257 no_op("Array length", d);
6259 if (!PL_tokenbuf[1])
6261 PL_expect = XOPERATOR;
6262 force_ident_maybe_lex('#');
6266 PL_tokenbuf[0] = '$';
6267 s = scan_ident(s, PL_tokenbuf + 1,
6268 sizeof PL_tokenbuf - 1, FALSE);
6269 if (PL_expect == XOPERATOR) {
6271 if (PL_bufptr > s) {
6273 PL_bufptr = PL_oldbufptr;
6277 if (!PL_tokenbuf[1]) {
6279 yyerror("Final $ should be \\$ or $name");
6285 const char tmp = *s;
6286 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6289 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6290 && intuit_more(s)) {
6292 PL_tokenbuf[0] = '@';
6293 if (ckWARN(WARN_SYNTAX)) {
6296 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6297 t += UTF ? UTF8SKIP(t) : 1;
6299 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6300 while (t < PL_bufend && *t != ']')
6302 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6303 "Multidimensional syntax %"UTF8f" not supported",
6304 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6308 else if (*s == '{') {
6310 PL_tokenbuf[0] = '%';
6311 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6312 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6314 char tmpbuf[sizeof PL_tokenbuf];
6317 } while (isSPACE(*t));
6318 if (isIDFIRST_lazy_if(t,UTF)) {
6320 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6325 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6326 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6327 "You need to quote \"%"UTF8f"\"",
6328 UTF8fARG(UTF, len, tmpbuf));
6334 PL_expect = XOPERATOR;
6335 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6336 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6337 if (!islop || PL_last_lop_op == OP_GREPSTART)
6338 PL_expect = XOPERATOR;
6339 else if (strchr("$@\"'`q", *s))
6340 PL_expect = XTERM; /* e.g. print $fh "foo" */
6341 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6342 PL_expect = XTERM; /* e.g. print $fh &sub */
6343 else if (isIDFIRST_lazy_if(s,UTF)) {
6344 char tmpbuf[sizeof PL_tokenbuf];
6346 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6347 if ((t2 = keyword(tmpbuf, len, 0))) {
6348 /* binary operators exclude handle interpretations */
6360 PL_expect = XTERM; /* e.g. print $fh length() */
6365 PL_expect = XTERM; /* e.g. print $fh subr() */
6368 else if (isDIGIT(*s))
6369 PL_expect = XTERM; /* e.g. print $fh 3 */
6370 else if (*s == '.' && isDIGIT(s[1]))
6371 PL_expect = XTERM; /* e.g. print $fh .3 */
6372 else if ((*s == '?' || *s == '-' || *s == '+')
6373 && !isSPACE(s[1]) && s[1] != '=')
6374 PL_expect = XTERM; /* e.g. print $fh -1 */
6375 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6377 PL_expect = XTERM; /* e.g. print $fh /.../
6378 XXX except DORDOR operator
6380 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6382 PL_expect = XTERM; /* print $fh <<"EOF" */
6385 force_ident_maybe_lex('$');
6389 if (PL_expect == XPOSTDEREF)
6391 PL_tokenbuf[0] = '@';
6392 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6393 if (PL_expect == XOPERATOR) {
6395 if (PL_bufptr > s) {
6397 PL_bufptr = PL_oldbufptr;
6402 if (!PL_tokenbuf[1]) {
6405 if (PL_lex_state == LEX_NORMAL)
6407 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6409 PL_tokenbuf[0] = '%';
6411 /* Warn about @ where they meant $. */
6412 if (*s == '[' || *s == '{') {
6413 if (ckWARN(WARN_SYNTAX)) {
6414 S_check_scalar_slice(aTHX_ s);
6418 PL_expect = XOPERATOR;
6419 force_ident_maybe_lex('@');
6422 case '/': /* may be division, defined-or, or pattern */
6423 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6424 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6425 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6430 else if (PL_expect == XOPERATOR) {
6432 if (*s == '=' && !PL_lex_allbrackets
6433 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6441 /* Disable warning on "study /blah/" */
6442 if (PL_oldoldbufptr == PL_last_uni
6443 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6444 || memNE(PL_last_uni, "study", 5)
6445 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6448 s = scan_pat(s,OP_MATCH);
6449 TERM(sublex_start());
6452 case '?': /* conditional */
6454 if (!PL_lex_allbrackets
6455 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6460 PL_lex_allbrackets++;
6464 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6465 #ifdef PERL_STRICT_CR
6468 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6470 && (s == PL_linestart || s[-1] == '\n') )
6473 formbrack = 2; /* dot seen where arguments expected */
6476 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6480 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6483 if (!PL_lex_allbrackets
6484 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6492 pl_yylval.ival = OPf_SPECIAL;
6498 if (*s == '=' && !PL_lex_allbrackets
6499 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6507 case '0': case '1': case '2': case '3': case '4':
6508 case '5': case '6': case '7': case '8': case '9':
6509 s = scan_num(s, &pl_yylval);
6510 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6511 if (PL_expect == XOPERATOR)
6516 if ( PL_expect == XOPERATOR
6517 && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6518 return deprecate_commaless_var_list();
6520 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6523 COPLINE_SET_FROM_MULTI_END;
6524 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6525 if (PL_expect == XOPERATOR) {
6528 pl_yylval.ival = OP_CONST;
6529 TERM(sublex_start());
6532 if ( PL_expect == XOPERATOR
6533 && (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack))
6534 return deprecate_commaless_var_list();
6536 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6539 printbuf("### Saw string before %s\n", s);
6541 PerlIO_printf(Perl_debug_log,
6542 "### Saw unterminated string\n");
6544 if (PL_expect == XOPERATOR) {
6549 pl_yylval.ival = OP_CONST;
6550 /* FIXME. I think that this can be const if char *d is replaced by
6551 more localised variables. */
6552 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6553 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6554 pl_yylval.ival = OP_STRINGIFY;
6558 if (pl_yylval.ival == OP_CONST)
6559 COPLINE_SET_FROM_MULTI_END;
6560 TERM(sublex_start());
6563 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6566 printbuf("### Saw backtick string before %s\n", s);
6568 PerlIO_printf(Perl_debug_log,
6569 "### Saw unterminated backtick string\n");
6571 if (PL_expect == XOPERATOR)
6572 no_op("Backticks",s);
6575 pl_yylval.ival = OP_BACKTICK;
6576 TERM(sublex_start());
6580 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6582 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6584 if (PL_expect == XOPERATOR)
6585 no_op("Backslash",s);
6589 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6590 char *start = s + 2;
6591 while (isDIGIT(*start) || *start == '_')
6593 if (*start == '.' && isDIGIT(start[1])) {
6594 s = scan_num(s, &pl_yylval);
6597 else if ((*start == ':' && start[1] == ':')
6598 || (PL_expect == XSTATE && *start == ':'))
6600 else if (PL_expect == XSTATE) {
6602 while (d < PL_bufend && isSPACE(*d)) d++;
6603 if (*d == ':') goto keylookup;
6605 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6606 if (!isALPHA(*start) && (PL_expect == XTERM
6607 || PL_expect == XREF || PL_expect == XSTATE
6608 || PL_expect == XTERMORDORDOR)) {
6609 GV *const gv = gv_fetchpvn_flags(s, start - s,
6610 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6612 s = scan_num(s, &pl_yylval);
6619 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6672 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6674 /* Some keywords can be followed by any delimiter, including ':' */
6675 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
6677 /* x::* is just a word, unless x is "CORE" */
6678 if (!anydelim && *s == ':' && s[1] == ':') {
6679 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6684 while (d < PL_bufend && isSPACE(*d))
6685 d++; /* no comments skipped here, or s### is misparsed */
6687 /* Is this a word before a => operator? */
6688 if (*d == '=' && d[1] == '>') {
6692 = (OP*)newSVOP(OP_CONST, 0,
6693 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6694 pl_yylval.opval->op_private = OPpCONST_BARE;
6698 /* Check for plugged-in keyword */
6702 char *saved_bufptr = PL_bufptr;
6704 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6706 if (result == KEYWORD_PLUGIN_DECLINE) {
6707 /* not a plugged-in keyword */
6708 PL_bufptr = saved_bufptr;
6709 } else if (result == KEYWORD_PLUGIN_STMT) {
6710 pl_yylval.opval = o;
6712 if (!PL_nexttoke) PL_expect = XSTATE;
6713 return REPORT(PLUGSTMT);
6714 } else if (result == KEYWORD_PLUGIN_EXPR) {
6715 pl_yylval.opval = o;
6717 if (!PL_nexttoke) PL_expect = XOPERATOR;
6718 return REPORT(PLUGEXPR);
6720 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6725 /* Check for built-in keyword */
6726 tmp = keyword(PL_tokenbuf, len, 0);
6728 /* Is this a label? */
6729 if (!anydelim && PL_expect == XSTATE
6730 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6732 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6733 pl_yylval.pval[len] = '\0';
6734 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6739 /* Check for lexical sub */
6740 if (PL_expect != XOPERATOR) {
6741 char tmpbuf[sizeof PL_tokenbuf + 1];
6743 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6744 off = pad_findmy_pvn(tmpbuf, len+1, 0);
6745 if (off != NOT_IN_PAD) {
6746 assert(off); /* we assume this is boolean-true below */
6747 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6748 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6749 HEK * const stashname = HvNAME_HEK(stash);
6750 sv = newSVhek(stashname);
6751 sv_catpvs(sv, "::");
6752 sv_catpvn_flags(sv, PL_tokenbuf, len,
6753 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6754 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6764 rv2cv_op = newOP(OP_PADANY, 0);
6765 rv2cv_op->op_targ = off;
6766 cv = find_lexical_cv(off);
6774 if (tmp < 0) { /* second-class keyword? */
6775 GV *ogv = NULL; /* override (winner) */
6776 GV *hgv = NULL; /* hidden (loser) */
6777 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6779 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6780 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6782 && (cv = GvCVu(gv)))
6784 if (GvIMPORTED_CV(gv))
6786 else if (! CvMETHOD(cv))
6790 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6793 && (isGV_with_GP(gv)
6794 ? GvCVu(gv) && GvIMPORTED_CV(gv)
6795 : SvPCS_IMPORTED(gv)
6796 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6804 tmp = 0; /* overridden by import or by GLOBAL */
6807 && -tmp==KEY_lock /* XXX generalizable kludge */
6810 tmp = 0; /* any sub overrides "weak" keyword */
6812 else { /* no override */
6814 if (tmp == KEY_dump) {
6815 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6816 "dump() better written as CORE::dump()");
6820 if (hgv && tmp != KEY_x) /* never ambiguous */
6821 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6822 "Ambiguous call resolved as CORE::%s(), "
6823 "qualify as such or use &",
6828 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6829 && (!anydelim || *s != '#')) {
6830 /* no override, and not s### either; skipspace is safe here
6831 * check for => on following line */
6833 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6834 STRLEN soff = s - SvPVX(PL_linestr);
6835 s = skipspace_flags(s, LEX_NO_INCLINE);
6836 arrow = *s == '=' && s[1] == '>';
6837 PL_bufptr = SvPVX(PL_linestr) + bufoff;
6838 s = SvPVX(PL_linestr) + soff;
6846 default: /* not a keyword */
6847 /* Trade off - by using this evil construction we can pull the
6848 variable gv into the block labelled keylookup. If not, then
6849 we have to give it function scope so that the goto from the
6850 earlier ':' case doesn't bypass the initialisation. */
6852 just_a_word_zero_gv:
6864 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6868 /* Get the rest if it looks like a package qualifier */
6870 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6872 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6875 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6876 UTF8fARG(UTF, len, PL_tokenbuf),
6877 *s == '\'' ? "'" : "::");
6882 if (PL_expect == XOPERATOR) {
6883 if (PL_bufptr == PL_linestart) {
6884 CopLINE_dec(PL_curcop);
6885 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6886 CopLINE_inc(PL_curcop);
6889 no_op("Bareword",s);
6892 /* See if the name is "Foo::",
6893 in which case Foo is a bareword
6894 (and a package name). */
6897 && PL_tokenbuf[len - 2] == ':'
6898 && PL_tokenbuf[len - 1] == ':')
6900 if (ckWARN(WARN_BAREWORD)
6901 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6902 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6903 "Bareword \"%"UTF8f"\" refers to nonexistent package",
6904 UTF8fARG(UTF, len, PL_tokenbuf));
6906 PL_tokenbuf[len] = '\0';
6915 /* if we saw a global override before, get the right name */
6918 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6921 SV * const tmp_sv = sv;
6922 sv = newSVpvs("CORE::GLOBAL::");
6923 sv_catsv(sv, tmp_sv);
6924 SvREFCNT_dec(tmp_sv);
6928 /* Presume this is going to be a bareword of some sort. */
6930 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6931 pl_yylval.opval->op_private = OPpCONST_BARE;
6933 /* And if "Foo::", then that's what it certainly is. */
6939 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6940 const_op->op_private = OPpCONST_BARE;
6942 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6946 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6949 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6952 /* Use this var to track whether intuit_method has been
6953 called. intuit_method returns 0 or > 255. */
6956 /* See if it's the indirect object for a list operator. */
6959 && PL_oldoldbufptr < PL_bufptr
6960 && (PL_oldoldbufptr == PL_last_lop
6961 || PL_oldoldbufptr == PL_last_uni)
6962 && /* NO SKIPSPACE BEFORE HERE! */
6964 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
6967 bool immediate_paren = *s == '(';
6969 /* (Now we can afford to cross potential line boundary.) */
6972 /* Two barewords in a row may indicate method call. */
6974 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
6975 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
6980 /* If not a declared subroutine, it's an indirect object. */
6981 /* (But it's an indir obj regardless for sort.) */
6982 /* Also, if "_" follows a filetest operator, it's a bareword */
6985 ( !immediate_paren && (PL_last_lop_op == OP_SORT
6987 && (PL_last_lop_op != OP_MAPSTART
6988 && PL_last_lop_op != OP_GREPSTART))))
6989 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6990 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
6994 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6999 PL_expect = XOPERATOR;
7002 /* Is this a word before a => operator? */
7003 if (*s == '=' && s[1] == '>' && !pkgname) {
7006 if (gvp || (lex && !off)) {
7007 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7008 /* This is our own scalar, created a few lines
7009 above, so this is safe. */
7011 sv_setpv(sv, PL_tokenbuf);
7012 if (UTF && !IN_BYTES
7013 && is_utf8_string((U8*)PL_tokenbuf, len))
7020 /* If followed by a paren, it's certainly a subroutine. */
7025 while (SPACE_OR_TAB(*d))
7027 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7032 NEXTVAL_NEXTTOKE.opval =
7033 off ? rv2cv_op : pl_yylval.opval;
7035 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7036 else op_free(rv2cv_op), force_next(BAREWORD);
7041 /* If followed by var or block, call it a method (unless sub) */
7043 if ((*s == '$' || *s == '{') && !cv) {
7045 PL_last_lop = PL_oldbufptr;
7046 PL_last_lop_op = OP_METHOD;
7047 if (!PL_lex_allbrackets
7048 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7050 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7052 PL_expect = XBLOCKTERM;
7054 return REPORT(METHOD);
7057 /* If followed by a bareword, see if it looks like indir obj. */
7059 if (tmp == 1 && !orig_keyword
7060 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7061 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
7064 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7066 sv_setpvn(sv, PL_tokenbuf, len);
7067 if (UTF && !IN_BYTES
7068 && is_utf8_string((U8*)PL_tokenbuf, len))
7070 else SvUTF8_off(sv);
7073 if (tmp == METHOD && !PL_lex_allbrackets
7074 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7076 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7081 /* Not a method, so call it a subroutine (if defined) */
7084 /* Check for a constant sub */
7085 if ((sv = cv_const_sv_or_av(cv))) {
7088 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7089 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7090 if (SvTYPE(sv) == SVt_PVAV)
7091 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7094 pl_yylval.opval->op_private = 0;
7095 pl_yylval.opval->op_folded = 1;
7096 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7101 op_free(pl_yylval.opval);
7103 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7104 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7105 PL_last_lop = PL_oldbufptr;
7106 PL_last_lop_op = OP_ENTERSUB;
7107 /* Is there a prototype? */
7111 STRLEN protolen = CvPROTOLEN(cv);
7112 const char *proto = CvPROTO(cv);
7114 proto = S_strip_spaces(aTHX_ proto, &protolen);
7117 if ((optional = *proto == ';'))
7120 while (*proto == ';');
7124 *proto == '$' || *proto == '_'
7125 || *proto == '*' || *proto == '+'
7130 *proto == '\\' && proto[1] && proto[2] == '\0'
7133 UNIPROTO(UNIOPSUB,optional);
7134 if (*proto == '\\' && proto[1] == '[') {
7135 const char *p = proto + 2;
7136 while(*p && *p != ']')
7138 if(*p == ']' && !p[1])
7139 UNIPROTO(UNIOPSUB,optional);
7141 if (*proto == '&' && *s == '{') {
7143 sv_setpvs(PL_subname, "__ANON__");
7145 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7146 if (!PL_lex_allbrackets
7147 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7149 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7154 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7156 force_next(off ? PRIVATEREF : BAREWORD);
7157 if (!PL_lex_allbrackets
7158 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7160 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7165 /* Call it a bare word */
7167 if (PL_hints & HINT_STRICT_SUBS)
7168 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7171 /* after "print" and similar functions (corresponding to
7172 * "F? L" in opcode.pl), whatever wasn't already parsed as
7173 * a filehandle should be subject to "strict subs".
7174 * Likewise for the optional indirect-object argument to system
7175 * or exec, which can't be a bareword */
7176 if ((PL_last_lop_op == OP_PRINT
7177 || PL_last_lop_op == OP_PRTF
7178 || PL_last_lop_op == OP_SAY
7179 || PL_last_lop_op == OP_SYSTEM
7180 || PL_last_lop_op == OP_EXEC)
7181 && (PL_hints & HINT_STRICT_SUBS))
7182 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7183 if (lastchar != '-') {
7184 if (ckWARN(WARN_RESERVED)) {
7188 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7190 /* PL_warn_reserved is constant */
7191 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7192 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7202 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7203 && saw_infix_sigil) {
7204 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7205 "Operator or semicolon missing before %c%"UTF8f,
7207 UTF8fARG(UTF, strlen(PL_tokenbuf),
7209 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7210 "Ambiguous use of %c resolved as operator %c",
7211 lastchar, lastchar);
7218 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7223 (OP*)newSVOP(OP_CONST, 0,
7224 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7227 case KEY___PACKAGE__:
7229 (OP*)newSVOP(OP_CONST, 0,
7231 ? newSVhek(HvNAME_HEK(PL_curstash))
7238 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7239 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7242 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7244 gv_init(gv,stash,"DATA",4,0);
7247 GvIOp(gv) = newIO();
7248 IoIFP(GvIOp(gv)) = PL_rsfp;
7249 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7251 const int fd = PerlIO_fileno(PL_rsfp);
7253 fcntl(fd,F_SETFD, FD_CLOEXEC);
7257 /* Mark this internal pseudo-handle as clean */
7258 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7259 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7260 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7262 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7263 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7264 /* if the script was opened in binmode, we need to revert
7265 * it to text mode for compatibility; but only iff it has CRs
7266 * XXX this is a questionable hack at best. */
7267 if (PL_bufend-PL_bufptr > 2
7268 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7271 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7272 loc = PerlIO_tell(PL_rsfp);
7273 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7276 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7278 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7279 #endif /* NETWARE */
7281 PerlIO_seek(PL_rsfp, loc, 0);
7285 #ifdef PERLIO_LAYERS
7288 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7297 FUN0OP(CvCLONE(PL_compcv)
7298 ? newOP(OP_RUNCV, 0)
7299 : newPVOP(OP_RUNCV,0,NULL));
7308 if (PL_expect == XSTATE) {
7319 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7320 if ((*s == ':' && s[1] == ':')
7321 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7325 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7329 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7330 UTF8fARG(UTF, len, PL_tokenbuf));
7333 else if (tmp == KEY_require || tmp == KEY_do
7335 /* that's a way to remember we saw "CORE::" */
7347 LOP(OP_ACCEPT,XTERM);
7350 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7355 LOP(OP_ATAN2,XTERM);
7361 LOP(OP_BINMODE,XTERM);
7364 LOP(OP_BLESS,XTERM);
7373 /* We have to disambiguate the two senses of
7374 "continue". If the next token is a '{' then
7375 treat it as the start of a continue block;
7376 otherwise treat it as a control operator.
7386 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7396 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7405 if (!PL_cryptseen) {
7406 PL_cryptseen = TRUE;
7410 LOP(OP_CRYPT,XTERM);
7413 LOP(OP_CHMOD,XTERM);
7416 LOP(OP_CHOWN,XTERM);
7419 LOP(OP_CONNECT,XTERM);
7439 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7441 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7442 && !keyword(PL_tokenbuf + 1, len, 0)) {
7445 force_ident_maybe_lex('&');
7450 if (orig_keyword == KEY_do) {
7459 PL_hints |= HINT_BLOCK_SCOPE;
7469 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7470 STR_WITH_LEN("NDBM_File::"),
7471 STR_WITH_LEN("DB_File::"),
7472 STR_WITH_LEN("GDBM_File::"),
7473 STR_WITH_LEN("SDBM_File::"),
7474 STR_WITH_LEN("ODBM_File::"),
7476 LOP(OP_DBMOPEN,XTERM);
7488 pl_yylval.ival = CopLINE(PL_curcop);
7492 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7504 if (*s == '{') { /* block eval */
7505 PL_expect = XTERMBLOCK;
7506 UNIBRACK(OP_ENTERTRY);
7508 else { /* string eval */
7510 UNIBRACK(OP_ENTEREVAL);
7515 UNIBRACK(-OP_ENTEREVAL);
7529 case KEY_endhostent:
7535 case KEY_endservent:
7538 case KEY_endprotoent:
7549 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7551 pl_yylval.ival = CopLINE(PL_curcop);
7553 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7556 if ((PL_bufend - p) >= 3
7557 && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7561 else if ((PL_bufend - p) >= 4
7562 && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7565 /* skip optional package name, as in "for my abc $x (..)" */
7566 if (isIDFIRST_lazy_if(p,UTF)) {
7567 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7570 if (*p != '$' && *p != '\\')
7571 Perl_croak(aTHX_ "Missing $ on loop variable");
7576 LOP(OP_FORMLINE,XTERM);
7585 LOP(OP_FCNTL,XTERM);
7591 LOP(OP_FLOCK,XTERM);
7594 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7599 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7604 LOP(OP_GREPSTART, XREF);
7621 case KEY_getpriority:
7622 LOP(OP_GETPRIORITY,XTERM);
7624 case KEY_getprotobyname:
7627 case KEY_getprotobynumber:
7628 LOP(OP_GPBYNUMBER,XTERM);
7630 case KEY_getprotoent:
7642 case KEY_getpeername:
7643 UNI(OP_GETPEERNAME);
7645 case KEY_gethostbyname:
7648 case KEY_gethostbyaddr:
7649 LOP(OP_GHBYADDR,XTERM);
7651 case KEY_gethostent:
7654 case KEY_getnetbyname:
7657 case KEY_getnetbyaddr:
7658 LOP(OP_GNBYADDR,XTERM);
7663 case KEY_getservbyname:
7664 LOP(OP_GSBYNAME,XTERM);
7666 case KEY_getservbyport:
7667 LOP(OP_GSBYPORT,XTERM);
7669 case KEY_getservent:
7672 case KEY_getsockname:
7673 UNI(OP_GETSOCKNAME);
7675 case KEY_getsockopt:
7676 LOP(OP_GSOCKOPT,XTERM);
7691 pl_yylval.ival = CopLINE(PL_curcop);
7692 Perl_ck_warner_d(aTHX_
7693 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7694 "given is experimental");
7699 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7707 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7709 pl_yylval.ival = CopLINE(PL_curcop);
7713 LOP(OP_INDEX,XTERM);
7719 LOP(OP_IOCTL,XTERM);
7746 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7751 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7765 LOP(OP_LISTEN,XTERM);
7774 s = scan_pat(s,OP_MATCH);
7775 TERM(sublex_start());
7778 LOP(OP_MAPSTART, XREF);
7781 LOP(OP_MKDIR,XTERM);
7784 LOP(OP_MSGCTL,XTERM);
7787 LOP(OP_MSGGET,XTERM);
7790 LOP(OP_MSGRCV,XTERM);
7793 LOP(OP_MSGSND,XTERM);
7800 yyerror(Perl_form(aTHX_
7801 "Can't redeclare \"%s\" in \"%s\"",
7802 tmp == KEY_my ? "my" :
7803 tmp == KEY_state ? "state" : "our",
7804 PL_in_my == KEY_my ? "my" :
7805 PL_in_my == KEY_state ? "state" : "our"));
7807 PL_in_my = (U16)tmp;
7809 if (isIDFIRST_lazy_if(s,UTF)) {
7810 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7811 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7813 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7814 if (!PL_in_my_stash) {
7818 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7819 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7820 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7823 else if (*s == '\\') {
7824 if (!FEATURE_MYREF_IS_ENABLED)
7825 Perl_croak(aTHX_ "The experimental declared_refs "
7826 "feature is not enabled");
7827 Perl_ck_warner_d(aTHX_
7828 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7829 "Declaring references is experimental");
7837 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7842 s = tokenize_use(0, s);
7846 if (*s == '(' || (s = skipspace(s), *s == '('))
7849 if (!PL_lex_allbrackets
7850 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7852 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7859 if (isIDFIRST_lazy_if(s,UTF)) {
7861 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7863 for (t=d; isSPACE(*t);)
7865 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7867 && !(t[0] == '=' && t[1] == '>')
7868 && !(t[0] == ':' && t[1] == ':')
7869 && !keyword(s, d-s, 0)
7871 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7872 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7873 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7879 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7881 pl_yylval.ival = OP_OR;
7891 LOP(OP_OPEN_DIR,XTERM);
7894 checkcomma(s,PL_tokenbuf,"filehandle");
7898 checkcomma(s,PL_tokenbuf,"filehandle");
7917 s = force_word(s,BAREWORD,FALSE,TRUE);
7919 s = force_strict_version(s);
7923 LOP(OP_PIPE_OP,XTERM);
7926 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7929 COPLINE_SET_FROM_MULTI_END;
7930 pl_yylval.ival = OP_CONST;
7931 TERM(sublex_start());
7938 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7941 COPLINE_SET_FROM_MULTI_END;
7942 PL_expect = XOPERATOR;
7943 if (SvCUR(PL_lex_stuff)) {
7944 int warned_comma = !ckWARN(WARN_QW);
7945 int warned_comment = warned_comma;
7946 d = SvPV_force(PL_lex_stuff, len);
7948 for (; isSPACE(*d) && len; --len, ++d)
7953 if (!warned_comma || !warned_comment) {
7954 for (; !isSPACE(*d) && len; --len, ++d) {
7955 if (!warned_comma && *d == ',') {
7956 Perl_warner(aTHX_ packWARN(WARN_QW),
7957 "Possible attempt to separate words with commas");
7960 else if (!warned_comment && *d == '#') {
7961 Perl_warner(aTHX_ packWARN(WARN_QW),
7962 "Possible attempt to put comments in qw() list");
7968 for (; !isSPACE(*d) && len; --len, ++d)
7971 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7972 words = op_append_elem(OP_LIST, words,
7973 newSVOP(OP_CONST, 0, tokeq(sv)));
7978 words = newNULLLIST();
7979 SvREFCNT_dec_NN(PL_lex_stuff);
7980 PL_lex_stuff = NULL;
7981 PL_expect = XOPERATOR;
7982 pl_yylval.opval = sawparens(words);
7987 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7990 pl_yylval.ival = OP_STRINGIFY;
7991 if (SvIVX(PL_lex_stuff) == '\'')
7992 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7993 TERM(sublex_start());
7996 s = scan_pat(s,OP_QR);
7997 TERM(sublex_start());
8000 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8003 pl_yylval.ival = OP_BACKTICK;
8004 TERM(sublex_start());
8012 s = force_version(s, FALSE);
8014 else if (*s != 'v' || !isDIGIT(s[1])
8015 || (s = force_version(s, TRUE), *s == 'v'))
8017 *PL_tokenbuf = '\0';
8018 s = force_word(s,BAREWORD,TRUE,TRUE);
8019 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8020 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8021 GV_ADD | (UTF ? SVf_UTF8 : 0));
8023 yyerror("<> at require-statement should be quotes");
8025 if (orig_keyword == KEY_require) {
8031 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8033 PL_last_uni = PL_oldbufptr;
8034 PL_last_lop_op = OP_REQUIRE;
8036 return REPORT( (int)REQUIRE );
8045 LOP(OP_RENAME,XTERM);
8054 LOP(OP_RINDEX,XTERM);
8063 UNIDOR(OP_READLINE);
8066 UNIDOR(OP_BACKTICK);
8075 LOP(OP_REVERSE,XTERM);
8078 UNIDOR(OP_READLINK);
8085 if (pl_yylval.opval)
8086 TERM(sublex_start());
8088 TOKEN(1); /* force error */
8091 checkcomma(s,PL_tokenbuf,"filehandle");
8101 LOP(OP_SELECT,XTERM);
8107 LOP(OP_SEMCTL,XTERM);
8110 LOP(OP_SEMGET,XTERM);
8113 LOP(OP_SEMOP,XTERM);
8119 LOP(OP_SETPGRP,XTERM);
8121 case KEY_setpriority:
8122 LOP(OP_SETPRIORITY,XTERM);
8124 case KEY_sethostent:
8130 case KEY_setservent:
8133 case KEY_setprotoent:
8143 LOP(OP_SEEKDIR,XTERM);
8145 case KEY_setsockopt:
8146 LOP(OP_SSOCKOPT,XTERM);
8152 LOP(OP_SHMCTL,XTERM);
8155 LOP(OP_SHMGET,XTERM);
8158 LOP(OP_SHMREAD,XTERM);
8161 LOP(OP_SHMWRITE,XTERM);
8164 LOP(OP_SHUTDOWN,XTERM);
8173 LOP(OP_SOCKET,XTERM);
8175 case KEY_socketpair:
8176 LOP(OP_SOCKPAIR,XTERM);
8179 checkcomma(s,PL_tokenbuf,"subroutine name");
8182 s = force_word(s,BAREWORD,TRUE,TRUE);
8186 LOP(OP_SPLIT,XTERM);
8189 LOP(OP_SPRINTF,XTERM);
8192 LOP(OP_SPLICE,XTERM);
8207 LOP(OP_SUBSTR,XTERM);
8213 char * const tmpbuf = PL_tokenbuf + 1;
8214 expectation attrful;
8215 bool have_name, have_proto;
8216 const int key = tmp;
8217 SV *format_name = NULL;
8222 if (isIDFIRST_lazy_if(s,UTF)
8224 || (*s == ':' && s[1] == ':'))
8228 attrful = XATTRBLOCK;
8229 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8231 if (key == KEY_format)
8232 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8234 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8236 PL_tokenbuf, len + 1, 0
8238 sv_setpvn(PL_subname, tmpbuf, len);
8240 sv_setsv(PL_subname,PL_curstname);
8241 sv_catpvs(PL_subname,"::");
8242 sv_catpvn(PL_subname,tmpbuf,len);
8244 if (SvUTF8(PL_linestr))
8245 SvUTF8_on(PL_subname);
8252 if (key == KEY_my || key == KEY_our || key==KEY_state)
8255 /* diag_listed_as: Missing name in "%s sub" */
8257 "Missing name in \"%s\"", PL_bufptr);
8259 PL_expect = XTERMBLOCK;
8260 attrful = XATTRTERM;
8261 sv_setpvs(PL_subname,"?");
8265 if (key == KEY_format) {
8267 NEXTVAL_NEXTTOKE.opval
8268 = (OP*)newSVOP(OP_CONST,0, format_name);
8269 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8270 force_next(BAREWORD);
8275 /* Look for a prototype */
8276 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8277 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8278 COPLINE_SET_FROM_MULTI_END;
8280 Perl_croak(aTHX_ "Prototype not terminated");
8281 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8289 if (*s == ':' && s[1] != ':')
8290 PL_expect = attrful;
8291 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8292 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8293 key == KEY_DESTROY || key == KEY_BEGIN ||
8294 key == KEY_UNITCHECK || key == KEY_CHECK ||
8295 key == KEY_INIT || key == KEY_END ||
8296 key == KEY_my || key == KEY_state ||
8299 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8300 else if (*s != ';' && *s != '}')
8301 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8305 NEXTVAL_NEXTTOKE.opval =
8306 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8307 PL_lex_stuff = NULL;
8312 sv_setpvs(PL_subname, "__ANON__");
8314 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8317 force_ident_maybe_lex('&');
8322 LOP(OP_SYSTEM,XREF);
8325 LOP(OP_SYMLINK,XTERM);
8328 LOP(OP_SYSCALL,XTERM);
8331 LOP(OP_SYSOPEN,XTERM);
8334 LOP(OP_SYSSEEK,XTERM);
8337 LOP(OP_SYSREAD,XTERM);
8340 LOP(OP_SYSWRITE,XTERM);
8345 TERM(sublex_start());
8366 LOP(OP_TRUNCATE,XTERM);
8378 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8380 pl_yylval.ival = CopLINE(PL_curcop);
8384 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8386 pl_yylval.ival = CopLINE(PL_curcop);
8390 LOP(OP_UNLINK,XTERM);
8396 LOP(OP_UNPACK,XTERM);
8399 LOP(OP_UTIME,XTERM);
8405 LOP(OP_UNSHIFT,XTERM);
8408 s = tokenize_use(1, s);
8418 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8420 pl_yylval.ival = CopLINE(PL_curcop);
8421 Perl_ck_warner_d(aTHX_
8422 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8423 "when is experimental");
8427 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8429 pl_yylval.ival = CopLINE(PL_curcop);
8433 PL_hints |= HINT_BLOCK_SCOPE;
8440 LOP(OP_WAITPID,XTERM);
8446 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8447 * we use the same number on EBCDIC */
8448 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8452 if (PL_expect == XOPERATOR) {
8453 if (*s == '=' && !PL_lex_allbrackets
8454 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8464 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8466 pl_yylval.ival = OP_XOR;
8475 Looks up an identifier in the pad or in a package
8478 PRIVATEREF if this is a lexical name.
8479 BAREWORD if this belongs to a package.
8482 if we're in a my declaration
8483 croak if they tried to say my($foo::bar)
8484 build the ops for a my() declaration
8485 if it's an access to a my() variable
8486 build ops for access to a my() variable
8487 if in a dq string, and they've said @foo and we can't find @foo
8489 build ops for a bareword
8493 S_pending_ident(pTHX)
8496 const char pit = (char)pl_yylval.ival;
8497 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8498 /* All routes through this function want to know if there is a colon. */
8499 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8501 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8502 "### Pending identifier '%s'\n", PL_tokenbuf); });
8504 /* if we're in a my(), we can't allow dynamics here.
8505 $foo'bar has already been turned into $foo::bar, so
8506 just check for colons.
8508 if it's a legal name, the OP is a PADANY.
8511 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8513 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8514 "variable %s in \"our\"",
8515 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8516 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8520 /* "my" variable %s can't be in a package */
8521 /* PL_no_myglob is constant */
8522 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8523 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8524 PL_in_my == KEY_my ? "my" : "state",
8525 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8527 UTF ? SVf_UTF8 : 0);
8531 pl_yylval.opval = newOP(OP_PADANY, 0);
8532 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8533 UTF ? SVf_UTF8 : 0);
8539 build the ops for accesses to a my() variable.
8544 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8546 if (tmp != NOT_IN_PAD) {
8547 /* might be an "our" variable" */
8548 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8549 /* build ops for a bareword */
8550 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8551 HEK * const stashname = HvNAME_HEK(stash);
8552 SV * const sym = newSVhek(stashname);
8553 sv_catpvs(sym, "::");
8554 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8555 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8556 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8560 ((PL_tokenbuf[0] == '$') ? SVt_PV
8561 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8566 pl_yylval.opval = newOP(OP_PADANY, 0);
8567 pl_yylval.opval->op_targ = tmp;
8573 Whine if they've said @foo in a doublequoted string,
8574 and @foo isn't a variable we can find in the symbol
8577 if (ckWARN(WARN_AMBIGUOUS)
8579 && PL_lex_state != LEX_NORMAL
8580 && !PL_lex_brackets)
8582 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8583 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
8585 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8588 /* Downgraded from fatal to warning 20000522 mjd */
8589 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8590 "Possible unintended interpolation of %"UTF8f
8592 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8596 /* build ops for a bareword */
8597 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8598 newSVpvn_flags(PL_tokenbuf + 1,
8600 UTF ? SVf_UTF8 : 0 ));
8601 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8603 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8604 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8605 | ( UTF ? SVf_UTF8 : 0 ),
8606 ((PL_tokenbuf[0] == '$') ? SVt_PV
8607 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8613 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8615 PERL_ARGS_ASSERT_CHECKCOMMA;
8617 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8618 if (ckWARN(WARN_SYNTAX)) {
8621 for (w = s+2; *w && level; w++) {
8629 /* the list of chars below is for end of statements or
8630 * block / parens, boolean operators (&&, ||, //) and branch
8631 * constructs (or, and, if, until, unless, while, err, for).
8632 * Not a very solid hack... */
8633 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8634 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8635 "%s (...) interpreted as function",name);
8638 while (s < PL_bufend && isSPACE(*s))
8642 while (s < PL_bufend && isSPACE(*s))
8644 if (isIDFIRST_lazy_if(s,UTF)) {
8645 const char * const w = s;
8646 s += UTF ? UTF8SKIP(s) : 1;
8647 while (isWORDCHAR_lazy_if(s,UTF))
8648 s += UTF ? UTF8SKIP(s) : 1;
8649 while (s < PL_bufend && isSPACE(*s))
8654 if (keyword(w, s - w, 0))
8657 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8658 if (gv && GvCVu(gv))
8662 Copy(w, tmpbuf+1, s - w, char);
8664 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8665 if (off != NOT_IN_PAD) return;
8667 Perl_croak(aTHX_ "No comma allowed after %s", what);
8672 /* S_new_constant(): do any overload::constant lookup.
8674 Either returns sv, or mortalizes/frees sv and returns a new SV*.
8675 Best used as sv=new_constant(..., sv, ...).
8676 If s, pv are NULL, calls subroutine with one argument,
8677 and <type> is used with error messages only.
8678 <type> is assumed to be well formed UTF-8 */
8681 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8682 SV *sv, SV *pv, const char *type, STRLEN typelen)
8685 HV * table = GvHV(PL_hintgv); /* ^H */
8690 const char *why1 = "", *why2 = "", *why3 = "";
8692 PERL_ARGS_ASSERT_NEW_CONSTANT;
8693 /* We assume that this is true: */
8694 if (*key == 'c') { assert (strEQ(key, "charnames")); }
8697 /* charnames doesn't work well if there have been errors found */
8698 if (PL_error_count > 0 && *key == 'c')
8700 SvREFCNT_dec_NN(sv);
8701 return &PL_sv_undef;
8704 sv_2mortal(sv); /* Parent created it permanently */
8706 || ! (PL_hints & HINT_LOCALIZE_HH)
8707 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8712 /* Here haven't found what we're looking for. If it is charnames,
8713 * perhaps it needs to be loaded. Try doing that before giving up */
8715 Perl_load_module(aTHX_
8717 newSVpvs("_charnames"),
8718 /* version parameter; no need to specify it, as if
8719 * we get too early a version, will fail anyway,
8720 * not being able to find '_charnames' */
8725 assert(sp == PL_stack_sp);
8726 table = GvHV(PL_hintgv);
8728 && (PL_hints & HINT_LOCALIZE_HH)
8729 && (cvp = hv_fetch(table, key, keylen, FALSE))
8735 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8736 msg = Perl_form(aTHX_
8737 "Constant(%.*s) unknown",
8738 (int)(type ? typelen : len),
8744 why3 = "} is not defined";
8747 msg = Perl_form(aTHX_
8748 /* The +3 is for '\N{'; -4 for that, plus '}' */
8749 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8753 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8754 (int)(type ? typelen : len),
8755 (type ? type: s), why1, why2, why3);
8758 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8759 return SvREFCNT_inc_simple_NN(sv);
8764 pv = newSVpvn_flags(s, len, SVs_TEMP);
8766 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8768 typesv = &PL_sv_undef;
8770 PUSHSTACKi(PERLSI_OVERLOAD);
8782 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8786 /* Check the eval first */
8787 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8789 const char * errstr;
8790 sv_catpvs(errsv, "Propagated");
8791 errstr = SvPV_const(errsv, errlen);
8792 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8794 res = SvREFCNT_inc_simple_NN(sv);
8798 SvREFCNT_inc_simple_void_NN(res);
8807 why1 = "Call to &{$^H{";
8809 why3 = "}} did not return a defined value";
8811 (void)sv_2mortal(sv);
8818 PERL_STATIC_INLINE void
8819 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
8820 bool is_utf8, bool check_dollar) {
8821 PERL_ARGS_ASSERT_PARSE_IDENT;
8825 Perl_croak(aTHX_ "%s", ident_too_long);
8826 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8827 /* The UTF-8 case must come first, otherwise things
8828 * like c\N{COMBINING TILDE} would start failing, as the
8829 * isWORDCHAR_A case below would gobble the 'c' up.
8832 char *t = *s + UTF8SKIP(*s);
8833 while (isIDCONT_utf8((U8*)t))
8835 if (*d + (t - *s) > e)
8836 Perl_croak(aTHX_ "%s", ident_too_long);
8837 Copy(*s, *d, t - *s, char);
8841 else if ( isWORDCHAR_A(**s) ) {
8844 } while (isWORDCHAR_A(**s) && *d < e);
8846 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8851 else if (allow_package && **s == ':' && (*s)[1] == ':'
8852 /* Disallow things like Foo::$bar. For the curious, this is
8853 * the code path that triggers the "Bad name after" warning
8854 * when looking for barewords.
8856 && !(check_dollar && (*s)[2] == '$')) {
8866 /* Returns a NUL terminated string, with the length of the string written to
8870 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8873 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8874 bool is_utf8 = cBOOL(UTF);
8876 PERL_ARGS_ASSERT_SCAN_WORD;
8878 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE);
8884 /* Is the byte 'd' a legal single character identifier name? 'u' is true
8885 * iff Unicode semantics are to be used. The legal ones are any of:
8886 * a) all ASCII characters except:
8887 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
8889 * The final case currently doesn't get this far in the program, so we
8890 * don't test for it. If that were to change, it would be ok to allow it.
8891 * b) When not under Unicode rules, any upper Latin1 character
8892 * c) Otherwise, when unicode rules are used, all XIDS characters.
8894 * Because all ASCII characters have the same representation whether
8895 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8896 * '{' without knowing if is UTF-8 or not. */
8897 #define VALID_LEN_ONE_IDENT(s, is_utf8) \
8898 (isGRAPH_A(*(s)) || ((is_utf8) \
8899 ? isIDFIRST_utf8((U8*) (s)) \
8901 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8904 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8906 I32 herelines = PL_parser->herelines;
8907 SSize_t bracket = -1;
8910 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8911 bool is_utf8 = cBOOL(UTF);
8912 I32 orig_copline = 0, tmp_copline = 0;
8914 PERL_ARGS_ASSERT_SCAN_IDENT;
8916 if (isSPACE(*s) || !*s)
8919 while (isDIGIT(*s)) {
8921 Perl_croak(aTHX_ "%s", ident_too_long);
8925 else { /* See if it is a "normal" identifier */
8926 parse_ident(&s, &d, e, 1, is_utf8, FALSE);
8931 /* Either a digit variable, or parse_ident() found an identifier
8932 (anything valid as a bareword), so job done and return. */
8933 if (PL_lex_state != LEX_NORMAL)
8934 PL_lex_state = LEX_INTERPENDMAYBE;
8938 /* Here, it is not a run-of-the-mill identifier name */
8940 if (*s == '$' && s[1]
8941 && (isIDFIRST_lazy_if(s+1,is_utf8)
8942 || isDIGIT_A((U8)s[1])
8945 || strnEQ(s+1,"::",2)) )
8947 /* Dereferencing a value in a scalar variable.
8948 The alternatives are different syntaxes for a scalar variable.
8949 Using ' as a leading package separator isn't allowed. :: is. */
8952 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
8954 bracket = s - SvPVX(PL_linestr);
8956 orig_copline = CopLINE(PL_curcop);
8957 if (s < PL_bufend && isSPACE(*s)) {
8961 if ((s <= PL_bufend - (is_utf8)
8964 && VALID_LEN_ONE_IDENT(s, is_utf8))
8967 const STRLEN skip = UTF8SKIP(s);
8970 for ( i = 0; i < skip; i++ )
8978 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
8979 if (*d == '^' && *s && isCONTROLVAR(*s)) {
8983 /* Warn about ambiguous code after unary operators if {...} notation isn't
8984 used. There's no difference in ambiguity; it's merely a heuristic
8985 about when not to warn. */
8986 else if (ck_uni && bracket == -1)
8988 if (bracket != -1) {
8989 /* If we were processing {...} notation then... */
8990 if (isIDFIRST_lazy_if(d,is_utf8)) {
8991 /* if it starts as a valid identifier, assume that it is one.
8992 (the later check for } being at the expected point will trap
8993 cases where this doesn't pan out.) */
8994 d += is_utf8 ? UTF8SKIP(d) : 1;
8995 parse_ident(&s, &d, e, 1, is_utf8, TRUE);
8997 tmp_copline = CopLINE(PL_curcop);
8998 if (s < PL_bufend && isSPACE(*s)) {
9001 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9002 /* ${foo[0]} and ${foo{bar}} notation. */
9003 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9004 const char * const brack =
9006 ((*s == '[') ? "[...]" : "{...}");
9007 orig_copline = CopLINE(PL_curcop);
9008 CopLINE_set(PL_curcop, tmp_copline);
9009 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9010 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9011 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9012 funny, dest, brack, funny, dest, brack);
9013 CopLINE_set(PL_curcop, orig_copline);
9016 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9017 PL_lex_allbrackets++;
9021 /* Handle extended ${^Foo} variables
9022 * 1999-02-27 mjd-perl-patch@plover.com */
9023 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9027 while (isWORDCHAR(*s) && d < e) {
9031 Perl_croak(aTHX_ "%s", ident_too_long);
9036 tmp_copline = CopLINE(PL_curcop);
9037 if (s < PL_bufend && isSPACE(*s)) {
9041 /* Expect to find a closing } after consuming any trailing whitespace.
9045 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9046 PL_lex_state = LEX_INTERPEND;
9049 if (PL_lex_state == LEX_NORMAL) {
9050 if (ckWARN(WARN_AMBIGUOUS)
9051 && (keyword(dest, d - dest, 0)
9052 || get_cvn_flags(dest, d - dest, is_utf8
9056 SV *tmp = newSVpvn_flags( dest, d - dest,
9057 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9060 orig_copline = CopLINE(PL_curcop);
9061 CopLINE_set(PL_curcop, tmp_copline);
9062 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9063 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9064 funny, SVfARG(tmp), funny, SVfARG(tmp));
9065 CopLINE_set(PL_curcop, orig_copline);
9070 /* Didn't find the closing } at the point we expected, so restore
9071 state such that the next thing to process is the opening { and */
9072 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9073 CopLINE_set(PL_curcop, orig_copline);
9074 PL_parser->herelines = herelines;
9078 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9079 PL_lex_state = LEX_INTERPEND;
9084 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9086 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9087 * found in the parse starting at 's', based on the subset that are valid
9088 * in this context input to this routine in 'valid_flags'. Advances s.
9089 * Returns TRUE if the input should be treated as a valid flag, so the next
9090 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9091 * upon first call on the current regex. This routine will set it to any
9092 * charset modifier found. The caller shouldn't change it. This way,
9093 * another charset modifier encountered in the parse can be detected as an
9094 * error, as we have decided to allow only one */
9097 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9099 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9100 if (isWORDCHAR_lazy_if(*s, UTF)) {
9101 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9102 UTF ? SVf_UTF8 : 0);
9104 /* Pretend that it worked, so will continue processing before
9113 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9114 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9115 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9116 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9117 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9118 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9119 case LOCALE_PAT_MOD:
9121 goto multiple_charsets;
9123 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9126 case UNICODE_PAT_MOD:
9128 goto multiple_charsets;
9130 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9133 case ASCII_RESTRICT_PAT_MOD:
9135 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9139 /* Error if previous modifier wasn't an 'a', but if it was, see
9140 * if, and accept, a second occurrence (only) */
9142 || get_regex_charset(*pmfl)
9143 != REGEX_ASCII_RESTRICTED_CHARSET)
9145 goto multiple_charsets;
9147 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9151 case DEPENDS_PAT_MOD:
9153 goto multiple_charsets;
9155 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9164 if (*charset != c) {
9165 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9167 else if (c == 'a') {
9168 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9169 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9172 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9175 /* Pretend that it worked, so will continue processing before dieing */
9181 S_scan_pat(pTHX_ char *start, I32 type)
9185 const char * const valid_flags =
9186 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9187 char charset = '\0'; /* character set modifier */
9188 unsigned int x_mod_count = 0;
9190 PERL_ARGS_ASSERT_SCAN_PAT;
9192 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9194 Perl_croak(aTHX_ "Search pattern not terminated");
9196 pm = (PMOP*)newPMOP(type, 0);
9197 if (PL_multi_open == '?') {
9198 /* This is the only point in the code that sets PMf_ONCE: */
9199 pm->op_pmflags |= PMf_ONCE;
9201 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9202 allows us to restrict the list needed by reset to just the ??
9204 assert(type != OP_TRANS);
9206 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9209 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9212 elements = mg->mg_len / sizeof(PMOP**);
9213 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9214 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9215 mg->mg_len = elements * sizeof(PMOP**);
9216 PmopSTASH_set(pm,PL_curstash);
9220 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9221 * anon CV. False positives like qr/[(?{]/ are harmless */
9223 if (type == OP_QR) {
9225 char *e, *p = SvPV(PL_lex_stuff, len);
9227 for (; p < e; p++) {
9228 if (p[0] == '(' && p[1] == '?'
9229 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9231 pm->op_pmflags |= PMf_HAS_CV;
9235 pm->op_pmflags |= PMf_IS_QR;
9238 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9239 &s, &charset, &x_mod_count))
9241 /* issue a warning if /c is specified,but /g is not */
9242 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9244 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9245 "Use of /c modifier is meaningless without /g" );
9248 if (UNLIKELY((x_mod_count) > 1)) {
9249 yyerror("Only one /x regex modifier is allowed");
9252 PL_lex_op = (OP*)pm;
9253 pl_yylval.ival = OP_MATCH;
9258 S_scan_subst(pTHX_ char *start)
9265 char charset = '\0'; /* character set modifier */
9266 unsigned int x_mod_count = 0;
9269 PERL_ARGS_ASSERT_SCAN_SUBST;
9271 pl_yylval.ival = OP_NULL;
9273 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9276 Perl_croak(aTHX_ "Substitution pattern not terminated");
9280 first_start = PL_multi_start;
9281 first_line = CopLINE(PL_curcop);
9282 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9284 SvREFCNT_dec_NN(PL_lex_stuff);
9285 PL_lex_stuff = NULL;
9286 Perl_croak(aTHX_ "Substitution replacement not terminated");
9288 PL_multi_start = first_start; /* so whole substitution is taken together */
9290 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9294 if (*s == EXEC_PAT_MOD) {
9298 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9299 &s, &charset, &x_mod_count))
9305 if (UNLIKELY((x_mod_count) > 1)) {
9306 yyerror("Only one /x regex modifier is allowed");
9309 if ((pm->op_pmflags & PMf_CONTINUE)) {
9310 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9314 SV * const repl = newSVpvs("");
9317 pm->op_pmflags |= PMf_EVAL;
9320 sv_catpvs(repl, "eval ");
9322 sv_catpvs(repl, "do ");
9324 sv_catpvs(repl, "{");
9325 sv_catsv(repl, PL_parser->lex_sub_repl);
9326 sv_catpvs(repl, "}");
9328 SvREFCNT_dec(PL_parser->lex_sub_repl);
9329 PL_parser->lex_sub_repl = repl;
9331 if (CopLINE(PL_curcop) != first_line) {
9332 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9333 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xpad_cop_seq.xlow =
9334 CopLINE(PL_curcop) - first_line;
9335 CopLINE_set(PL_curcop, first_line);
9338 PL_lex_op = (OP*)pm;
9339 pl_yylval.ival = OP_SUBST;
9344 S_scan_trans(pTHX_ char *start)
9351 bool nondestruct = 0;
9354 PERL_ARGS_ASSERT_SCAN_TRANS;
9356 pl_yylval.ival = OP_NULL;
9358 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9360 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9364 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9366 SvREFCNT_dec_NN(PL_lex_stuff);
9367 PL_lex_stuff = NULL;
9368 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9371 complement = del = squash = 0;
9375 complement = OPpTRANS_COMPLEMENT;
9378 del = OPpTRANS_DELETE;
9381 squash = OPpTRANS_SQUASH;
9393 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9394 o->op_private &= ~OPpTRANS_ALL;
9395 o->op_private |= del|squash|complement|
9396 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9397 (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF : 0);
9400 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9407 Takes a pointer to the first < in <<FOO.
9408 Returns a pointer to the byte following <<FOO.
9410 This function scans a heredoc, which involves different methods
9411 depending on whether we are in a string eval, quoted construct, etc.
9412 This is because PL_linestr could containing a single line of input, or
9413 a whole string being evalled, or the contents of the current quote-
9416 The two basic methods are:
9417 - Steal lines from the input stream
9418 - Scan the heredoc in PL_linestr and remove it therefrom
9420 In a file scope or filtered eval, the first method is used; in a
9421 string eval, the second.
9423 In a quote-like operator, we have to choose between the two,
9424 depending on where we can find a newline. We peek into outer lex-
9425 ing scopes until we find one with a newline in it. If we reach the
9426 outermost lexing scope and it is a file, we use the stream method.
9427 Otherwise it is treated as an eval.
9431 S_scan_heredoc(pTHX_ char *s)
9433 I32 op_type = OP_SCALAR;
9440 const bool infile = PL_rsfp || PL_parser->filtered;
9441 const line_t origline = CopLINE(PL_curcop);
9442 LEXSHARED *shared = PL_parser->lex_shared;
9444 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9447 d = PL_tokenbuf + 1;
9448 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9449 *PL_tokenbuf = '\n';
9451 while (SPACE_OR_TAB(*peek))
9453 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9456 s = delimcpy(d, e, s, PL_bufend, term, &len);
9458 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9464 /* <<\FOO is equivalent to <<'FOO' */
9468 if (!isWORDCHAR_lazy_if(s,UTF))
9469 deprecate("bare << to mean <<\"\"");
9471 while (isWORDCHAR_lazy_if(peek,UTF)) {
9472 peek += UTF ? UTF8SKIP(peek) : 1;
9474 len = (peek - s >= e - d) ? (e - d) : (peek - s);
9475 Copy(s, d, len, char);
9479 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9480 Perl_croak(aTHX_ "Delimiter for here document is too long");
9483 len = d - PL_tokenbuf;
9485 #ifndef PERL_STRICT_CR
9486 d = strchr(s, '\r');
9488 char * const olds = s;
9490 while (s < PL_bufend) {
9496 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9505 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9510 tmpstr = newSV_type(SVt_PVIV);
9514 SvIV_set(tmpstr, -1);
9516 else if (term == '`') {
9517 op_type = OP_BACKTICK;
9518 SvIV_set(tmpstr, '\\');
9521 PL_multi_start = origline + 1 + PL_parser->herelines;
9522 PL_multi_open = PL_multi_close = '<';
9523 /* inside a string eval or quote-like operator */
9524 if (!infile || PL_lex_inwhat) {
9527 char * const olds = s;
9528 PERL_CONTEXT * const cx = CX_CUR();
9529 /* These two fields are not set until an inner lexing scope is
9530 entered. But we need them set here. */
9531 shared->ls_bufptr = s;
9532 shared->ls_linestr = PL_linestr;
9534 /* Look for a newline. If the current buffer does not have one,
9535 peek into the line buffer of the parent lexing scope, going
9536 up as many levels as necessary to find one with a newline
9539 while (!(s = (char *)memchr(
9540 (void *)shared->ls_bufptr, '\n',
9541 SvEND(shared->ls_linestr)-shared->ls_bufptr
9543 shared = shared->ls_prev;
9544 /* shared is only null if we have gone beyond the outermost
9545 lexing scope. In a file, we will have broken out of the
9546 loop in the previous iteration. In an eval, the string buf-
9547 fer ends with "\n;", so the while condition above will have
9548 evaluated to false. So shared can never be null. Or so you
9549 might think. Odd syntax errors like s;@{<<; can gobble up
9550 the implicit semicolon at the end of a flie, causing the
9551 file handle to be closed even when we are not in a string
9552 eval. So shared may be null in that case. */
9553 if (UNLIKELY(!shared))
9555 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9556 most lexing scope. In a file, shared->ls_linestr at that
9557 level is just one line, so there is no body to steal. */
9558 if (infile && !shared->ls_prev) {
9563 else { /* eval or we've already hit EOF */
9564 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9568 linestr = shared->ls_linestr;
9569 bufend = SvEND(linestr);
9571 while (s < bufend - len + 1
9572 && memNE(s,PL_tokenbuf,len) )
9575 ++PL_parser->herelines;
9577 if (s >= bufend - len + 1) {
9580 sv_setpvn(tmpstr,d+1,s-d);
9582 /* the preceding stmt passes a newline */
9583 PL_parser->herelines++;
9585 /* s now points to the newline after the heredoc terminator.
9586 d points to the newline before the body of the heredoc.
9589 /* We are going to modify linestr in place here, so set
9590 aside copies of the string if necessary for re-evals or
9592 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9593 check shared->re_eval_str. */
9594 if (shared->re_eval_start || shared->re_eval_str) {
9595 /* Set aside the rest of the regexp */
9596 if (!shared->re_eval_str)
9597 shared->re_eval_str =
9598 newSVpvn(shared->re_eval_start,
9599 bufend - shared->re_eval_start);
9600 shared->re_eval_start -= s-d;
9603 && CxTYPE(cx) == CXt_EVAL
9604 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9605 && cx->blk_eval.cur_text == linestr)
9607 cx->blk_eval.cur_text = newSVsv(linestr);
9608 SvSCREAM_on(cx->blk_eval.cur_text);
9610 /* Copy everything from s onwards back to d. */
9611 Move(s,d,bufend-s + 1,char);
9612 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9613 /* Setting PL_bufend only applies when we have not dug deeper
9614 into other scopes, because sublex_done sets PL_bufend to
9615 SvEND(PL_linestr). */
9616 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9622 char *oldbufptr_save;
9624 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9625 term = PL_tokenbuf[1];
9627 linestr_save = PL_linestr; /* must restore this afterwards */
9628 d = s; /* and this */
9629 oldbufptr_save = PL_oldbufptr;
9630 PL_linestr = newSVpvs("");
9631 PL_bufend = SvPVX(PL_linestr);
9633 PL_bufptr = PL_bufend;
9634 CopLINE_set(PL_curcop,
9635 origline + 1 + PL_parser->herelines);
9636 if (!lex_next_chunk(LEX_NO_TERM)
9637 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9638 /* Simply freeing linestr_save might seem simpler here, as it
9639 does not matter what PL_linestr points to, since we are
9640 about to croak; but in a quote-like op, linestr_save
9641 will have been prospectively freed already, via
9642 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9643 restore PL_linestr. */
9644 SvREFCNT_dec_NN(PL_linestr);
9645 PL_linestr = linestr_save;
9646 PL_oldbufptr = oldbufptr_save;
9649 CopLINE_set(PL_curcop, origline);
9650 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9651 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9652 /* ^That should be enough to avoid this needing to grow: */
9653 sv_catpvs(PL_linestr, "\n\0");
9654 assert(s == SvPVX(PL_linestr));
9655 PL_bufend = SvEND(PL_linestr);
9658 PL_parser->herelines++;
9659 PL_last_lop = PL_last_uni = NULL;
9660 #ifndef PERL_STRICT_CR
9661 if (PL_bufend - PL_linestart >= 2) {
9662 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9663 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9665 PL_bufend[-2] = '\n';
9667 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9669 else if (PL_bufend[-1] == '\r')
9670 PL_bufend[-1] = '\n';
9672 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9673 PL_bufend[-1] = '\n';
9675 if (*s == term && PL_bufend-s >= len
9676 && memEQ(s,PL_tokenbuf + 1,len)) {
9677 SvREFCNT_dec(PL_linestr);
9678 PL_linestr = linestr_save;
9679 PL_linestart = SvPVX(linestr_save);
9680 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9681 PL_oldbufptr = oldbufptr_save;
9686 sv_catsv(tmpstr,PL_linestr);
9690 PL_multi_end = origline + PL_parser->herelines;
9691 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9692 SvPV_shrink_to_cur(tmpstr);
9695 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9698 PL_lex_stuff = tmpstr;
9699 pl_yylval.ival = op_type;
9703 SvREFCNT_dec(tmpstr);
9704 CopLINE_set(PL_curcop, origline);
9705 missingterm(PL_tokenbuf + 1);
9709 takes: current position in input buffer
9710 returns: new position in input buffer
9711 side-effects: pl_yylval and lex_op are set.
9716 <<>> read from ARGV without magic open
9717 <FH> read from filehandle
9718 <pkg::FH> read from package qualified filehandle
9719 <pkg'FH> read from package qualified filehandle
9720 <$fh> read from filehandle in $fh
9726 S_scan_inputsymbol(pTHX_ char *start)
9728 char *s = start; /* current position in buffer */
9731 bool nomagicopen = FALSE;
9732 char *d = PL_tokenbuf; /* start of temp holding space */
9733 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9735 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9737 end = strchr(s, '\n');
9740 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9747 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9749 /* die if we didn't have space for the contents of the <>,
9750 or if it didn't end, or if we see a newline
9753 if (len >= (I32)sizeof PL_tokenbuf)
9754 Perl_croak(aTHX_ "Excessively long <> operator");
9756 Perl_croak(aTHX_ "Unterminated <> operator");
9761 Remember, only scalar variables are interpreted as filehandles by
9762 this code. Anything more complex (e.g., <$fh{$num}>) will be
9763 treated as a glob() call.
9764 This code makes use of the fact that except for the $ at the front,
9765 a scalar variable and a filehandle look the same.
9767 if (*d == '$' && d[1]) d++;
9769 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9770 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9771 d += UTF ? UTF8SKIP(d) : 1;
9773 /* If we've tried to read what we allow filehandles to look like, and
9774 there's still text left, then it must be a glob() and not a getline.
9775 Use scan_str to pull out the stuff between the <> and treat it
9776 as nothing more than a string.
9779 if (d - PL_tokenbuf != len) {
9780 pl_yylval.ival = OP_GLOB;
9781 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9783 Perl_croak(aTHX_ "Glob not terminated");
9787 bool readline_overriden = FALSE;
9789 /* we're in a filehandle read situation */
9792 /* turn <> into <ARGV> */
9794 Copy("ARGV",d,5,char);
9796 /* Check whether readline() is overriden */
9797 if ((gv_readline = gv_override("readline",8)))
9798 readline_overriden = TRUE;
9800 /* if <$fh>, create the ops to turn the variable into a
9804 /* try to find it in the pad for this block, otherwise find
9805 add symbol table ops
9807 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9808 if (tmp != NOT_IN_PAD) {
9809 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9810 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9811 HEK * const stashname = HvNAME_HEK(stash);
9812 SV * const sym = sv_2mortal(newSVhek(stashname));
9813 sv_catpvs(sym, "::");
9819 OP * const o = newOP(OP_PADSV, 0);
9821 PL_lex_op = readline_overriden
9822 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9823 op_append_elem(OP_LIST, o,
9824 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9825 : (OP*)newUNOP(OP_READLINE, 0, o);
9833 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9835 PL_lex_op = readline_overriden
9836 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9837 op_append_elem(OP_LIST,
9838 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9839 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9840 : (OP*)newUNOP(OP_READLINE, 0,
9841 newUNOP(OP_RV2SV, 0,
9842 newGVOP(OP_GV, 0, gv)));
9844 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9845 pl_yylval.ival = OP_NULL;
9848 /* If it's none of the above, it must be a literal filehandle
9849 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9851 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9852 PL_lex_op = readline_overriden
9853 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9854 op_append_elem(OP_LIST,
9855 newGVOP(OP_GV, 0, gv),
9856 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9857 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9858 pl_yylval.ival = OP_NULL;
9868 start position in buffer
9869 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
9870 only if they are of the open/close form
9871 keep_delims preserve the delimiters around the string
9872 re_reparse compiling a run-time /(?{})/:
9873 collapse // to /, and skip encoding src
9874 delimp if non-null, this is set to the position of
9875 the closing delimiter, or just after it if
9876 the closing and opening delimiters differ
9877 (i.e., the opening delimiter of a substitu-
9879 returns: position to continue reading from buffer
9880 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9881 updates the read buffer.
9883 This subroutine pulls a string out of the input. It is called for:
9884 q single quotes q(literal text)
9885 ' single quotes 'literal text'
9886 qq double quotes qq(interpolate $here please)
9887 " double quotes "interpolate $here please"
9888 qx backticks qx(/bin/ls -l)
9889 ` backticks `/bin/ls -l`
9890 qw quote words @EXPORT_OK = qw( func() $spam )
9891 m// regexp match m/this/
9892 s/// regexp substitute s/this/that/
9893 tr/// string transliterate tr/this/that/
9894 y/// string transliterate y/this/that/
9895 ($*@) sub prototypes sub foo ($)
9896 (stuff) sub attr parameters sub foo : attr(stuff)
9897 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9899 In most of these cases (all but <>, patterns and transliterate)
9900 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9901 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9902 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9905 It skips whitespace before the string starts, and treats the first
9906 character as the delimiter. If the delimiter is one of ([{< then
9907 the corresponding "close" character )]}> is used as the closing
9908 delimiter. It allows quoting of delimiters, and if the string has
9909 balanced delimiters ([{<>}]) it allows nesting.
9911 On success, the SV with the resulting string is put into lex_stuff or,
9912 if that is already non-NULL, into lex_repl. The second case occurs only
9913 when parsing the RHS of the special constructs s/// and tr/// (y///).
9914 For convenience, the terminating delimiter character is stuffed into
9919 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9923 SV *sv; /* scalar value: string */
9924 const char *tmps; /* temp string, used for delimiter matching */
9925 char *s = start; /* current position in the buffer */
9926 char term; /* terminating character */
9927 char *to; /* current position in the sv's data */
9928 I32 brackets = 1; /* bracket nesting level */
9929 bool has_utf8 = FALSE; /* is there any utf8 content? */
9930 I32 termcode; /* terminating char. code */
9931 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9932 STRLEN termlen; /* length of terminating string */
9935 PERL_ARGS_ASSERT_SCAN_STR;
9937 /* skip space before the delimiter */
9942 /* mark where we are, in case we need to report errors */
9945 /* after skipping whitespace, the next character is the terminator */
9948 termcode = termstr[0] = term;
9952 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9953 Copy(s, termstr, termlen, U8);
9954 if (!UTF8_IS_INVARIANT(term))
9958 /* mark where we are */
9959 PL_multi_start = CopLINE(PL_curcop);
9960 PL_multi_open = termcode;
9961 herelines = PL_parser->herelines;
9963 /* find corresponding closing delimiter */
9964 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
9965 termcode = termstr[0] = term = tmps[5];
9967 PL_multi_close = termcode;
9969 if (PL_multi_open == PL_multi_close) {
9970 keep_bracketed_quoted = FALSE;
9973 /* create a new SV to hold the contents. 79 is the SV's initial length.
9974 What a random number. */
9975 sv = newSV_type(SVt_PVIV);
9977 SvIV_set(sv, termcode);
9978 (void)SvPOK_only(sv); /* validate pointer */
9980 /* move past delimiter and try to read a complete string */
9982 sv_catpvn(sv, s, termlen);
9985 /* extend sv if need be */
9986 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
9987 /* set 'to' to the next character in the sv's string */
9988 to = SvPVX(sv)+SvCUR(sv);
9990 /* if open delimiter is the close delimiter read unbridle */
9991 if (PL_multi_open == PL_multi_close) {
9992 for (; s < PL_bufend; s++,to++) {
9993 /* embedded newlines increment the current line number */
9994 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
9995 COPLINE_INC_WITH_HERELINES;
9996 /* handle quoted delimiters */
9997 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
9998 if (!keep_bracketed_quoted
10000 || (re_reparse && s[1] == '\\'))
10003 else /* any other quotes are simply copied straight through */
10006 /* terminate when run out of buffer (the for() condition), or
10007 have found the terminator */
10008 else if (*s == term) {
10011 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10014 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10020 /* if the terminator isn't the same as the start character (e.g.,
10021 matched brackets), we have to allow more in the quoting, and
10022 be prepared for nested brackets.
10025 /* read until we run out of string, or we find the terminator */
10026 for (; s < PL_bufend; s++,to++) {
10027 /* embedded newlines increment the line count */
10028 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10029 COPLINE_INC_WITH_HERELINES;
10030 /* backslashes can escape the open or closing characters */
10031 if (*s == '\\' && s+1 < PL_bufend) {
10032 if (!keep_bracketed_quoted
10033 && ( ((UV)s[1] == PL_multi_open)
10034 || ((UV)s[1] == PL_multi_close) ))
10041 /* allow nested opens and closes */
10042 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10044 else if ((UV)*s == PL_multi_open)
10046 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10051 /* terminate the copied string and update the sv's end-of-string */
10053 SvCUR_set(sv, to - SvPVX_const(sv));
10056 * this next chunk reads more into the buffer if we're not done yet
10060 break; /* handle case where we are done yet :-) */
10062 #ifndef PERL_STRICT_CR
10063 if (to - SvPVX_const(sv) >= 2) {
10064 if ( (to[-2] == '\r' && to[-1] == '\n')
10065 || (to[-2] == '\n' && to[-1] == '\r'))
10069 SvCUR_set(sv, to - SvPVX_const(sv));
10071 else if (to[-1] == '\r')
10074 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10078 /* if we're out of file, or a read fails, bail and reset the current
10079 line marker so we can report where the unterminated string began
10081 COPLINE_INC_WITH_HERELINES;
10082 PL_bufptr = PL_bufend;
10083 if (!lex_next_chunk(0)) {
10085 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10091 /* at this point, we have successfully read the delimited string */
10094 sv_catpvn(sv, s, termlen);
10100 PL_multi_end = CopLINE(PL_curcop);
10101 CopLINE_set(PL_curcop, PL_multi_start);
10102 PL_parser->herelines = herelines;
10104 /* if we allocated too much space, give some back */
10105 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10106 SvLEN_set(sv, SvCUR(sv) + 1);
10107 SvPV_renew(sv, SvLEN(sv));
10110 /* decide whether this is the first or second quoted string we've read
10115 PL_parser->lex_sub_repl = sv;
10118 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10124 takes: pointer to position in buffer
10125 returns: pointer to new position in buffer
10126 side-effects: builds ops for the constant in pl_yylval.op
10128 Read a number in any of the formats that Perl accepts:
10130 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10131 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10132 0b[01](_?[01])* binary integers
10133 0[0-7](_?[0-7])* octal integers
10134 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
10135 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
10137 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10140 If it reads a number without a decimal point or an exponent, it will
10141 try converting the number to an integer and see if it can do so
10142 without loss of precision.
10146 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10148 const char *s = start; /* current position in buffer */
10149 char *d; /* destination in temp buffer */
10150 char *e; /* end of temp buffer */
10151 NV nv; /* number read, as a double */
10152 SV *sv = NULL; /* place to put the converted number */
10153 bool floatit; /* boolean: int or float? */
10154 const char *lastub = NULL; /* position of last underbar */
10155 static const char* const number_too_long = "Number too long";
10156 /* Hexadecimal floating point.
10158 * In many places (where we have quads and NV is IEEE 754 double)
10159 * we can fit the mantissa bits of a NV into an unsigned quad.
10160 * (Note that UVs might not be quads even when we have quads.)
10161 * This will not work everywhere, though (either no quads, or
10162 * using long doubles), in which case we have to resort to NV,
10163 * which will probably mean horrible loss of precision due to
10164 * multiple fp operations. */
10165 bool hexfp = FALSE;
10166 int total_bits = 0;
10167 int significant_bits = 0;
10168 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10169 # define HEXFP_UQUAD
10170 Uquad_t hexfp_uquad = 0;
10171 int hexfp_frac_bits = 0;
10176 NV hexfp_mult = 1.0;
10177 UV high_non_zero = 0; /* highest digit */
10178 int non_zero_integer_digits = 0;
10180 PERL_ARGS_ASSERT_SCAN_NUM;
10182 /* We use the first character to decide what type of number this is */
10186 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10188 /* if it starts with a 0, it could be an octal number, a decimal in
10189 0.13 disguise, or a hexadecimal number, or a binary number. */
10193 u holds the "number so far"
10194 shift the power of 2 of the base
10195 (hex == 4, octal == 3, binary == 1)
10196 overflowed was the number more than we can hold?
10198 Shift is used when we add a digit. It also serves as an "are
10199 we in octal/hex/binary?" indicator to disallow hex characters
10200 when in octal mode.
10205 bool overflowed = FALSE;
10206 bool just_zero = TRUE; /* just plain 0 or binary number? */
10207 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10208 static const char* const bases[5] =
10209 { "", "binary", "", "octal", "hexadecimal" };
10210 static const char* const Bases[5] =
10211 { "", "Binary", "", "Octal", "Hexadecimal" };
10212 static const char* const maxima[5] =
10214 "0b11111111111111111111111111111111",
10218 const char *base, *Base, *max;
10220 /* check for hex */
10221 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10225 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10230 /* check for a decimal in disguise */
10231 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10233 /* so it must be octal */
10240 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10241 "Misplaced _ in number");
10245 base = bases[shift];
10246 Base = Bases[shift];
10247 max = maxima[shift];
10249 /* read the rest of the number */
10251 /* x is used in the overflow test,
10252 b is the digit we're adding on. */
10257 /* if we don't mention it, we're done */
10261 /* _ are ignored -- but warned about if consecutive */
10263 if (lastub && s == lastub + 1)
10264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10265 "Misplaced _ in number");
10269 /* 8 and 9 are not octal */
10270 case '8': case '9':
10272 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10276 case '2': case '3': case '4':
10277 case '5': case '6': case '7':
10279 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10282 case '0': case '1':
10283 b = *s++ & 15; /* ASCII digit -> value of digit */
10287 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10288 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10289 /* make sure they said 0x */
10292 b = (*s++ & 7) + 9;
10294 /* Prepare to put the digit we have onto the end
10295 of the number so far. We check for overflows.
10301 x = u << shift; /* make room for the digit */
10303 total_bits += shift;
10305 if ((x >> shift) != u
10306 && !(PL_hints & HINT_NEW_BINARY)) {
10309 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10310 "Integer overflow in %s number",
10313 u = x | b; /* add the digit to the end */
10316 n *= nvshift[shift];
10317 /* If an NV has not enough bits in its
10318 * mantissa to represent an UV this summing of
10319 * small low-order numbers is a waste of time
10320 * (because the NV cannot preserve the
10321 * low-order bits anyway): we could just
10322 * remember when did we overflow and in the
10323 * end just multiply n by the right
10328 if (high_non_zero == 0 && b > 0)
10332 non_zero_integer_digits++;
10334 /* this could be hexfp, but peek ahead
10335 * to avoid matching ".." */
10336 if (UNLIKELY(HEXFP_PEEK(s))) {
10344 /* if we get here, we had success: make a scalar value from
10349 /* final misplaced underbar check */
10350 if (s[-1] == '_') {
10351 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10354 if (UNLIKELY(HEXFP_PEEK(s))) {
10355 /* Do sloppy (on the underbars) but quick detection
10356 * (and value construction) for hexfp, the decimal
10357 * detection will shortly be more thorough with the
10358 * underbar checks. */
10360 significant_bits = non_zero_integer_digits * shift;
10363 #else /* HEXFP_NV */
10366 /* Ignore the leading zero bits of
10367 * the high (first) non-zero digit. */
10368 if (high_non_zero) {
10369 if (high_non_zero < 0x8)
10370 significant_bits--;
10371 if (high_non_zero < 0x4)
10372 significant_bits--;
10373 if (high_non_zero < 0x2)
10374 significant_bits--;
10381 bool accumulate = TRUE;
10382 for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10383 if (isXDIGIT(*h)) {
10384 U8 b = XDIGIT_VALUE(*h);
10385 significant_bits += shift;
10388 if (significant_bits < NV_MANT_DIG) {
10389 /* We are in the long "run" of xdigits,
10390 * accumulate the full four bits. */
10391 hexfp_uquad <<= shift;
10393 hexfp_frac_bits += shift;
10395 /* We are at a hexdigit either at,
10396 * or straddling, the edge of mantissa.
10397 * We will try grabbing as many as
10398 * possible bits. */
10400 significant_bits - NV_MANT_DIG;
10403 hexfp_uquad <<= tail;
10404 hexfp_uquad |= b >> (shift - tail);
10405 hexfp_frac_bits += tail;
10407 /* Ignore the trailing zero bits
10408 * of the last non-zero xdigit.
10410 * The assumption here is that if
10411 * one has input of e.g. the xdigit
10412 * eight (0x8), there is only one
10413 * bit being input, not the full
10414 * four bits. Conversely, if one
10415 * specifies a zero xdigit, the
10416 * assumption is that one really
10417 * wants all those bits to be zero. */
10419 if ((b & 0x1) == 0x0) {
10420 significant_bits--;
10421 if ((b & 0x2) == 0x0) {
10422 significant_bits--;
10423 if ((b & 0x4) == 0x0) {
10424 significant_bits--;
10430 accumulate = FALSE;
10433 /* Keep skipping the xdigits, and
10434 * accumulating the significant bits,
10435 * but do not shift the uquad
10436 * (which would catastrophically drop
10437 * high-order bits) or accumulate the
10438 * xdigits anymore. */
10440 #else /* HEXFP_NV */
10444 hexfp_nv += b * nv_mult;
10446 accumulate = FALSE;
10450 if (significant_bits >= NV_MANT_DIG)
10451 accumulate = FALSE;
10455 if ((total_bits > 0 || significant_bits > 0) &&
10456 isALPHA_FOLD_EQ(*h, 'p')) {
10457 bool negexp = FALSE;
10461 else if (*h == '-') {
10467 while (isDIGIT(*h) || *h == '_') {
10470 hexfp_exp += *h - '0';
10473 && -hexfp_exp < NV_MIN_EXP - 1) {
10474 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10475 "Hexadecimal float: exponent underflow");
10481 && hexfp_exp > NV_MAX_EXP - 1) {
10482 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10483 "Hexadecimal float: exponent overflow");
10491 hexfp_exp = -hexfp_exp;
10493 hexfp_exp -= hexfp_frac_bits;
10495 hexfp_mult = pow(2.0, hexfp_exp);
10503 if (n > 4294967295.0)
10504 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10505 "%s number > %s non-portable",
10511 if (u > 0xffffffff)
10512 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10513 "%s number > %s non-portable",
10518 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10519 sv = new_constant(start, s - start, "integer",
10520 sv, NULL, NULL, 0);
10521 else if (PL_hints & HINT_NEW_BINARY)
10522 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10527 handle decimal numbers.
10528 we're also sent here when we read a 0 as the first digit
10530 case '1': case '2': case '3': case '4': case '5':
10531 case '6': case '7': case '8': case '9': case '.':
10534 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10543 /* read next group of digits and _ and copy into d */
10546 || UNLIKELY(hexfp && isXDIGIT(*s)))
10548 /* skip underscores, checking for misplaced ones
10552 if (lastub && s == lastub + 1)
10553 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10554 "Misplaced _ in number");
10558 /* check for end of fixed-length buffer */
10560 Perl_croak(aTHX_ "%s", number_too_long);
10561 /* if we're ok, copy the character */
10566 /* final misplaced underbar check */
10567 if (lastub && s == lastub + 1) {
10568 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10571 /* read a decimal portion if there is one. avoid
10572 3..5 being interpreted as the number 3. followed
10575 if (*s == '.' && s[1] != '.') {
10580 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10581 "Misplaced _ in number");
10585 /* copy, ignoring underbars, until we run out of digits.
10589 || UNLIKELY(hexfp && isXDIGIT(*s));
10592 /* fixed length buffer check */
10594 Perl_croak(aTHX_ "%s", number_too_long);
10596 if (lastub && s == lastub + 1)
10597 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10598 "Misplaced _ in number");
10604 /* fractional part ending in underbar? */
10605 if (s[-1] == '_') {
10606 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10607 "Misplaced _ in number");
10609 if (*s == '.' && isDIGIT(s[1])) {
10610 /* oops, it's really a v-string, but without the "v" */
10616 /* read exponent part, if present */
10617 if ((isALPHA_FOLD_EQ(*s, 'e')
10618 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10619 && strchr("+-0123456789_", s[1]))
10623 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10624 ditto for p (hexfloats) */
10625 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10626 /* At least some Mach atof()s don't grok 'E' */
10629 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10636 /* stray preinitial _ */
10638 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10639 "Misplaced _ in number");
10643 /* allow positive or negative exponent */
10644 if (*s == '+' || *s == '-')
10647 /* stray initial _ */
10649 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10650 "Misplaced _ in number");
10654 /* read digits of exponent */
10655 while (isDIGIT(*s) || *s == '_') {
10658 Perl_croak(aTHX_ "%s", number_too_long);
10662 if (((lastub && s == lastub + 1)
10663 || (!isDIGIT(s[1]) && s[1] != '_')))
10664 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10665 "Misplaced _ in number");
10673 We try to do an integer conversion first if no characters
10674 indicating "float" have been found.
10679 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10681 if (flags == IS_NUMBER_IN_UV) {
10683 sv = newSViv(uv); /* Prefer IVs over UVs. */
10686 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10687 if (uv <= (UV) IV_MIN)
10688 sv = newSViv(-(IV)uv);
10695 STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
10696 /* terminate the string */
10698 if (UNLIKELY(hexfp)) {
10699 # ifdef NV_MANT_DIG
10700 if (significant_bits > NV_MANT_DIG)
10701 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10702 "Hexadecimal float: mantissa overflow");
10705 nv = hexfp_uquad * hexfp_mult;
10706 #else /* HEXFP_NV */
10707 nv = hexfp_nv * hexfp_mult;
10710 nv = Atof(PL_tokenbuf);
10712 RESTORE_LC_NUMERIC_UNDERLYING();
10717 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10718 const char *const key = floatit ? "float" : "integer";
10719 const STRLEN keylen = floatit ? 5 : 7;
10720 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10721 key, keylen, sv, NULL, NULL, 0);
10725 /* if it starts with a v, it could be a v-string */
10728 sv = newSV(5); /* preallocate storage space */
10729 ENTER_with_name("scan_vstring");
10731 s = scan_vstring(s, PL_bufend, sv);
10732 SvREFCNT_inc_simple_void_NN(sv);
10733 LEAVE_with_name("scan_vstring");
10737 /* make the op for the constant and return */
10740 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10742 lvalp->opval = NULL;
10748 S_scan_formline(pTHX_ char *s)
10752 SV * const stuff = newSVpvs("");
10753 bool needargs = FALSE;
10754 bool eofmt = FALSE;
10756 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10758 while (!needargs) {
10761 #ifdef PERL_STRICT_CR
10762 while (SPACE_OR_TAB(*t))
10765 while (SPACE_OR_TAB(*t) || *t == '\r')
10768 if (*t == '\n' || t == PL_bufend) {
10773 eol = (char *) memchr(s,'\n',PL_bufend-s);
10777 for (t = s; t < eol; t++) {
10778 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10780 goto enough; /* ~~ must be first line in formline */
10782 if (*t == '@' || *t == '^')
10786 sv_catpvn(stuff, s, eol-s);
10787 #ifndef PERL_STRICT_CR
10788 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10789 char *end = SvPVX(stuff) + SvCUR(stuff);
10792 SvCUR_set(stuff, SvCUR(stuff) - 1);
10800 if ((PL_rsfp || PL_parser->filtered)
10801 && PL_parser->form_lex_state == LEX_NORMAL) {
10803 PL_bufptr = PL_bufend;
10804 COPLINE_INC_WITH_HERELINES;
10805 got_some = lex_next_chunk(0);
10806 CopLINE_dec(PL_curcop);
10814 if (!SvCUR(stuff) || needargs)
10815 PL_lex_state = PL_parser->form_lex_state;
10816 if (SvCUR(stuff)) {
10817 PL_expect = XSTATE;
10819 const char *s2 = s;
10820 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10824 PL_expect = XTERMBLOCK;
10825 NEXTVAL_NEXTTOKE.ival = 0;
10828 NEXTVAL_NEXTTOKE.ival = 0;
10829 force_next(FORMLBRACK);
10832 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10835 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10839 SvREFCNT_dec(stuff);
10841 PL_lex_formbrack = 0;
10847 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10849 const I32 oldsavestack_ix = PL_savestack_ix;
10850 CV* const outsidecv = PL_compcv;
10852 SAVEI32(PL_subline);
10853 save_item(PL_subname);
10854 SAVESPTR(PL_compcv);
10856 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
10857 CvFLAGS(PL_compcv) |= flags;
10859 PL_subline = CopLINE(PL_curcop);
10860 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
10861 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
10862 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
10863 if (outsidecv && CvPADLIST(outsidecv))
10864 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
10866 return oldsavestack_ix;
10870 S_yywarn(pTHX_ const char *const s, U32 flags)
10872 PERL_ARGS_ASSERT_YYWARN;
10874 PL_in_eval |= EVAL_WARNONLY;
10875 yyerror_pv(s, flags);
10880 Perl_yyerror(pTHX_ const char *const s)
10882 PERL_ARGS_ASSERT_YYERROR;
10883 return yyerror_pvn(s, strlen(s), 0);
10887 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10889 PERL_ARGS_ASSERT_YYERROR_PV;
10890 return yyerror_pvn(s, strlen(s), flags);
10894 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10896 const char *context = NULL;
10899 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
10900 int yychar = PL_parser->yychar;
10902 PERL_ARGS_ASSERT_YYERROR_PVN;
10904 if (!yychar || (yychar == ';' && !PL_rsfp))
10905 sv_catpvs(where_sv, "at EOF");
10906 else if ( PL_oldoldbufptr
10907 && PL_bufptr > PL_oldoldbufptr
10908 && PL_bufptr - PL_oldoldbufptr < 200
10909 && PL_oldoldbufptr != PL_oldbufptr
10910 && PL_oldbufptr != PL_bufptr)
10914 The code below is removed for NetWare because it abends/crashes on NetWare
10915 when the script has error such as not having the closing quotes like:
10916 if ($var eq "value)
10917 Checking of white spaces is anyway done in NetWare code.
10920 while (isSPACE(*PL_oldoldbufptr))
10923 context = PL_oldoldbufptr;
10924 contlen = PL_bufptr - PL_oldoldbufptr;
10926 else if ( PL_oldbufptr
10927 && PL_bufptr > PL_oldbufptr
10928 && PL_bufptr - PL_oldbufptr < 200
10929 && PL_oldbufptr != PL_bufptr) {
10932 The code below is removed for NetWare because it abends/crashes on NetWare
10933 when the script has error such as not having the closing quotes like:
10934 if ($var eq "value)
10935 Checking of white spaces is anyway done in NetWare code.
10938 while (isSPACE(*PL_oldbufptr))
10941 context = PL_oldbufptr;
10942 contlen = PL_bufptr - PL_oldbufptr;
10944 else if (yychar > 255)
10945 sv_catpvs(where_sv, "next token ???");
10946 else if (yychar == YYEMPTY) {
10947 if (PL_lex_state == LEX_NORMAL)
10948 sv_catpvs(where_sv, "at end of line");
10949 else if (PL_lex_inpat)
10950 sv_catpvs(where_sv, "within pattern");
10952 sv_catpvs(where_sv, "within string");
10955 sv_catpvs(where_sv, "next char ");
10957 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
10958 else if (isPRINT_LC(yychar)) {
10959 const char string = yychar;
10960 sv_catpvn(where_sv, &string, 1);
10963 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
10965 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
10966 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
10967 OutCopFILE(PL_curcop),
10968 (IV)(PL_parser->preambling == NOLINE
10969 ? CopLINE(PL_curcop)
10970 : PL_parser->preambling));
10972 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
10973 UTF8fARG(UTF, contlen, context));
10975 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
10976 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
10977 Perl_sv_catpvf(aTHX_ msg,
10978 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
10979 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
10982 if (PL_in_eval & EVAL_WARNONLY) {
10983 PL_in_eval &= ~EVAL_WARNONLY;
10984 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
10988 if (PL_error_count >= 10) {
10990 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
10991 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
10992 SVfARG(errsv), OutCopFILE(PL_curcop));
10994 Perl_croak(aTHX_ "%s has too many errors.\n",
10995 OutCopFILE(PL_curcop));
10998 PL_in_my_stash = NULL;
11003 S_swallow_bom(pTHX_ U8 *s)
11005 const STRLEN slen = SvCUR(PL_linestr);
11007 PERL_ARGS_ASSERT_SWALLOW_BOM;
11011 if (s[1] == 0xFE) {
11012 /* UTF-16 little-endian? (or UTF-32LE?) */
11013 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11014 /* diag_listed_as: Unsupported script encoding %s */
11015 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11016 #ifndef PERL_NO_UTF16_FILTER
11017 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11019 if (PL_bufend > (char*)s) {
11020 s = add_utf16_textfilter(s, TRUE);
11023 /* diag_listed_as: Unsupported script encoding %s */
11024 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11029 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11030 #ifndef PERL_NO_UTF16_FILTER
11031 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11033 if (PL_bufend > (char *)s) {
11034 s = add_utf16_textfilter(s, FALSE);
11037 /* diag_listed_as: Unsupported script encoding %s */
11038 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11042 case BOM_UTF8_FIRST_BYTE: {
11043 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11044 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11045 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11046 s += len + 1; /* UTF-8 */
11053 if (s[2] == 0xFE && s[3] == 0xFF) {
11054 /* UTF-32 big-endian */
11055 /* diag_listed_as: Unsupported script encoding %s */
11056 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11059 else if (s[2] == 0 && s[3] != 0) {
11062 * are a good indicator of UTF-16BE. */
11063 #ifndef PERL_NO_UTF16_FILTER
11064 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11065 s = add_utf16_textfilter(s, FALSE);
11067 /* diag_listed_as: Unsupported script encoding %s */
11068 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11075 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11078 * are a good indicator of UTF-16LE. */
11079 #ifndef PERL_NO_UTF16_FILTER
11080 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11081 s = add_utf16_textfilter(s, TRUE);
11083 /* diag_listed_as: Unsupported script encoding %s */
11084 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11092 #ifndef PERL_NO_UTF16_FILTER
11094 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11096 SV *const filter = FILTER_DATA(idx);
11097 /* We re-use this each time round, throwing the contents away before we
11099 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11100 SV *const utf8_buffer = filter;
11101 IV status = IoPAGE(filter);
11102 const bool reverse = cBOOL(IoLINES(filter));
11105 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11107 /* As we're automatically added, at the lowest level, and hence only called
11108 from this file, we can be sure that we're not called in block mode. Hence
11109 don't bother writing code to deal with block mode. */
11111 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11114 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11116 DEBUG_P(PerlIO_printf(Perl_debug_log,
11117 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11118 FPTR2DPTR(void *, S_utf16_textfilter),
11119 reverse ? 'l' : 'b', idx, maxlen, status,
11120 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11127 /* First, look in our buffer of existing UTF-8 data: */
11128 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11132 } else if (status == 0) {
11134 IoPAGE(filter) = 0;
11135 nl = SvEND(utf8_buffer);
11138 STRLEN got = nl - SvPVX(utf8_buffer);
11139 /* Did we have anything to append? */
11141 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11142 /* Everything else in this code works just fine if SVp_POK isn't
11143 set. This, however, needs it, and we need it to work, else
11144 we loop infinitely because the buffer is never consumed. */
11145 sv_chop(utf8_buffer, nl);
11149 /* OK, not a complete line there, so need to read some more UTF-16.
11150 Read an extra octect if the buffer currently has an odd number. */
11154 if (SvCUR(utf16_buffer) >= 2) {
11155 /* Location of the high octet of the last complete code point.
11156 Gosh, UTF-16 is a pain. All the benefits of variable length,
11157 *coupled* with all the benefits of partial reads and
11159 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11160 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11162 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11166 /* We have the first half of a surrogate. Read more. */
11167 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11170 status = FILTER_READ(idx + 1, utf16_buffer,
11171 160 + (SvCUR(utf16_buffer) & 1));
11172 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11173 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11176 IoPAGE(filter) = status;
11181 chars = SvCUR(utf16_buffer) >> 1;
11182 have = SvCUR(utf8_buffer);
11183 SvGROW(utf8_buffer, have + chars * 3 + 1);
11186 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11187 (U8*)SvPVX_const(utf8_buffer) + have,
11188 chars * 2, &newlen);
11190 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11191 (U8*)SvPVX_const(utf8_buffer) + have,
11192 chars * 2, &newlen);
11194 SvCUR_set(utf8_buffer, have + newlen);
11197 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11198 it's private to us, and utf16_to_utf8{,reversed} take a
11199 (pointer,length) pair, rather than a NUL-terminated string. */
11200 if(SvCUR(utf16_buffer) & 1) {
11201 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11202 SvCUR_set(utf16_buffer, 1);
11204 SvCUR_set(utf16_buffer, 0);
11207 DEBUG_P(PerlIO_printf(Perl_debug_log,
11208 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11210 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11211 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11216 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11218 SV *filter = filter_add(S_utf16_textfilter, NULL);
11220 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11222 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11223 sv_setpvs(filter, "");
11224 IoLINES(filter) = reversed;
11225 IoPAGE(filter) = 1; /* Not EOF */
11227 /* Sadly, we have to return a valid pointer, come what may, so we have to
11228 ignore any error return from this. */
11229 SvCUR_set(PL_linestr, 0);
11230 if (FILTER_READ(0, PL_linestr, 0)) {
11231 SvUTF8_on(PL_linestr);
11233 SvUTF8_on(PL_linestr);
11235 PL_bufend = SvEND(PL_linestr);
11236 return (U8*)SvPVX(PL_linestr);
11241 Returns a pointer to the next character after the parsed
11242 vstring, as well as updating the passed in sv.
11244 Function must be called like
11246 sv = sv_2mortal(newSV(5));
11247 s = scan_vstring(s,e,sv);
11249 where s and e are the start and end of the string.
11250 The sv should already be large enough to store the vstring
11251 passed in, for performance reasons.
11253 This function may croak if fatal warnings are enabled in the
11254 calling scope, hence the sv_2mortal in the example (to prevent
11255 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11261 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11263 const char *pos = s;
11264 const char *start = s;
11266 PERL_ARGS_ASSERT_SCAN_VSTRING;
11268 if (*pos == 'v') pos++; /* get past 'v' */
11269 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11271 if ( *pos != '.') {
11272 /* this may not be a v-string if followed by => */
11273 const char *next = pos;
11274 while (next < e && isSPACE(*next))
11276 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11277 /* return string not v-string */
11278 sv_setpvn(sv,(char *)s,pos-s);
11279 return (char *)pos;
11283 if (!isALPHA(*pos)) {
11284 U8 tmpbuf[UTF8_MAXBYTES+1];
11287 s++; /* get past 'v' */
11292 /* this is atoi() that tolerates underscores */
11295 const char *end = pos;
11297 while (--end >= s) {
11299 const UV orev = rev;
11300 rev += (*end - '0') * mult;
11303 /* diag_listed_as: Integer overflow in %s number */
11304 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11305 "Integer overflow in decimal number");
11309 /* Append native character for the rev point */
11310 tmpend = uvchr_to_utf8(tmpbuf, rev);
11311 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11312 if (!UVCHR_IS_INVARIANT(rev))
11314 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11320 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11324 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11331 Perl_keyword_plugin_standard(pTHX_
11332 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11334 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11335 PERL_UNUSED_CONTEXT;
11336 PERL_UNUSED_ARG(keyword_ptr);
11337 PERL_UNUSED_ARG(keyword_len);
11338 PERL_UNUSED_ARG(op_ptr);
11339 return KEYWORD_PLUGIN_DECLINE;
11342 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11344 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11346 SAVEI32(PL_lex_brackets);
11347 if (PL_lex_brackets > 100)
11348 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11349 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11350 SAVEI32(PL_lex_allbrackets);
11351 PL_lex_allbrackets = 0;
11352 SAVEI8(PL_lex_fakeeof);
11353 PL_lex_fakeeof = (U8)fakeeof;
11354 if(yyparse(gramtype) && !PL_parser->error_count)
11355 qerror(Perl_mess(aTHX_ "Parse error"));
11358 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11360 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11364 SAVEVPTR(PL_eval_root);
11365 PL_eval_root = NULL;
11366 parse_recdescent(gramtype, fakeeof);
11372 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11374 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11377 if (flags & ~PARSE_OPTIONAL)
11378 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11379 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11380 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11381 if (!PL_parser->error_count)
11382 qerror(Perl_mess(aTHX_ "Parse error"));
11383 exprop = newOP(OP_NULL, 0);
11389 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11391 Parse a Perl arithmetic expression. This may contain operators of precedence
11392 down to the bit shift operators. The expression must be followed (and thus
11393 terminated) either by a comparison or lower-precedence operator or by
11394 something that would normally terminate an expression such as semicolon.
11395 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11396 otherwise it is mandatory. It is up to the caller to ensure that the
11397 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11398 the source of the code to be parsed and the lexical context for the
11401 The op tree representing the expression is returned. If an optional
11402 expression is absent, a null pointer is returned, otherwise the pointer
11405 If an error occurs in parsing or compilation, in most cases a valid op
11406 tree is returned anyway. The error is reflected in the parser state,
11407 normally resulting in a single exception at the top level of parsing
11408 which covers all the compilation errors that occurred. Some compilation
11409 errors, however, will throw an exception immediately.
11415 Perl_parse_arithexpr(pTHX_ U32 flags)
11417 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11421 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11423 Parse a Perl term expression. This may contain operators of precedence
11424 down to the assignment operators. The expression must be followed (and thus
11425 terminated) either by a comma or lower-precedence operator or by
11426 something that would normally terminate an expression such as semicolon.
11427 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11428 otherwise it is mandatory. It is up to the caller to ensure that the
11429 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11430 the source of the code to be parsed and the lexical context for the
11433 The op tree representing the expression is returned. If an optional
11434 expression is absent, a null pointer is returned, otherwise the pointer
11437 If an error occurs in parsing or compilation, in most cases a valid op
11438 tree is returned anyway. The error is reflected in the parser state,
11439 normally resulting in a single exception at the top level of parsing
11440 which covers all the compilation errors that occurred. Some compilation
11441 errors, however, will throw an exception immediately.
11447 Perl_parse_termexpr(pTHX_ U32 flags)
11449 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11453 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11455 Parse a Perl list expression. This may contain operators of precedence
11456 down to the comma operator. The expression must be followed (and thus
11457 terminated) either by a low-precedence logic operator such as C<or> or by
11458 something that would normally terminate an expression such as semicolon.
11459 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11460 otherwise it is mandatory. It is up to the caller to ensure that the
11461 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11462 the source of the code to be parsed and the lexical context for the
11465 The op tree representing the expression is returned. If an optional
11466 expression is absent, a null pointer is returned, otherwise the pointer
11469 If an error occurs in parsing or compilation, in most cases a valid op
11470 tree is returned anyway. The error is reflected in the parser state,
11471 normally resulting in a single exception at the top level of parsing
11472 which covers all the compilation errors that occurred. Some compilation
11473 errors, however, will throw an exception immediately.
11479 Perl_parse_listexpr(pTHX_ U32 flags)
11481 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11485 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11487 Parse a single complete Perl expression. This allows the full
11488 expression grammar, including the lowest-precedence operators such
11489 as C<or>. The expression must be followed (and thus terminated) by a
11490 token that an expression would normally be terminated by: end-of-file,
11491 closing bracketing punctuation, semicolon, or one of the keywords that
11492 signals a postfix expression-statement modifier. If C<flags> has the
11493 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11494 mandatory. It is up to the caller to ensure that the dynamic parser
11495 state (L</PL_parser> et al) is correctly set to reflect the source of
11496 the code to be parsed and the lexical context for the expression.
11498 The op tree representing the expression is returned. If an optional
11499 expression is absent, a null pointer is returned, otherwise the pointer
11502 If an error occurs in parsing or compilation, in most cases a valid op
11503 tree is returned anyway. The error is reflected in the parser state,
11504 normally resulting in a single exception at the top level of parsing
11505 which covers all the compilation errors that occurred. Some compilation
11506 errors, however, will throw an exception immediately.
11512 Perl_parse_fullexpr(pTHX_ U32 flags)
11514 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11518 =for apidoc Amx|OP *|parse_block|U32 flags
11520 Parse a single complete Perl code block. This consists of an opening
11521 brace, a sequence of statements, and a closing brace. The block
11522 constitutes a lexical scope, so C<my> variables and various compile-time
11523 effects can be contained within it. It is up to the caller to ensure
11524 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11525 reflect the source of the code to be parsed and the lexical context for
11528 The op tree representing the code block is returned. This is always a
11529 real op, never a null pointer. It will normally be a C<lineseq> list,
11530 including C<nextstate> or equivalent ops. No ops to construct any kind
11531 of runtime scope are included by virtue of it being a block.
11533 If an error occurs in parsing or compilation, in most cases a valid op
11534 tree (most likely null) is returned anyway. The error is reflected in
11535 the parser state, normally resulting in a single exception at the top
11536 level of parsing which covers all the compilation errors that occurred.
11537 Some compilation errors, however, will throw an exception immediately.
11539 The C<flags> parameter is reserved for future use, and must always
11546 Perl_parse_block(pTHX_ U32 flags)
11549 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11550 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11554 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11556 Parse a single unadorned Perl statement. This may be a normal imperative
11557 statement or a declaration that has compile-time effect. It does not
11558 include any label or other affixture. It is up to the caller to ensure
11559 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11560 reflect the source of the code to be parsed and the lexical context for
11563 The op tree representing the statement is returned. This may be a
11564 null pointer if the statement is null, for example if it was actually
11565 a subroutine definition (which has compile-time side effects). If not
11566 null, it will be ops directly implementing the statement, suitable to
11567 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11568 equivalent op (except for those embedded in a scope contained entirely
11569 within the statement).
11571 If an error occurs in parsing or compilation, in most cases a valid op
11572 tree (most likely null) is returned anyway. The error is reflected in
11573 the parser state, normally resulting in a single exception at the top
11574 level of parsing which covers all the compilation errors that occurred.
11575 Some compilation errors, however, will throw an exception immediately.
11577 The C<flags> parameter is reserved for future use, and must always
11584 Perl_parse_barestmt(pTHX_ U32 flags)
11587 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11588 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11592 =for apidoc Amx|SV *|parse_label|U32 flags
11594 Parse a single label, possibly optional, of the type that may prefix a
11595 Perl statement. It is up to the caller to ensure that the dynamic parser
11596 state (L</PL_parser> et al) is correctly set to reflect the source of
11597 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
11598 label is optional, otherwise it is mandatory.
11600 The name of the label is returned in the form of a fresh scalar. If an
11601 optional label is absent, a null pointer is returned.
11603 If an error occurs in parsing, which can only occur if the label is
11604 mandatory, a valid label is returned anyway. The error is reflected in
11605 the parser state, normally resulting in a single exception at the top
11606 level of parsing which covers all the compilation errors that occurred.
11612 Perl_parse_label(pTHX_ U32 flags)
11614 if (flags & ~PARSE_OPTIONAL)
11615 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11617 PL_parser->yychar = yylex();
11618 if (PL_parser->yychar == LABEL) {
11619 char * const lpv = pl_yylval.pval;
11620 STRLEN llen = strlen(lpv);
11621 PL_parser->yychar = YYEMPTY;
11622 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11629 STRLEN wlen, bufptr_pos;
11632 if (!isIDFIRST_lazy_if(s, UTF))
11634 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11635 if (word_takes_any_delimiter(s, wlen))
11637 bufptr_pos = s - SvPVX(PL_linestr);
11639 lex_read_space(LEX_KEEP_PREVIOUS);
11641 s = SvPVX(PL_linestr) + bufptr_pos;
11642 if (t[0] == ':' && t[1] != ':') {
11643 PL_oldoldbufptr = PL_oldbufptr;
11646 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11650 if (flags & PARSE_OPTIONAL) {
11653 qerror(Perl_mess(aTHX_ "Parse error"));
11654 return newSVpvs("x");
11661 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11663 Parse a single complete Perl statement. This may be a normal imperative
11664 statement or a declaration that has compile-time effect, and may include
11665 optional labels. It is up to the caller to ensure that the dynamic
11666 parser state (L</PL_parser> et al) is correctly set to reflect the source
11667 of the code to be parsed and the lexical context for the statement.
11669 The op tree representing the statement is returned. This may be a
11670 null pointer if the statement is null, for example if it was actually
11671 a subroutine definition (which has compile-time side effects). If not
11672 null, it will be the result of a L</newSTATEOP> call, normally including
11673 a C<nextstate> or equivalent op.
11675 If an error occurs in parsing or compilation, in most cases a valid op
11676 tree (most likely null) is returned anyway. The error is reflected in
11677 the parser state, normally resulting in a single exception at the top
11678 level of parsing which covers all the compilation errors that occurred.
11679 Some compilation errors, however, will throw an exception immediately.
11681 The C<flags> parameter is reserved for future use, and must always
11688 Perl_parse_fullstmt(pTHX_ U32 flags)
11691 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11692 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11696 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11698 Parse a sequence of zero or more Perl statements. These may be normal
11699 imperative statements, including optional labels, or declarations
11700 that have compile-time effect, or any mixture thereof. The statement
11701 sequence ends when a closing brace or end-of-file is encountered in a
11702 place where a new statement could have validly started. It is up to
11703 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11704 is correctly set to reflect the source of the code to be parsed and the
11705 lexical context for the statements.
11707 The op tree representing the statement sequence is returned. This may
11708 be a null pointer if the statements were all null, for example if there
11709 were no statements or if there were only subroutine definitions (which
11710 have compile-time side effects). If not null, it will be a C<lineseq>
11711 list, normally including C<nextstate> or equivalent ops.
11713 If an error occurs in parsing or compilation, in most cases a valid op
11714 tree is returned anyway. The error is reflected in the parser state,
11715 normally resulting in a single exception at the top level of parsing
11716 which covers all the compilation errors that occurred. Some compilation
11717 errors, however, will throw an exception immediately.
11719 The C<flags> parameter is reserved for future use, and must always
11726 Perl_parse_stmtseq(pTHX_ U32 flags)
11731 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11732 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11733 c = lex_peek_unichar(0);
11734 if (c != -1 && c != /*{*/'}')
11735 qerror(Perl_mess(aTHX_ "Parse error"));
11739 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11741 S_parse_opt_lexvar(pTHX)
11746 lex_token_boundary();
11747 sigil = lex_read_unichar(0);
11748 if (lex_peek_unichar(0) == '#') {
11749 qerror(Perl_mess(aTHX_ "Parse error"));
11753 c = lex_peek_unichar(0);
11754 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11757 d = PL_tokenbuf + 1;
11758 PL_tokenbuf[0] = (char)sigil;
11759 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0,
11760 cBOOL(UTF), FALSE);
11762 if (d == PL_tokenbuf+1)
11764 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11765 OPf_MOD | (OPpLVAL_INTRO<<8));
11766 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11771 Perl_parse_subsignature(pTHX)
11774 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11775 OP *initops = NULL;
11777 c = lex_peek_unichar(0);
11778 while (c != /*(*/')') {
11782 if (prev_type == 2)
11783 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11784 var = parse_opt_lexvar();
11786 newBINOP(OP_AELEM, 0,
11787 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11789 newSVOP(OP_CONST, 0, newSViv(pos))) :
11792 c = lex_peek_unichar(0);
11794 lex_token_boundary();
11795 lex_read_unichar(0);
11797 c = lex_peek_unichar(0);
11798 if (c == ',' || c == /*(*/')') {
11800 qerror(Perl_mess(aTHX_ "Optional parameter "
11801 "lacks default expression"));
11803 OP *defexpr = parse_termexpr(0);
11804 if (defexpr->op_type == OP_UNDEF
11805 && !(defexpr->op_flags & OPf_KIDS))
11811 scalar(newUNOP(OP_RV2AV, 0,
11812 newGVOP(OP_GV, 0, PL_defgv))),
11813 newSVOP(OP_CONST, 0, newSViv(pos+1)));
11815 newCONDOP(0, ifop, expr, defexpr) :
11816 newLOGOP(OP_OR, 0, ifop, defexpr);
11821 if (prev_type == 1)
11822 qerror(Perl_mess(aTHX_ "Mandatory parameter "
11823 "follows optional parameter"));
11825 min_arity = pos + 1;
11827 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11829 initops = op_append_list(OP_LINESEQ, initops,
11830 newSTATEOP(0, NULL, expr));
11836 if (prev_type == 2)
11837 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11838 var = parse_opt_lexvar();
11840 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11841 newBINOP(OP_BIT_AND, 0,
11842 scalar(newUNOP(OP_RV2AV, 0,
11843 newGVOP(OP_GV, 0, PL_defgv))),
11844 newSVOP(OP_CONST, 0, newSViv(1))),
11845 op_convert_list(OP_DIE, 0,
11846 op_convert_list(OP_SPRINTF, 0,
11847 op_append_list(OP_LIST,
11848 newSVOP(OP_CONST, 0,
11849 newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
11851 op_append_list(OP_LIST,
11852 newSVOP(OP_CONST, 0, newSViv(1)),
11853 newSVOP(OP_CONST, 0, newSViv(2))),
11854 newOP(OP_CALLER, 0))))));
11855 if (pos != min_arity)
11856 chkop = newLOGOP(OP_AND, 0,
11858 scalar(newUNOP(OP_RV2AV, 0,
11859 newGVOP(OP_GV, 0, PL_defgv))),
11860 newSVOP(OP_CONST, 0, newSViv(pos))),
11862 initops = op_append_list(OP_LINESEQ,
11863 newSTATEOP(0, NULL, chkop),
11868 op_prepend_elem(OP_ASLICE,
11869 newOP(OP_PUSHMARK, 0),
11870 newLISTOP(OP_ASLICE, 0,
11872 newSVOP(OP_CONST, 0, newSViv(pos)),
11873 newUNOP(OP_AV2ARYLEN, 0,
11874 ref(newUNOP(OP_RV2AV, 0,
11875 newGVOP(OP_GV, 0, PL_defgv)),
11877 ref(newUNOP(OP_RV2AV, 0,
11878 newGVOP(OP_GV, 0, PL_defgv)),
11880 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
11881 initops = op_append_list(OP_LINESEQ, initops,
11882 newSTATEOP(0, NULL,
11883 newASSIGNOP(OPf_STACKED, var, 0, slice)));
11890 qerror(Perl_mess(aTHX_ "Parse error"));
11894 c = lex_peek_unichar(0);
11896 case /*(*/')': break;
11899 lex_token_boundary();
11900 lex_read_unichar(0);
11902 c = lex_peek_unichar(0);
11903 } while (c == ',');
11909 if (min_arity != 0) {
11910 initops = op_append_list(OP_LINESEQ,
11911 newSTATEOP(0, NULL,
11914 scalar(newUNOP(OP_RV2AV, 0,
11915 newGVOP(OP_GV, 0, PL_defgv))),
11916 newSVOP(OP_CONST, 0, newSViv(min_arity))),
11917 op_convert_list(OP_DIE, 0,
11918 op_convert_list(OP_SPRINTF, 0,
11919 op_append_list(OP_LIST,
11920 newSVOP(OP_CONST, 0,
11921 newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
11923 op_append_list(OP_LIST,
11924 newSVOP(OP_CONST, 0, newSViv(1)),
11925 newSVOP(OP_CONST, 0, newSViv(2))),
11926 newOP(OP_CALLER, 0))))))),
11929 if (max_arity != -1) {
11930 initops = op_append_list(OP_LINESEQ,
11931 newSTATEOP(0, NULL,
11934 scalar(newUNOP(OP_RV2AV, 0,
11935 newGVOP(OP_GV, 0, PL_defgv))),
11936 newSVOP(OP_CONST, 0, newSViv(max_arity))),
11937 op_convert_list(OP_DIE, 0,
11938 op_convert_list(OP_SPRINTF, 0,
11939 op_append_list(OP_LIST,
11940 newSVOP(OP_CONST, 0,
11941 newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
11943 op_append_list(OP_LIST,
11944 newSVOP(OP_CONST, 0, newSViv(1)),
11945 newSVOP(OP_CONST, 0, newSViv(2))),
11946 newOP(OP_CALLER, 0))))))),
11953 * ex: set ts=8 sts=4 sw=4 et: