3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_inline.h"
43 #define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_defer (PL_parser->lex_defer)
56 #define PL_lex_dojoin (PL_parser->lex_dojoin)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_preambled (PL_parser->preambled)
68 #define PL_sublex_info (PL_parser->sublex_info)
69 #define PL_linestr (PL_parser->linestr)
70 #define PL_expect (PL_parser->expect)
71 #define PL_copline (PL_parser->copline)
72 #define PL_bufptr (PL_parser->bufptr)
73 #define PL_oldbufptr (PL_parser->oldbufptr)
74 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
75 #define PL_linestart (PL_parser->linestart)
76 #define PL_bufend (PL_parser->bufend)
77 #define PL_last_uni (PL_parser->last_uni)
78 #define PL_last_lop (PL_parser->last_lop)
79 #define PL_last_lop_op (PL_parser->last_lop_op)
80 #define PL_lex_state (PL_parser->lex_state)
81 #define PL_rsfp (PL_parser->rsfp)
82 #define PL_rsfp_filters (PL_parser->rsfp_filters)
83 #define PL_in_my (PL_parser->in_my)
84 #define PL_in_my_stash (PL_parser->in_my_stash)
85 #define PL_tokenbuf (PL_parser->tokenbuf)
86 #define PL_multi_end (PL_parser->multi_end)
87 #define PL_error_count (PL_parser->error_count)
89 # define PL_nexttoke (PL_parser->nexttoke)
90 # define PL_nexttype (PL_parser->nexttype)
91 # define PL_nextval (PL_parser->nextval)
93 static const char* const ident_too_long = "Identifier too long";
95 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
97 #define XENUMMASK 0x3f
99 #define XFAKEBRACK 0x80
101 #ifdef USE_UTF8_SCRIPTS
102 # define UTF cBOOL(!IN_BYTES)
104 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
107 /* The maximum number of characters preceding the unrecognized one to display */
108 #define UNRECOGNIZED_PRECEDE_COUNT 10
110 /* In variables named $^X, these are the legal values for X.
111 * 1999-02-27 mjd-perl-patch@plover.com */
112 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
114 #define SPACE_OR_TAB(c) isBLANK_A(c)
116 #define HEXFP_PEEK(s) \
118 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
119 isALPHA_FOLD_EQ(s[0], 'p'))
121 /* LEX_* are values for PL_lex_state, the state of the lexer.
122 * They are arranged oddly so that the guard on the switch statement
123 * can get by with a single comparison (if the compiler is smart enough).
125 * These values refer to the various states within a sublex parse,
126 * i.e. within a double quotish string
129 /* #define LEX_NOTPARSING 11 is done in perl.h. */
131 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
132 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
133 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
134 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
135 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
137 /* at end of code, eg "$x" followed by: */
138 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
139 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
141 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
142 string or after \E, $foo, etc */
143 #define LEX_INTERPCONST 2 /* NOT USED */
144 #define LEX_FORMLINE 1 /* expecting a format line */
145 #define LEX_KNOWNEXT 0 /* next token known; just return it */
149 static const char* const lex_state_names[] = {
164 #include "keywords.h"
166 /* CLINE is a macro that ensures PL_copline has a sane value */
168 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
171 * Convenience functions to return different tokens and prime the
172 * lexer for the next token. They all take an argument.
174 * TOKEN : generic token (used for '(', DOLSHARP, etc)
175 * OPERATOR : generic operator
176 * AOPERATOR : assignment operator
177 * PREBLOCK : beginning the block after an if, while, foreach, ...
178 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
179 * PREREF : *EXPR where EXPR is not a simple identifier
180 * TERM : expression term
181 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
182 * LOOPX : loop exiting command (goto, last, dump, etc)
183 * FTST : file test operator
184 * FUN0 : zero-argument function
185 * FUN0OP : zero-argument function, with its op created in this file
186 * FUN1 : not used, except for not, which isn't a UNIOP
187 * BOop : bitwise or or xor
189 * BCop : bitwise complement
190 * SHop : shift operator
191 * PWop : power operator
192 * PMop : pattern-matching operator
193 * Aop : addition-level operator
194 * AopNOASSIGN : addition-level operator that is never part of .=
195 * Mop : multiplication-level operator
196 * Eop : equality-testing operator
197 * Rop : relational operator <= != gt
199 * Also see LOP and lop() below.
202 #ifdef DEBUGGING /* Serve -DT. */
203 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
205 # define REPORT(retval) (retval)
208 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
209 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
210 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
211 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
212 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
213 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
214 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
215 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
216 #define LOOPX(f) return (PL_bufptr = force_word(s,WORD,TRUE,FALSE), \
218 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
220 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
221 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
222 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
223 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
224 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
225 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
226 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
228 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
229 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
230 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
231 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
232 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
233 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
234 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
235 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
237 /* This bit of chicanery makes a unary function followed by
238 * a parenthesis into a function with one argument, highest precedence.
239 * The UNIDOR macro is for unary functions that can be followed by the //
240 * operator (such as C<shift // 0>).
242 #define UNI3(f,x,have_x) { \
243 pl_yylval.ival = f; \
244 if (have_x) PL_expect = x; \
246 PL_last_uni = PL_oldbufptr; \
247 PL_last_lop_op = f; \
249 return REPORT( (int)FUNC1 ); \
251 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
253 #define UNI(f) UNI3(f,XTERM,1)
254 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
255 #define UNIPROTO(f,optional) { \
256 if (optional) PL_last_uni = PL_oldbufptr; \
260 #define UNIBRACK(f) UNI3(f,0,0)
262 /* grandfather return to old style */
265 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
266 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
267 pl_yylval.ival = (f); \
273 #define COPLINE_INC_WITH_HERELINES \
275 CopLINE_inc(PL_curcop); \
276 if (PL_parser->herelines) \
277 CopLINE(PL_curcop) += PL_parser->herelines, \
278 PL_parser->herelines = 0; \
280 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
281 * is no sublex_push to follow. */
282 #define COPLINE_SET_FROM_MULTI_END \
284 CopLINE_set(PL_curcop, PL_multi_end); \
285 if (PL_multi_end != PL_multi_start) \
286 PL_parser->herelines = 0; \
292 /* how to interpret the pl_yylval associated with the token */
296 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
301 static struct debug_tokens {
303 enum token_type type;
305 } const debug_tokens[] =
307 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
308 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
309 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
310 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
311 { ARROW, TOKENTYPE_NONE, "ARROW" },
312 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
313 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
314 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
315 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
316 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
317 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
318 { DO, TOKENTYPE_NONE, "DO" },
319 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
320 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
321 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
322 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
323 { ELSE, TOKENTYPE_NONE, "ELSE" },
324 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
325 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
326 { FOR, TOKENTYPE_IVAL, "FOR" },
327 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
328 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
329 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
330 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
331 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
332 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
333 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
334 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
335 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
336 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
337 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
338 { IF, TOKENTYPE_IVAL, "IF" },
339 { LABEL, TOKENTYPE_PVAL, "LABEL" },
340 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
341 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
342 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
343 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
344 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
345 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
346 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
347 { MY, TOKENTYPE_IVAL, "MY" },
348 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
349 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
350 { OROP, TOKENTYPE_IVAL, "OROP" },
351 { OROR, TOKENTYPE_NONE, "OROR" },
352 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
353 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
354 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
355 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
356 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
357 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
358 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
359 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
360 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
361 { PREINC, TOKENTYPE_NONE, "PREINC" },
362 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
363 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
364 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
365 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
366 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
367 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
368 { SUB, TOKENTYPE_NONE, "SUB" },
369 { THING, TOKENTYPE_OPVAL, "THING" },
370 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
371 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
372 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
373 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
374 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
375 { USE, TOKENTYPE_IVAL, "USE" },
376 { WHEN, TOKENTYPE_IVAL, "WHEN" },
377 { WHILE, TOKENTYPE_IVAL, "WHILE" },
378 { WORD, TOKENTYPE_OPVAL, "WORD" },
379 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
380 { 0, TOKENTYPE_NONE, NULL }
383 /* dump the returned token in rv, plus any optional arg in pl_yylval */
386 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
388 PERL_ARGS_ASSERT_TOKEREPORT;
391 const char *name = NULL;
392 enum token_type type = TOKENTYPE_NONE;
393 const struct debug_tokens *p;
394 SV* const report = newSVpvs("<== ");
396 for (p = debug_tokens; p->token; p++) {
397 if (p->token == (int)rv) {
404 Perl_sv_catpv(aTHX_ report, name);
405 else if (isGRAPH(rv))
407 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
409 sv_catpvs(report, " (pending identifier)");
412 sv_catpvs(report, "EOF");
414 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
419 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
421 case TOKENTYPE_OPNUM:
422 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
423 PL_op_name[lvalp->ival]);
426 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
428 case TOKENTYPE_OPVAL:
430 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
431 PL_op_name[lvalp->opval->op_type]);
432 if (lvalp->opval->op_type == OP_CONST) {
433 Perl_sv_catpvf(aTHX_ report, " %s",
434 SvPEEK(cSVOPx_sv(lvalp->opval)));
439 sv_catpvs(report, "(opval=null)");
442 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
448 /* print the buffer with suitable escapes */
451 S_printbuf(pTHX_ const char *const fmt, const char *const s)
453 SV* const tmp = newSVpvs("");
455 PERL_ARGS_ASSERT_PRINTBUF;
457 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
458 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
466 S_deprecate_commaless_var_list(pTHX) {
468 deprecate("comma-less variable list");
469 return REPORT(','); /* grandfather non-comma-format format */
475 * This subroutine looks for an '=' next to the operator that has just been
476 * parsed and turns it into an ASSIGNOP if it finds one.
480 S_ao(pTHX_ int toketype)
482 if (*PL_bufptr == '=') {
484 if (toketype == ANDAND)
485 pl_yylval.ival = OP_ANDASSIGN;
486 else if (toketype == OROR)
487 pl_yylval.ival = OP_ORASSIGN;
488 else if (toketype == DORDOR)
489 pl_yylval.ival = OP_DORASSIGN;
492 return REPORT(toketype);
497 * When Perl expects an operator and finds something else, no_op
498 * prints the warning. It always prints "<something> found where
499 * operator expected. It prints "Missing semicolon on previous line?"
500 * if the surprise occurs at the start of the line. "do you need to
501 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502 * where the compiler doesn't know if foo is a method call or a function.
503 * It prints "Missing operator before end of line" if there's nothing
504 * after the missing operator, or "... before <...>" if there is something
505 * after the missing operator.
507 * PL_bufptr is expected to point to the start of the thing that was found,
508 * and s after the next token or partial token.
512 S_no_op(pTHX_ const char *const what, char *s)
514 char * const oldbp = PL_bufptr;
515 const bool is_first = (PL_oldbufptr == PL_linestart);
517 PERL_ARGS_ASSERT_NO_OP;
523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
524 if (ckWARN_d(WARN_SYNTAX)) {
526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 "\t(Missing semicolon on previous line?)\n");
528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
530 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
531 t += UTF ? UTF8SKIP(t) : 1)
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %"UTF8f"?)\n",
536 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
541 "\t(Missing operator before %"UTF8f"?)\n",
542 UTF8fARG(UTF, s - oldbp, oldbp));
550 * Complain about missing quote/regexp/heredoc terminator.
551 * If it's called with NULL then it cauterizes the line buffer.
552 * If we're in a delimited string and the delimiter is a control
553 * character, it's reformatted into a two-char sequence like ^C.
558 S_missingterm(pTHX_ char *s)
563 char * const nl = strrchr(s,'\n');
567 else if ((U8) PL_multi_close < 32) {
569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
574 *tmpbuf = (char)PL_multi_close;
578 q = strchr(s,'"') ? '\'' : '"';
579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
585 * Check whether the named feature is enabled.
588 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
590 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
592 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
594 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
596 if (namelen > MAX_FEATURE_LEN)
598 memcpy(&he_name[8], name, namelen);
600 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
601 REFCOUNTED_HE_EXISTS));
605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
606 * utf16-to-utf8-reversed.
609 #ifdef PERL_CR_FILTER
613 const char *s = SvPVX_const(sv);
614 const char * const e = s + SvCUR(sv);
616 PERL_ARGS_ASSERT_STRIP_RETURN;
618 /* outer loop optimized to do nothing if there are no CR-LFs */
620 if (*s++ == '\r' && *s == '\n') {
621 /* hit a CR-LF, need to copy the rest */
625 if (*s == '\r' && s[1] == '\n')
636 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
638 const I32 count = FILTER_READ(idx+1, sv, maxlen);
639 if (count > 0 && !maxlen)
646 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
648 Creates and initialises a new lexer/parser state object, supplying
649 a context in which to lex and parse from a new source of Perl code.
650 A pointer to the new state object is placed in L</PL_parser>. An entry
651 is made on the save stack so that upon unwinding the new state object
652 will be destroyed and the former value of L</PL_parser> will be restored.
653 Nothing else need be done to clean up the parsing context.
655 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
656 non-null, provides a string (in SV form) containing code to be parsed.
657 A copy of the string is made, so subsequent modification of C<line>
658 does not affect parsing. C<rsfp>, if non-null, provides an input stream
659 from which code will be read to be parsed. If both are non-null, the
660 code in C<line> comes first and must consist of complete lines of input,
661 and C<rsfp> supplies the remainder of the source.
663 The C<flags> parameter is reserved for future use. Currently it is only
664 used by perl internally, so extensions should always pass zero.
669 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
670 can share filters with the current parser.
671 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
672 caller, hence isn't owned by the parser, so shouldn't be closed on parser
673 destruction. This is used to handle the case of defaulting to reading the
674 script from the standard input because no filename was given on the command
675 line (without getting confused by situation where STDIN has been closed, so
676 the script handle is opened on fd 0) */
679 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
681 const char *s = NULL;
682 yy_parser *parser, *oparser;
683 if (flags && flags & ~LEX_START_FLAGS)
684 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
686 /* create and initialise a parser */
688 Newxz(parser, 1, yy_parser);
689 parser->old_parser = oparser = PL_parser;
692 parser->stack = NULL;
694 parser->stack_size = 0;
696 /* on scope exit, free this parser and restore any outer one */
698 parser->saved_curcop = PL_curcop;
700 /* initialise lexer state */
702 parser->nexttoke = 0;
703 parser->error_count = oparser ? oparser->error_count : 0;
704 parser->copline = parser->preambling = NOLINE;
705 parser->lex_state = LEX_NORMAL;
706 parser->expect = XSTATE;
708 parser->rsfp_filters =
709 !(flags & LEX_START_SAME_FILTER) || !oparser
711 : MUTABLE_AV(SvREFCNT_inc(
712 oparser->rsfp_filters
713 ? oparser->rsfp_filters
714 : (oparser->rsfp_filters = newAV())
717 Newx(parser->lex_brackstack, 120, char);
718 Newx(parser->lex_casestack, 12, char);
719 *parser->lex_casestack = '\0';
720 Newxz(parser->lex_shared, 1, LEXSHARED);
724 s = SvPV_const(line, len);
725 parser->linestr = flags & LEX_START_COPIED
726 ? SvREFCNT_inc_simple_NN(line)
727 : newSVpvn_flags(s, len, SvUTF8(line));
728 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
730 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
732 parser->oldoldbufptr =
735 parser->linestart = SvPVX(parser->linestr);
736 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
737 parser->last_lop = parser->last_uni = NULL;
739 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
740 |LEX_DONT_CLOSE_RSFP));
741 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
742 |LEX_DONT_CLOSE_RSFP));
744 parser->in_pod = parser->filtered = 0;
748 /* delete a parser object */
751 Perl_parser_free(pTHX_ const yy_parser *parser)
753 PERL_ARGS_ASSERT_PARSER_FREE;
755 PL_curcop = parser->saved_curcop;
756 SvREFCNT_dec(parser->linestr);
758 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
759 PerlIO_clearerr(parser->rsfp);
760 else if (parser->rsfp && (!parser->old_parser
761 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
762 PerlIO_close(parser->rsfp);
763 SvREFCNT_dec(parser->rsfp_filters);
764 SvREFCNT_dec(parser->lex_stuff);
765 SvREFCNT_dec(parser->sublex_info.repl);
767 Safefree(parser->lex_brackstack);
768 Safefree(parser->lex_casestack);
769 Safefree(parser->lex_shared);
770 PL_parser = parser->old_parser;
775 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
777 I32 nexttoke = parser->nexttoke;
778 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
780 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
781 && parser->nextval[nexttoke].opval
782 && parser->nextval[nexttoke].opval->op_slabbed
783 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
784 op_free(parser->nextval[nexttoke].opval);
785 parser->nextval[nexttoke].opval = NULL;
792 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
794 Buffer scalar containing the chunk currently under consideration of the
795 text currently being lexed. This is always a plain string scalar (for
796 which C<SvPOK> is true). It is not intended to be used as a scalar by
797 normal scalar means; instead refer to the buffer directly by the pointer
798 variables described below.
800 The lexer maintains various C<char*> pointers to things in the
801 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
802 reallocated, all of these pointers must be updated. Don't attempt to
803 do this manually, but rather use L</lex_grow_linestr> if you need to
804 reallocate the buffer.
806 The content of the text chunk in the buffer is commonly exactly one
807 complete line of input, up to and including a newline terminator,
808 but there are situations where it is otherwise. The octets of the
809 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
810 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
811 flag on this scalar, which may disagree with it.
813 For direct examination of the buffer, the variable
814 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
815 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
816 of these pointers is usually preferable to examination of the scalar
817 through normal scalar means.
819 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
821 Direct pointer to the end of the chunk of text currently being lexed, the
822 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
823 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
824 always located at the end of the buffer, and does not count as part of
825 the buffer's contents.
827 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
829 Points to the current position of lexing inside the lexer buffer.
830 Characters around this point may be freely examined, within
831 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
832 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
833 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
835 Lexing code (whether in the Perl core or not) moves this pointer past
836 the characters that it consumes. It is also expected to perform some
837 bookkeeping whenever a newline character is consumed. This movement
838 can be more conveniently performed by the function L</lex_read_to>,
839 which handles newlines appropriately.
841 Interpretation of the buffer's octets can be abstracted out by
842 using the slightly higher-level functions L</lex_peek_unichar> and
843 L</lex_read_unichar>.
845 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
847 Points to the start of the current line inside the lexer buffer.
848 This is useful for indicating at which column an error occurred, and
849 not much else. This must be updated by any lexing code that consumes
850 a newline; the function L</lex_read_to> handles this detail.
856 =for apidoc Amx|bool|lex_bufutf8
858 Indicates whether the octets in the lexer buffer
859 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
860 of Unicode characters. If not, they should be interpreted as Latin-1
861 characters. This is analogous to the C<SvUTF8> flag for scalars.
863 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
864 contains valid UTF-8. Lexing code must be robust in the face of invalid
867 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
868 is significant, but not the whole story regarding the input character
869 encoding. Normally, when a file is being read, the scalar contains octets
870 and its C<SvUTF8> flag is off, but the octets should be interpreted as
871 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
872 however, the scalar may have the C<SvUTF8> flag on, and in this case its
873 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
874 is in effect. This logic may change in the future; use this function
875 instead of implementing the logic yourself.
881 Perl_lex_bufutf8(pTHX)
887 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
889 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
890 at least C<len> octets (including terminating C<NUL>). Returns a
891 pointer to the reallocated buffer. This is necessary before making
892 any direct modification of the buffer that would increase its length.
893 L</lex_stuff_pvn> provides a more convenient way to insert text into
896 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
897 this function updates all of the lexer's variables that point directly
904 Perl_lex_grow_linestr(pTHX_ STRLEN len)
908 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
909 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
910 linestr = PL_parser->linestr;
911 buf = SvPVX(linestr);
912 if (len <= SvLEN(linestr))
914 bufend_pos = PL_parser->bufend - buf;
915 bufptr_pos = PL_parser->bufptr - buf;
916 oldbufptr_pos = PL_parser->oldbufptr - buf;
917 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
918 linestart_pos = PL_parser->linestart - buf;
919 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
920 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
921 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
922 PL_parser->lex_shared->re_eval_start - buf : 0;
924 buf = sv_grow(linestr, len);
926 PL_parser->bufend = buf + bufend_pos;
927 PL_parser->bufptr = buf + bufptr_pos;
928 PL_parser->oldbufptr = buf + oldbufptr_pos;
929 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
930 PL_parser->linestart = buf + linestart_pos;
931 if (PL_parser->last_uni)
932 PL_parser->last_uni = buf + last_uni_pos;
933 if (PL_parser->last_lop)
934 PL_parser->last_lop = buf + last_lop_pos;
935 if (PL_parser->lex_shared->re_eval_start)
936 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
941 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
943 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
944 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
945 reallocating the buffer if necessary. This means that lexing code that
946 runs later will see the characters as if they had appeared in the input.
947 It is not recommended to do this as part of normal parsing, and most
948 uses of this facility run the risk of the inserted characters being
949 interpreted in an unintended manner.
951 The string to be inserted is represented by C<len> octets starting
952 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
953 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
954 The characters are recoded for the lexer buffer, according to how the
955 buffer is currently being interpreted (L</lex_bufutf8>). If a string
956 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
957 function is more convenient.
963 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
967 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
968 if (flags & ~(LEX_STUFF_UTF8))
969 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
971 if (flags & LEX_STUFF_UTF8) {
974 STRLEN highhalf = 0; /* Count of variants */
975 const char *p, *e = pv+len;
976 for (p = pv; p != e; p++) {
977 if (! UTF8_IS_INVARIANT(*p)) {
983 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
984 bufptr = PL_parser->bufptr;
985 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
986 SvCUR_set(PL_parser->linestr,
987 SvCUR(PL_parser->linestr) + len+highhalf);
988 PL_parser->bufend += len+highhalf;
989 for (p = pv; p != e; p++) {
991 if (! UTF8_IS_INVARIANT(c)) {
992 *bufptr++ = UTF8_TWO_BYTE_HI(c);
993 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1000 if (flags & LEX_STUFF_UTF8) {
1001 STRLEN highhalf = 0;
1002 const char *p, *e = pv+len;
1003 for (p = pv; p != e; p++) {
1005 if (UTF8_IS_ABOVE_LATIN1(c)) {
1006 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1007 "non-Latin-1 character into Latin-1 input");
1008 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1011 } else if (! UTF8_IS_INVARIANT(c)) {
1012 /* malformed UTF-8 */
1014 SAVESPTR(PL_warnhook);
1015 PL_warnhook = PERL_WARNHOOK_FATAL;
1016 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1022 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1023 bufptr = PL_parser->bufptr;
1024 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1025 SvCUR_set(PL_parser->linestr,
1026 SvCUR(PL_parser->linestr) + len-highhalf);
1027 PL_parser->bufend += len-highhalf;
1030 if (UTF8_IS_INVARIANT(*p)) {
1036 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043 bufptr = PL_parser->bufptr;
1044 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1045 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1046 PL_parser->bufend += len;
1047 Copy(pv, bufptr, len, char);
1053 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1055 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057 reallocating the buffer if necessary. This means that lexing code that
1058 runs later will see the characters as if they had appeared in the input.
1059 It is not recommended to do this as part of normal parsing, and most
1060 uses of this facility run the risk of the inserted characters being
1061 interpreted in an unintended manner.
1063 The string to be inserted is represented by octets starting at C<pv>
1064 and continuing to the first nul. These octets are interpreted as either
1065 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066 in C<flags>. The characters are recoded for the lexer buffer, according
1067 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068 If it is not convenient to nul-terminate a string to be inserted, the
1069 L</lex_stuff_pvn> function is more appropriate.
1075 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1077 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078 lex_stuff_pvn(pv, strlen(pv), flags);
1082 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1084 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086 reallocating the buffer if necessary. This means that lexing code that
1087 runs later will see the characters as if they had appeared in the input.
1088 It is not recommended to do this as part of normal parsing, and most
1089 uses of this facility run the risk of the inserted characters being
1090 interpreted in an unintended manner.
1092 The string to be inserted is the string value of C<sv>. The characters
1093 are recoded for the lexer buffer, according to how the buffer is currently
1094 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1095 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096 need to construct a scalar.
1102 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1106 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1110 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1114 =for apidoc Amx|void|lex_unstuff|char *ptr
1116 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1118 This hides the discarded text from any lexing code that runs later,
1119 as if the text had never appeared.
1121 This is not the normal way to consume lexed text. For that, use
1128 Perl_lex_unstuff(pTHX_ char *ptr)
1132 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133 buf = PL_parser->bufptr;
1135 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138 bufend = PL_parser->bufend;
1140 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141 unstuff_len = ptr - buf;
1142 Move(ptr, buf, bufend+1-ptr, char);
1143 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144 PL_parser->bufend = bufend - unstuff_len;
1148 =for apidoc Amx|void|lex_read_to|char *ptr
1150 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1152 performing the correct bookkeeping whenever a newline character is passed.
1153 This is the normal way to consume lexed text.
1155 Interpretation of the buffer's octets can be abstracted out by
1156 using the slightly higher-level functions L</lex_peek_unichar> and
1157 L</lex_read_unichar>.
1163 Perl_lex_read_to(pTHX_ char *ptr)
1166 PERL_ARGS_ASSERT_LEX_READ_TO;
1167 s = PL_parser->bufptr;
1168 if (ptr < s || ptr > PL_parser->bufend)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170 for (; s != ptr; s++)
1172 COPLINE_INC_WITH_HERELINES;
1173 PL_parser->linestart = s+1;
1175 PL_parser->bufptr = ptr;
1179 =for apidoc Amx|void|lex_discard_to|char *ptr
1181 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182 up to C<ptr>. The remaining content of the buffer will be moved, and
1183 all pointers into the buffer updated appropriately. C<ptr> must not
1184 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185 it is not permitted to discard text that has yet to be lexed.
1187 Normally it is not necessarily to do this directly, because it suffices to
1188 use the implicit discarding behaviour of L</lex_next_chunk> and things
1189 based on it. However, if a token stretches across multiple lines,
1190 and the lexing code has kept multiple lines of text in the buffer for
1191 that purpose, then after completion of the token it would be wise to
1192 explicitly discard the now-unneeded earlier lines, to avoid future
1193 multi-line tokens growing the buffer without bound.
1199 Perl_lex_discard_to(pTHX_ char *ptr)
1203 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204 buf = SvPVX(PL_parser->linestr);
1206 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1209 if (ptr > PL_parser->bufptr)
1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211 discard_len = ptr - buf;
1212 if (PL_parser->oldbufptr < ptr)
1213 PL_parser->oldbufptr = ptr;
1214 if (PL_parser->oldoldbufptr < ptr)
1215 PL_parser->oldoldbufptr = ptr;
1216 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217 PL_parser->last_uni = NULL;
1218 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219 PL_parser->last_lop = NULL;
1220 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222 PL_parser->bufend -= discard_len;
1223 PL_parser->bufptr -= discard_len;
1224 PL_parser->oldbufptr -= discard_len;
1225 PL_parser->oldoldbufptr -= discard_len;
1226 if (PL_parser->last_uni)
1227 PL_parser->last_uni -= discard_len;
1228 if (PL_parser->last_lop)
1229 PL_parser->last_lop -= discard_len;
1233 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1235 Reads in the next chunk of text to be lexed, appending it to
1236 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1237 looked to the end of the current chunk and wants to know more. It is
1238 usual, but not necessary, for lexing to have consumed the entirety of
1239 the current chunk at this time.
1241 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242 chunk (i.e., the current chunk has been entirely consumed), normally the
1243 current chunk will be discarded at the same time that the new chunk is
1244 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1245 will not be discarded. If the current chunk has not been entirely
1246 consumed, then it will not be discarded regardless of the flag.
1248 Returns true if some new text was added to the buffer, or false if the
1249 buffer has reached the end of the input text.
1254 #define LEX_FAKE_EOF 0x80000000
1255 #define LEX_NO_TERM 0x40000000 /* here-doc */
1258 Perl_lex_next_chunk(pTHX_ U32 flags)
1262 STRLEN old_bufend_pos, new_bufend_pos;
1263 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1265 bool got_some_for_debugger = 0;
1267 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1268 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1269 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1271 linestr = PL_parser->linestr;
1272 buf = SvPVX(linestr);
1273 if (!(flags & LEX_KEEP_PREVIOUS)
1274 && PL_parser->bufptr == PL_parser->bufend)
1276 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1278 if (PL_parser->last_uni != PL_parser->bufend)
1279 PL_parser->last_uni = NULL;
1280 if (PL_parser->last_lop != PL_parser->bufend)
1281 PL_parser->last_lop = NULL;
1282 last_uni_pos = last_lop_pos = 0;
1286 old_bufend_pos = PL_parser->bufend - buf;
1287 bufptr_pos = PL_parser->bufptr - buf;
1288 oldbufptr_pos = PL_parser->oldbufptr - buf;
1289 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1290 linestart_pos = PL_parser->linestart - buf;
1291 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1292 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1294 if (flags & LEX_FAKE_EOF) {
1296 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1298 } else if (filter_gets(linestr, old_bufend_pos)) {
1300 got_some_for_debugger = 1;
1301 } else if (flags & LEX_NO_TERM) {
1304 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1305 sv_setpvs(linestr, "");
1307 /* End of real input. Close filehandle (unless it was STDIN),
1308 * then add implicit termination.
1310 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1311 PerlIO_clearerr(PL_parser->rsfp);
1312 else if (PL_parser->rsfp)
1313 (void)PerlIO_close(PL_parser->rsfp);
1314 PL_parser->rsfp = NULL;
1315 PL_parser->in_pod = PL_parser->filtered = 0;
1316 if (!PL_in_eval && PL_minus_p) {
1318 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1319 PL_minus_n = PL_minus_p = 0;
1320 } else if (!PL_in_eval && PL_minus_n) {
1321 sv_catpvs(linestr, /*{*/";}");
1324 sv_catpvs(linestr, ";");
1327 buf = SvPVX(linestr);
1328 new_bufend_pos = SvCUR(linestr);
1329 PL_parser->bufend = buf + new_bufend_pos;
1330 PL_parser->bufptr = buf + bufptr_pos;
1331 PL_parser->oldbufptr = buf + oldbufptr_pos;
1332 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1333 PL_parser->linestart = buf + linestart_pos;
1334 if (PL_parser->last_uni)
1335 PL_parser->last_uni = buf + last_uni_pos;
1336 if (PL_parser->last_lop)
1337 PL_parser->last_lop = buf + last_lop_pos;
1338 if (PL_parser->preambling != NOLINE) {
1339 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1340 PL_parser->preambling = NOLINE;
1342 if ( got_some_for_debugger
1343 && PERLDB_LINE_OR_SAVESRC
1344 && PL_curstash != PL_debstash)
1346 /* debugger active and we're not compiling the debugger code,
1347 * so store the line into the debugger's array of lines
1349 update_debugger_info(NULL, buf+old_bufend_pos,
1350 new_bufend_pos-old_bufend_pos);
1356 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1358 Looks ahead one (Unicode) character in the text currently being lexed.
1359 Returns the codepoint (unsigned integer value) of the next character,
1360 or -1 if lexing has reached the end of the input text. To consume the
1361 peeked character, use L</lex_read_unichar>.
1363 If the next character is in (or extends into) the next chunk of input
1364 text, the next chunk will be read in. Normally the current chunk will be
1365 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1366 bit set, then the current chunk will not be discarded.
1368 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1369 is encountered, an exception is generated.
1375 Perl_lex_peek_unichar(pTHX_ U32 flags)
1379 if (flags & ~(LEX_KEEP_PREVIOUS))
1380 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1381 s = PL_parser->bufptr;
1382 bufend = PL_parser->bufend;
1388 if (!lex_next_chunk(flags))
1390 s = PL_parser->bufptr;
1391 bufend = PL_parser->bufend;
1394 if (UTF8_IS_INVARIANT(head))
1396 if (UTF8_IS_START(head)) {
1397 len = UTF8SKIP(&head);
1398 while ((STRLEN)(bufend-s) < len) {
1399 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1401 s = PL_parser->bufptr;
1402 bufend = PL_parser->bufend;
1405 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1406 if (retlen == (STRLEN)-1) {
1407 /* malformed UTF-8 */
1409 SAVESPTR(PL_warnhook);
1410 PL_warnhook = PERL_WARNHOOK_FATAL;
1411 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1417 if (!lex_next_chunk(flags))
1419 s = PL_parser->bufptr;
1426 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1428 Reads the next (Unicode) character in the text currently being lexed.
1429 Returns the codepoint (unsigned integer value) of the character read,
1430 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1431 if lexing has reached the end of the input text. To non-destructively
1432 examine the next character, use L</lex_peek_unichar> instead.
1434 If the next character is in (or extends into) the next chunk of input
1435 text, the next chunk will be read in. Normally the current chunk will be
1436 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1437 bit set, then the current chunk will not be discarded.
1439 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1440 is encountered, an exception is generated.
1446 Perl_lex_read_unichar(pTHX_ U32 flags)
1449 if (flags & ~(LEX_KEEP_PREVIOUS))
1450 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1451 c = lex_peek_unichar(flags);
1454 COPLINE_INC_WITH_HERELINES;
1456 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1458 ++(PL_parser->bufptr);
1464 =for apidoc Amx|void|lex_read_space|U32 flags
1466 Reads optional spaces, in Perl style, in the text currently being
1467 lexed. The spaces may include ordinary whitespace characters and
1468 Perl-style comments. C<#line> directives are processed if encountered.
1469 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1470 at a non-space character (or the end of the input text).
1472 If spaces extend into the next chunk of input text, the next chunk will
1473 be read in. Normally the current chunk will be discarded at the same
1474 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1475 chunk will not be discarded.
1480 #define LEX_NO_INCLINE 0x40000000
1481 #define LEX_NO_NEXT_CHUNK 0x80000000
1484 Perl_lex_read_space(pTHX_ U32 flags)
1487 const bool can_incline = !(flags & LEX_NO_INCLINE);
1488 bool need_incline = 0;
1489 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1490 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1491 s = PL_parser->bufptr;
1492 bufend = PL_parser->bufend;
1498 } while (!(c == '\n' || (c == 0 && s == bufend)));
1499 } else if (c == '\n') {
1502 PL_parser->linestart = s;
1508 } else if (isSPACE(c)) {
1510 } else if (c == 0 && s == bufend) {
1513 if (flags & LEX_NO_NEXT_CHUNK)
1515 PL_parser->bufptr = s;
1516 l = CopLINE(PL_curcop);
1517 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1518 got_more = lex_next_chunk(flags);
1519 CopLINE_set(PL_curcop, l);
1520 s = PL_parser->bufptr;
1521 bufend = PL_parser->bufend;
1524 if (can_incline && need_incline && PL_parser->rsfp) {
1534 PL_parser->bufptr = s;
1539 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1541 This function performs syntax checking on a prototype, C<proto>.
1542 If C<warn> is true, any illegal characters or mismatched brackets
1543 will trigger illegalproto warnings, declaring that they were
1544 detected in the prototype for C<name>.
1546 The return value is C<true> if this is a valid prototype, and
1547 C<false> if it is not, regardless of whether C<warn> was C<true> or
1550 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1557 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1559 STRLEN len, origlen;
1560 char *p = proto ? SvPV(proto, len) : NULL;
1561 bool bad_proto = FALSE;
1562 bool in_brackets = FALSE;
1563 bool after_slash = FALSE;
1564 char greedy_proto = ' ';
1565 bool proto_after_greedy_proto = FALSE;
1566 bool must_be_last = FALSE;
1567 bool underscore = FALSE;
1568 bool bad_proto_after_underscore = FALSE;
1570 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1576 for (; len--; p++) {
1579 proto_after_greedy_proto = TRUE;
1581 if (!strchr(";@%", *p))
1582 bad_proto_after_underscore = TRUE;
1585 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1592 in_brackets = FALSE;
1593 else if ((*p == '@' || *p == '%')
1597 must_be_last = TRUE;
1606 after_slash = FALSE;
1611 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1614 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1615 origlen, UNI_DISPLAY_ISPRINT)
1616 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1618 if (proto_after_greedy_proto)
1619 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1620 "Prototype after '%c' for %"SVf" : %s",
1621 greedy_proto, SVfARG(name), p);
1623 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1624 "Missing ']' in prototype for %"SVf" : %s",
1627 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1628 "Illegal character in prototype for %"SVf" : %s",
1630 if (bad_proto_after_underscore)
1631 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1632 "Illegal character after '_' in prototype for %"SVf" : %s",
1636 return (! (proto_after_greedy_proto || bad_proto) );
1641 * This subroutine has nothing to do with tilting, whether at windmills
1642 * or pinball tables. Its name is short for "increment line". It
1643 * increments the current line number in CopLINE(PL_curcop) and checks
1644 * to see whether the line starts with a comment of the form
1645 * # line 500 "foo.pm"
1646 * If so, it sets the current line number and file to the values in the comment.
1650 S_incline(pTHX_ const char *s)
1658 PERL_ARGS_ASSERT_INCLINE;
1660 COPLINE_INC_WITH_HERELINES;
1661 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1662 && s+1 == PL_bufend && *s == ';') {
1663 /* fake newline in string eval */
1664 CopLINE_dec(PL_curcop);
1669 while (SPACE_OR_TAB(*s))
1671 if (strnEQ(s, "line", 4))
1675 if (SPACE_OR_TAB(*s))
1679 while (SPACE_OR_TAB(*s))
1687 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1689 while (SPACE_OR_TAB(*s))
1691 if (*s == '"' && (t = strchr(s+1, '"'))) {
1697 while (*t && !isSPACE(*t))
1701 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1703 if (*e != '\n' && *e != '\0')
1704 return; /* false alarm */
1706 if (!grok_atoUV(n, &uv, &e))
1708 line_num = ((line_t)uv) - 1;
1711 const STRLEN len = t - s;
1713 if (!PL_rsfp && !PL_parser->filtered) {
1714 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1715 * to *{"::_<newfilename"} */
1716 /* However, the long form of evals is only turned on by the
1717 debugger - usually they're "(eval %lu)" */
1718 GV * const cfgv = CopFILEGV(PL_curcop);
1721 STRLEN tmplen2 = len;
1725 if (tmplen2 + 2 <= sizeof smallbuf)
1728 Newx(tmpbuf2, tmplen2 + 2, char);
1733 memcpy(tmpbuf2 + 2, s, tmplen2);
1736 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1738 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1739 /* adjust ${"::_<newfilename"} to store the new file name */
1740 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1741 /* The line number may differ. If that is the case,
1742 alias the saved lines that are in the array.
1743 Otherwise alias the whole array. */
1744 if (CopLINE(PL_curcop) == line_num) {
1745 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1746 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1748 else if (GvAV(cfgv)) {
1749 AV * const av = GvAV(cfgv);
1750 const I32 start = CopLINE(PL_curcop)+1;
1751 I32 items = AvFILLp(av) - start;
1753 AV * const av2 = GvAVn(gv2);
1754 SV **svp = AvARRAY(av) + start;
1755 I32 l = (I32)line_num+1;
1757 av_store(av2, l++, SvREFCNT_inc(*svp++));
1762 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1765 CopFILE_free(PL_curcop);
1766 CopFILE_setn(PL_curcop, s, len);
1768 CopLINE_set(PL_curcop, line_num);
1771 #define skipspace(s) skipspace_flags(s, 0)
1775 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1777 AV *av = CopFILEAVx(PL_curcop);
1780 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1782 sv = *av_fetch(av, 0, 1);
1783 SvUPGRADE(sv, SVt_PVMG);
1785 if (!SvPOK(sv)) sv_setpvs(sv,"");
1787 sv_catsv(sv, orig_sv);
1789 sv_catpvn(sv, buf, len);
1794 if (PL_parser->preambling == NOLINE)
1795 av_store(av, CopLINE(PL_curcop), sv);
1801 * Called to gobble the appropriate amount and type of whitespace.
1802 * Skips comments as well.
1806 S_skipspace_flags(pTHX_ char *s, U32 flags)
1808 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1809 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1810 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1813 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1815 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1816 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1817 LEX_NO_NEXT_CHUNK : 0));
1819 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1820 if (PL_linestart > PL_bufptr)
1821 PL_bufptr = PL_linestart;
1829 * Check the unary operators to ensure there's no ambiguity in how they're
1830 * used. An ambiguous piece of code would be:
1832 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1833 * the +5 is its argument.
1842 if (PL_oldoldbufptr != PL_last_uni)
1844 while (isSPACE(*PL_last_uni))
1847 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
1848 s += UTF ? UTF8SKIP(s) : 1;
1849 if ((t = strchr(s, '(')) && t < PL_bufptr)
1852 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1853 "Warning: Use of \"%"UTF8f"\" without parentheses is ambiguous",
1854 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1858 * LOP : macro to build a list operator. Its behaviour has been replaced
1859 * with a subroutine, S_lop() for which LOP is just another name.
1862 #define LOP(f,x) return lop(f,x,s)
1866 * Build a list operator (or something that might be one). The rules:
1867 * - if we have a next token, then it's a list operator (no parens) for
1868 * which the next token has already been parsed; e.g.,
1871 * - if the next thing is an opening paren, then it's a function
1872 * - else it's a list operator
1876 S_lop(pTHX_ I32 f, int x, char *s)
1878 PERL_ARGS_ASSERT_LOP;
1883 PL_last_lop = PL_oldbufptr;
1884 PL_last_lop_op = (OPCODE)f;
1889 return REPORT(FUNC);
1892 return REPORT(FUNC);
1895 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1896 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1897 return REPORT(LSTOP);
1903 * When the lexer realizes it knows the next token (for instance,
1904 * it is reordering tokens for the parser) then it can call S_force_next
1905 * to know what token to return the next time the lexer is called. Caller
1906 * will need to set PL_nextval[] and possibly PL_expect to ensure
1907 * the lexer handles the token correctly.
1911 S_force_next(pTHX_ I32 type)
1915 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1916 tokereport(type, &NEXTVAL_NEXTTOKE);
1919 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1920 PL_nexttype[PL_nexttoke] = type;
1922 if (PL_lex_state != LEX_KNOWNEXT) {
1923 PL_lex_defer = PL_lex_state;
1924 PL_lex_state = LEX_KNOWNEXT;
1931 * This subroutine handles postfix deref syntax after the arrow has already
1932 * been emitted. @* $* etc. are emitted as two separate token right here.
1933 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
1934 * only the first, leaving yylex to find the next.
1938 S_postderef(pTHX_ int const funny, char const next)
1940 assert(funny == DOLSHARP || strchr("$@%&*", funny));
1941 assert(strchr("*[{", next));
1943 PL_expect = XOPERATOR;
1944 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1945 assert('@' == funny || '$' == funny || DOLSHARP == funny);
1946 PL_lex_state = LEX_INTERPEND;
1947 force_next(POSTJOIN);
1953 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
1954 && !PL_lex_brackets)
1956 PL_expect = XOPERATOR;
1965 int yyc = PL_parser->yychar;
1966 if (yyc != YYEMPTY) {
1968 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1969 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1970 PL_lex_allbrackets--;
1972 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1973 } else if (yyc == '('/*)*/) {
1974 PL_lex_allbrackets--;
1979 PL_parser->yychar = YYEMPTY;
1984 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
1986 SV * const sv = newSVpvn_utf8(start, len,
1989 && !is_invariant_string((const U8*)start, len)
1990 && is_utf8_string((const U8*)start, len));
1996 * When the lexer knows the next thing is a word (for instance, it has
1997 * just seen -> and it knows that the next char is a word char, then
1998 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2002 * char *start : buffer position (must be within PL_linestr)
2003 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2004 * int check_keyword : if true, Perl checks to make sure the word isn't
2005 * a keyword (do this if the word is a label, e.g. goto FOO)
2006 * int allow_pack : if true, : characters will also be allowed (require,
2007 * use, etc. do this)
2011 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2016 PERL_ARGS_ASSERT_FORCE_WORD;
2018 start = skipspace(start);
2020 if (isIDFIRST_lazy_if(s,UTF)
2021 || (allow_pack && *s == ':') )
2023 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2024 if (check_keyword) {
2025 char *s2 = PL_tokenbuf;
2027 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2029 if (keyword(s2, len2, 0))
2032 if (token == METHOD) {
2037 PL_expect = XOPERATOR;
2040 NEXTVAL_NEXTTOKE.opval
2041 = (OP*)newSVOP(OP_CONST,0,
2042 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2043 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2051 * Called when the lexer wants $foo *foo &foo etc, but the program
2052 * text only contains the "foo" portion. The first argument is a pointer
2053 * to the "foo", and the second argument is the type symbol to prefix.
2054 * Forces the next token to be a "WORD".
2055 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2059 S_force_ident(pTHX_ const char *s, int kind)
2061 PERL_ARGS_ASSERT_FORCE_IDENT;
2064 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2065 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2066 UTF ? SVf_UTF8 : 0));
2067 NEXTVAL_NEXTTOKE.opval = o;
2070 o->op_private = OPpCONST_ENTERED;
2071 /* XXX see note in pp_entereval() for why we forgo typo
2072 warnings if the symbol must be introduced in an eval.
2074 gv_fetchpvn_flags(s, len,
2075 (PL_in_eval ? GV_ADDMULTI
2076 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2077 kind == '$' ? SVt_PV :
2078 kind == '@' ? SVt_PVAV :
2079 kind == '%' ? SVt_PVHV :
2087 S_force_ident_maybe_lex(pTHX_ char pit)
2089 NEXTVAL_NEXTTOKE.ival = pit;
2094 Perl_str_to_version(pTHX_ SV *sv)
2099 const char *start = SvPV_const(sv,len);
2100 const char * const end = start + len;
2101 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2103 PERL_ARGS_ASSERT_STR_TO_VERSION;
2105 while (start < end) {
2109 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2114 retval += ((NV)n)/nshift;
2123 * Forces the next token to be a version number.
2124 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2125 * and if "guessing" is TRUE, then no new token is created (and the caller
2126 * must use an alternative parsing method).
2130 S_force_version(pTHX_ char *s, int guessing)
2135 PERL_ARGS_ASSERT_FORCE_VERSION;
2143 while (isDIGIT(*d) || *d == '_' || *d == '.')
2145 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2147 s = scan_num(s, &pl_yylval);
2148 version = pl_yylval.opval;
2149 ver = cSVOPx(version)->op_sv;
2150 if (SvPOK(ver) && !SvNIOK(ver)) {
2151 SvUPGRADE(ver, SVt_PVNV);
2152 SvNV_set(ver, str_to_version(ver));
2153 SvNOK_on(ver); /* hint that it is a version */
2156 else if (guessing) {
2161 /* NOTE: The parser sees the package name and the VERSION swapped */
2162 NEXTVAL_NEXTTOKE.opval = version;
2169 * S_force_strict_version
2170 * Forces the next token to be a version number using strict syntax rules.
2174 S_force_strict_version(pTHX_ char *s)
2177 const char *errstr = NULL;
2179 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2181 while (isSPACE(*s)) /* leading whitespace */
2184 if (is_STRICT_VERSION(s,&errstr)) {
2186 s = (char *)scan_version(s, ver, 0);
2187 version = newSVOP(OP_CONST, 0, ver);
2189 else if ((*s != ';' && *s != '{' && *s != '}' )
2190 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2194 yyerror(errstr); /* version required */
2198 /* NOTE: The parser sees the package name and the VERSION swapped */
2199 NEXTVAL_NEXTTOKE.opval = version;
2207 * Tokenize a quoted string passed in as an SV. It finds the next
2208 * chunk, up to end of string or a backslash. It may make a new
2209 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2214 S_tokeq(pTHX_ SV *sv)
2221 PERL_ARGS_ASSERT_TOKEQ;
2225 assert (!SvIsCOW(sv));
2226 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2230 /* This is relying on the SV being "well formed" with a trailing '\0' */
2231 while (s < send && !(*s == '\\' && s[1] == '\\'))
2236 if ( PL_hints & HINT_NEW_STRING ) {
2237 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2238 SVs_TEMP | SvUTF8(sv));
2242 if (s + 1 < send && (s[1] == '\\'))
2243 s++; /* all that, just for this */
2248 SvCUR_set(sv, d - SvPVX_const(sv));
2250 if ( PL_hints & HINT_NEW_STRING )
2251 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2256 * Now come three functions related to double-quote context,
2257 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2258 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2259 * interact with PL_lex_state, and create fake ( ... ) argument lists
2260 * to handle functions and concatenation.
2264 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2269 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2271 * Pattern matching will set PL_lex_op to the pattern-matching op to
2272 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2274 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2276 * Everything else becomes a FUNC.
2278 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2279 * had an OP_CONST or OP_READLINE). This just sets us up for a
2280 * call to S_sublex_push().
2284 S_sublex_start(pTHX)
2286 const I32 op_type = pl_yylval.ival;
2288 if (op_type == OP_NULL) {
2289 pl_yylval.opval = PL_lex_op;
2293 if (op_type == OP_CONST) {
2294 SV *sv = PL_lex_stuff;
2295 PL_lex_stuff = NULL;
2298 if (SvTYPE(sv) == SVt_PVIV) {
2299 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2301 const char * const p = SvPV_const(sv, len);
2302 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2306 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2310 PL_sublex_info.super_state = PL_lex_state;
2311 PL_sublex_info.sub_inwhat = (U16)op_type;
2312 PL_sublex_info.sub_op = PL_lex_op;
2313 PL_lex_state = LEX_INTERPPUSH;
2317 pl_yylval.opval = PL_lex_op;
2327 * Create a new scope to save the lexing state. The scope will be
2328 * ended in S_sublex_done. Returns a '(', starting the function arguments
2329 * to the uc, lc, etc. found before.
2330 * Sets PL_lex_state to LEX_INTERPCONCAT.
2337 const bool is_heredoc = PL_multi_close == '<';
2340 PL_lex_state = PL_sublex_info.super_state;
2341 SAVEI8(PL_lex_dojoin);
2342 SAVEI32(PL_lex_brackets);
2343 SAVEI32(PL_lex_allbrackets);
2344 SAVEI32(PL_lex_formbrack);
2345 SAVEI8(PL_lex_fakeeof);
2346 SAVEI32(PL_lex_casemods);
2347 SAVEI32(PL_lex_starts);
2348 SAVEI8(PL_lex_state);
2349 SAVEI8(PL_lex_defer);
2350 SAVESPTR(PL_lex_repl);
2351 SAVEVPTR(PL_lex_inpat);
2352 SAVEI16(PL_lex_inwhat);
2355 SAVECOPLINE(PL_curcop);
2356 SAVEI32(PL_multi_end);
2357 SAVEI32(PL_parser->herelines);
2358 PL_parser->herelines = 0;
2360 SAVEI8(PL_multi_close);
2361 SAVEPPTR(PL_bufptr);
2362 SAVEPPTR(PL_bufend);
2363 SAVEPPTR(PL_oldbufptr);
2364 SAVEPPTR(PL_oldoldbufptr);
2365 SAVEPPTR(PL_last_lop);
2366 SAVEPPTR(PL_last_uni);
2367 SAVEPPTR(PL_linestart);
2368 SAVESPTR(PL_linestr);
2369 SAVEGENERICPV(PL_lex_brackstack);
2370 SAVEGENERICPV(PL_lex_casestack);
2371 SAVEGENERICPV(PL_parser->lex_shared);
2372 SAVEBOOL(PL_parser->lex_re_reparsing);
2373 SAVEI32(PL_copline);
2375 /* The here-doc parser needs to be able to peek into outer lexing
2376 scopes to find the body of the here-doc. So we put PL_linestr and
2377 PL_bufptr into lex_shared, to ‘share’ those values.
2379 PL_parser->lex_shared->ls_linestr = PL_linestr;
2380 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2382 PL_linestr = PL_lex_stuff;
2383 PL_lex_repl = PL_sublex_info.repl;
2384 PL_lex_stuff = NULL;
2385 PL_sublex_info.repl = NULL;
2387 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2388 set for an inner quote-like operator and then an error causes scope-
2389 popping. We must not have a PL_lex_stuff value left dangling, as
2390 that breaks assumptions elsewhere. See bug #123617. */
2391 SAVEGENERICSV(PL_lex_stuff);
2392 SAVEGENERICSV(PL_sublex_info.repl);
2394 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2395 = SvPVX(PL_linestr);
2396 PL_bufend += SvCUR(PL_linestr);
2397 PL_last_lop = PL_last_uni = NULL;
2398 SAVEFREESV(PL_linestr);
2399 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2401 PL_lex_dojoin = FALSE;
2402 PL_lex_brackets = PL_lex_formbrack = 0;
2403 PL_lex_allbrackets = 0;
2404 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2405 Newx(PL_lex_brackstack, 120, char);
2406 Newx(PL_lex_casestack, 12, char);
2407 PL_lex_casemods = 0;
2408 *PL_lex_casestack = '\0';
2410 PL_lex_state = LEX_INTERPCONCAT;
2412 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2413 PL_copline = NOLINE;
2415 Newxz(shared, 1, LEXSHARED);
2416 shared->ls_prev = PL_parser->lex_shared;
2417 PL_parser->lex_shared = shared;
2419 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2420 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2421 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2422 PL_lex_inpat = PL_sublex_info.sub_op;
2424 PL_lex_inpat = NULL;
2426 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2427 PL_in_eval &= ~EVAL_RE_REPARSING;
2434 * Restores lexer state after a S_sublex_push.
2440 if (!PL_lex_starts++) {
2441 SV * const sv = newSVpvs("");
2442 if (SvUTF8(PL_linestr))
2444 PL_expect = XOPERATOR;
2445 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2449 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2450 PL_lex_state = LEX_INTERPCASEMOD;
2454 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2455 assert(PL_lex_inwhat != OP_TRANSR);
2457 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2458 PL_linestr = PL_lex_repl;
2460 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2461 PL_bufend += SvCUR(PL_linestr);
2462 PL_last_lop = PL_last_uni = NULL;
2463 PL_lex_dojoin = FALSE;
2464 PL_lex_brackets = 0;
2465 PL_lex_allbrackets = 0;
2466 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2467 PL_lex_casemods = 0;
2468 *PL_lex_casestack = '\0';
2470 if (SvEVALED(PL_lex_repl)) {
2471 PL_lex_state = LEX_INTERPNORMAL;
2473 /* we don't clear PL_lex_repl here, so that we can check later
2474 whether this is an evalled subst; that means we rely on the
2475 logic to ensure sublex_done() is called again only via the
2476 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2479 PL_lex_state = LEX_INTERPCONCAT;
2482 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2483 CopLINE(PL_curcop) +=
2484 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2485 + PL_parser->herelines;
2486 PL_parser->herelines = 0;
2491 const line_t l = CopLINE(PL_curcop);
2493 if (PL_multi_close == '<')
2494 PL_parser->herelines += l - PL_multi_end;
2495 PL_bufend = SvPVX(PL_linestr);
2496 PL_bufend += SvCUR(PL_linestr);
2497 PL_expect = XOPERATOR;
2502 PERL_STATIC_INLINE SV*
2503 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2505 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2506 * interior, hence to the "}". Finds what the name resolves to, returning
2507 * an SV* containing it; NULL if no valid one found */
2509 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2516 const U8* first_bad_char_loc;
2517 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2519 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2522 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
2523 "Unknown charname '' is deprecated");
2527 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2529 &first_bad_char_loc))
2531 /* If warnings are on, this will print a more detailed analysis of what
2532 * is wrong than the error message below */
2533 utf8n_to_uvchr(first_bad_char_loc,
2534 e - ((char *) first_bad_char_loc),
2537 /* We deliberately don't try to print the malformed character, which
2538 * might not print very well; it also may be just the first of many
2539 * malformations, so don't print what comes after it */
2540 yyerror_pv(Perl_form(aTHX_
2541 "Malformed UTF-8 character immediately after '%.*s'",
2542 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr),
2547 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2548 /* include the <}> */
2549 e - backslash_ptr + 1);
2551 SvREFCNT_dec_NN(res);
2555 /* See if the charnames handler is the Perl core's, and if so, we can skip
2556 * the validation needed for a user-supplied one, as Perl's does its own
2558 table = GvHV(PL_hintgv); /* ^H */
2559 cvp = hv_fetchs(table, "charnames", FALSE);
2560 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2561 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2563 const char * const name = HvNAME(stash);
2564 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2565 && strEQ(name, "_charnames")) {
2570 /* Here, it isn't Perl's charname handler. We can't rely on a
2571 * user-supplied handler to validate the input name. For non-ut8 input,
2572 * look to see that the first character is legal. Then loop through the
2573 * rest checking that each is a continuation */
2575 /* This code makes the reasonable assumption that the only Latin1-range
2576 * characters that begin a character name alias are alphabetic, otherwise
2577 * would have to create a isCHARNAME_BEGIN macro */
2580 if (! isALPHAU(*s)) {
2585 if (! isCHARNAME_CONT(*s)) {
2588 if (*s == ' ' && *(s-1) == ' ') {
2591 if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2592 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2593 "NO-BREAK SPACE in a charnames "
2594 "alias definition is deprecated");
2600 /* Similarly for utf8. For invariants can check directly; for other
2601 * Latin1, can calculate their code point and check; otherwise use a
2603 if (UTF8_IS_INVARIANT(*s)) {
2604 if (! isALPHAU(*s)) {
2608 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2609 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2615 if (! PL_utf8_charname_begin) {
2616 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2617 PL_utf8_charname_begin = _core_swash_init("utf8",
2618 "_Perl_Charname_Begin",
2620 1, 0, NULL, &flags);
2622 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2629 if (UTF8_IS_INVARIANT(*s)) {
2630 if (! isCHARNAME_CONT(*s)) {
2633 if (*s == ' ' && *(s-1) == ' ') {
2638 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2639 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2643 if (*s == *NBSP_UTF8
2644 && *(s+1) == *(NBSP_UTF8+1)
2645 && ckWARN_d(WARN_DEPRECATED))
2647 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2648 "NO-BREAK SPACE in a charnames "
2649 "alias definition is deprecated");
2654 if (! PL_utf8_charname_continue) {
2655 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2656 PL_utf8_charname_continue = _core_swash_init("utf8",
2657 "_Perl_Charname_Continue",
2659 1, 0, NULL, &flags);
2661 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2668 if (*(s-1) == ' ') {
2671 "charnames alias definitions may not contain trailing "
2672 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2673 (int)(s - backslash_ptr + 1), backslash_ptr,
2674 (int)(e - s + 1), s + 1
2676 UTF ? SVf_UTF8 : 0);
2680 if (SvUTF8(res)) { /* Don't accept malformed input */
2681 const U8* first_bad_char_loc;
2683 const char* const str = SvPV_const(res, len);
2684 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2685 /* If warnings are on, this will print a more detailed analysis of
2686 * what is wrong than the error message below */
2687 utf8n_to_uvchr(first_bad_char_loc,
2688 (char *) first_bad_char_loc - str,
2691 /* We deliberately don't try to print the malformed character,
2692 * which might not print very well; it also may be just the first
2693 * of many malformations, so don't print what comes after it */
2696 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2697 (int) (e - backslash_ptr + 1), backslash_ptr,
2698 (int) ((char *) first_bad_char_loc - str), str
2709 /* The final %.*s makes sure that should the trailing NUL be missing
2710 * that this print won't run off the end of the string */
2713 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2714 (int)(s - backslash_ptr + 1), backslash_ptr,
2715 (int)(e - s + 1), s + 1
2717 UTF ? SVf_UTF8 : 0);
2724 "charnames alias definitions may not contain a sequence of "
2725 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2726 (int)(s - backslash_ptr + 1), backslash_ptr,
2727 (int)(e - s + 1), s + 1
2729 UTF ? SVf_UTF8 : 0);
2736 Extracts the next constant part of a pattern, double-quoted string,
2737 or transliteration. This is terrifying code.
2739 For example, in parsing the double-quoted string "ab\x63$d", it would
2740 stop at the '$' and return an OP_CONST containing 'abc'.
2742 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2743 processing a pattern (PL_lex_inpat is true), a transliteration
2744 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2746 Returns a pointer to the character scanned up to. If this is
2747 advanced from the start pointer supplied (i.e. if anything was
2748 successfully parsed), will leave an OP_CONST for the substring scanned
2749 in pl_yylval. Caller must intuit reason for not parsing further
2750 by looking at the next characters herself.
2754 \N{FOO} => \N{U+hex_for_character_FOO}
2755 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2758 all other \-char, including \N and \N{ apart from \N{ABC}
2761 @ and $ where it appears to be a var, but not for $ as tail anchor
2765 In transliterations:
2766 characters are VERY literal, except for - not at the start or end
2767 of the string, which indicates a range. If the range is in bytes,
2768 scan_const expands the range to the full set of intermediate
2769 characters. If the range is in utf8, the hyphen is replaced with
2770 a certain range mark which will be handled by pmtrans() in op.c.
2772 In double-quoted strings:
2774 double-quoted style: \r and \n
2775 constants: \x31, etc.
2776 deprecated backrefs: \1 (in substitution replacements)
2777 case and quoting: \U \Q \E
2780 scan_const does *not* construct ops to handle interpolated strings.
2781 It stops processing as soon as it finds an embedded $ or @ variable
2782 and leaves it to the caller to work out what's going on.
2784 embedded arrays (whether in pattern or not) could be:
2785 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2787 $ in double-quoted strings must be the symbol of an embedded scalar.
2789 $ in pattern could be $foo or could be tail anchor. Assumption:
2790 it's a tail anchor if $ is the last thing in the string, or if it's
2791 followed by one of "()| \r\n\t"
2793 \1 (backreferences) are turned into $1 in substitutions
2795 The structure of the code is
2796 while (there's a character to process) {
2797 handle transliteration ranges
2798 skip regexp comments /(?#comment)/ and codes /(?{code})/
2799 skip #-initiated comments in //x patterns
2800 check for embedded arrays
2801 check for embedded scalars
2803 deprecate \1 in substitution replacements
2804 handle string-changing backslashes \l \U \Q \E, etc.
2805 switch (what was escaped) {
2806 handle \- in a transliteration (becomes a literal -)
2807 if a pattern and not \N{, go treat as regular character
2808 handle \132 (octal characters)
2809 handle \x15 and \x{1234} (hex characters)
2810 handle \N{name} (named characters, also \N{3,5} in a pattern)
2811 handle \cV (control characters)
2812 handle printf-style backslashes (\f, \r, \n, etc)
2815 } (end if backslash)
2816 handle regular character
2817 } (end while character to read)
2822 S_scan_const(pTHX_ char *start)
2824 char *send = PL_bufend; /* end of the constant */
2825 SV *sv = newSV(send - start); /* sv for the constant. See note below
2827 char *s = start; /* start of the constant */
2828 char *d = SvPVX(sv); /* destination for copies */
2829 bool dorange = FALSE; /* are we in a translit range? */
2830 bool didrange = FALSE; /* did we just finish a range? */
2831 bool in_charclass = FALSE; /* within /[...]/ */
2832 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2833 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2834 UTF8? But, this can show as true
2835 when the source isn't utf8, as for
2836 example when it is entirely composed
2838 SV *res; /* result from charnames */
2839 STRLEN offset_to_max; /* The offset in the output to where the range
2840 high-end character is temporarily placed */
2842 /* Note on sizing: The scanned constant is placed into sv, which is
2843 * initialized by newSV() assuming one byte of output for every byte of
2844 * input. This routine expects newSV() to allocate an extra byte for a
2845 * trailing NUL, which this routine will append if it gets to the end of
2846 * the input. There may be more bytes of input than output (eg., \N{LATIN
2847 * CAPITAL LETTER A}), or more output than input if the constant ends up
2848 * recoded to utf8, but each time a construct is found that might increase
2849 * the needed size, SvGROW() is called. Its size parameter each time is
2850 * based on the best guess estimate at the time, namely the length used so
2851 * far, plus the length the current construct will occupy, plus room for
2852 * the trailing NUL, plus one byte for every input byte still unscanned */
2854 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2857 int backslash_N = 0; /* ? was the character from \N{} */
2858 int non_portable_endpoint = 0; /* ? In a range is an endpoint
2859 platform-specific like \x65 */
2862 PERL_ARGS_ASSERT_SCAN_CONST;
2864 assert(PL_lex_inwhat != OP_TRANSR);
2865 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2866 /* If we are doing a trans and we know we want UTF8 set expectation */
2867 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2868 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2871 /* Protect sv from errors and fatal warnings. */
2872 ENTER_with_name("scan_const");
2876 || dorange /* Handle tr/// range at right edge of input */
2879 /* get transliterations out of the way (they're most literal) */
2880 if (PL_lex_inwhat == OP_TRANS) {
2882 /* But there isn't any special handling necessary unless there is a
2883 * range, so for most cases we just drop down and handle the value
2884 * as any other. There are two exceptions.
2886 * 1. A minus sign indicates that we are actually going to have
2887 * a range. In this case, skip the '-', set a flag, then drop
2888 * down to handle what should be the end range value.
2889 * 2. After we've handled that value, the next time through, that
2890 * flag is set and we fix up the range.
2892 * Ranges entirely within Latin1 are expanded out entirely, in
2893 * order to avoid the significant overhead of making a swash.
2894 * Ranges that extend above Latin1 have to have a swash, so there
2895 * is no advantage to abbreviate them here, so they are stored here
2896 * as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies a
2897 * hyphen without any possible ambiguity. On EBCDIC machines, if
2898 * the range is expressed as Unicode, the Latin1 portion is
2899 * expanded out even if the entire range extends above Latin1.
2900 * This is because each code point in it has to be processed here
2901 * individually to get its native translation */
2905 /* Here, we don't think we're in a range. If we've processed
2906 * at least one character, then see if this next one is a '-',
2907 * indicating the previous one was the start of a range. But
2908 * don't bother if we're too close to the end for the minus to
2910 if (*s != '-' || s >= send - 1 || s == start) {
2912 /* A regular character. Process like any other, but first
2913 * clear any flags */
2917 non_portable_endpoint = 0;
2920 /* Drops down to generic code to process current byte */
2923 if (didrange) { /* Something like y/A-C-Z// */
2924 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
2929 s++; /* Skip past the minus */
2931 /* d now points to where the end-range character will be
2932 * placed. Save it so won't have to go finding it later,
2933 * and drop down to get that character. (Actually we
2934 * instead save the offset, to handle the case where a
2935 * realloc in the meantime could change the actual
2936 * pointer). We'll finish processing the range the next
2937 * time through the loop */
2938 offset_to_max = d - SvPVX_const(sv);
2940 } /* End of not a range */
2942 /* Here we have parsed a range. Now must handle it. At this
2944 * 'sv' is a SV* that contains the output string we are
2945 * constructing. The final two characters in that string
2946 * are the range start and range end, in order.
2947 * 'd' points to just beyond the range end in the 'sv' string,
2948 * where we would next place something
2949 * 'offset_to_max' is the offset in 'sv' at which the character
2950 * before 'd' begins.
2952 const char * max_ptr = SvPVX_const(sv) + offset_to_max;
2953 const char * min_ptr;
2955 IV range_max; /* last character in range */
2958 #ifndef EBCDIC /* Not meaningful except in EBCDIC, so initialize to false */
2959 const bool convert_unicode = FALSE;
2960 const IV real_range_max = 0;
2962 bool convert_unicode;
2963 IV real_range_max = 0;
2966 /* Get the range-ends code point values. */
2968 /* We know the utf8 is valid, because we just constructed
2969 * it ourselves in previous loop iterations */
2970 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
2971 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
2972 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
2975 min_ptr = max_ptr - 1;
2976 range_min = * (U8*) min_ptr;
2977 range_max = * (U8*) max_ptr;
2981 /* On EBCDIC platforms, we may have to deal with portable
2982 * ranges. These happen if at least one range endpoint is a
2983 * Unicode value (\N{...}), or if the range is a subset of
2984 * [A-Z] or [a-z], and both ends are literal characters,
2985 * like 'A', and not like \x{C1} */
2986 if ((convert_unicode
2987 = cBOOL(backslash_N) /* \N{} forces Unicode, hence
2989 || ( ! non_portable_endpoint
2990 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
2991 || (isUPPER_A(range_min) && isUPPER_A(range_max))))
2994 /* Special handling is needed for these portable ranges.
2995 * They are defined to all be in Unicode terms, which
2996 * include all Unicode code points between the end points.
2997 * Convert to Unicode to get the Unicode range. Later we
2998 * will convert each code point in the range back to
3000 range_min = NATIVE_TO_UNI(range_min);
3001 range_max = NATIVE_TO_UNI(range_max);
3005 if (range_min > range_max) {
3006 if (convert_unicode) {
3007 /* Need to convert back to native for meaningful
3008 * messages for this platform */
3009 range_min = UNI_TO_NATIVE(range_min);
3010 range_max = UNI_TO_NATIVE(range_max);
3013 /* Use the characters themselves for the error message if
3014 * ASCII printables; otherwise some visible representation
3016 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3018 "Invalid range \"%c-%c\" in transliteration operator",
3019 (char)range_min, (char)range_max);
3021 else if (convert_unicode) {
3022 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3024 "Invalid range \"\\N{U+%04"UVXf"}-\\N{U+%04"UVXf"}\""
3025 " in transliteration operator",
3026 range_min, range_max);
3029 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3031 "Invalid range \"\\x{%04"UVXf"}-\\x{%04"UVXf"}\""
3032 " in transliteration operator",
3033 range_min, range_max);
3039 /* We try to avoid creating a swash. If the upper end of
3040 * this range is below 256, this range won't force a swash;
3041 * otherwise it does force a swash, and as long as we have
3042 * to have one, we might as well not expand things out.
3043 * But if it's EBCDIC, we may have to look at each
3044 * character below 256 if we have to convert to/from
3048 && (range_min > 255 || ! convert_unicode)
3051 /* Move the high character one byte to the right; then
3052 * insert between it and the range begin, an illegal
3053 * byte which serves to indicate this is a range (using
3054 * a '-' could be ambiguous). */
3056 while (e-- > max_ptr) {
3059 *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3063 /* Here, we're going to expand out the range. For EBCDIC
3064 * the range can extend above 255 (not so in ASCII), so
3065 * for EBCDIC, split it into the parts above and below
3068 if (range_max > 255) {
3069 real_range_max = range_max;
3075 /* Here we need to expand out the string to contain each
3076 * character in the range. Grow the output to handle this */
3078 save_offset = min_ptr - SvPVX_const(sv);
3080 /* The base growth is the number of code points in the range */
3081 grow = range_max - range_min + 1;
3084 /* But if the output is UTF-8, some of those characters may
3085 * need two bytes (since the maximum range value here is
3086 * 255, the max bytes per character is two). On ASCII
3087 * platforms, it's not much trouble to get an accurate
3088 * count of what's needed. But on EBCDIC, the ones that
3089 * need 2 bytes are scattered around, so just use a worst
3090 * case value instead of calculating for that platform. */
3094 /* Only those above 127 require 2 bytes. This may be
3095 * everything in the range, or not */
3096 if (range_min > 127) {
3099 else if (range_max > 127) {
3100 grow += range_max - 127;
3105 /* Subtract 3 for the bytes that were already accounted for
3106 * (min, max, and the hyphen) */
3107 SvGROW(sv, SvLEN(sv) + grow - 3);
3108 d = SvPVX(sv) + save_offset; /* refresh d after realloc */
3110 /* Here, we expand out the range. On ASCII platforms, the
3111 * compiler should optimize out the 'convert_unicode==TRUE'
3112 * portion of this */
3113 if (convert_unicode) {
3116 /* Recall that the min and max are now in Unicode terms, so
3117 * we have to convert each character to its native
3120 for (i = range_min; i <= range_max; i++) {
3121 append_utf8_from_native_byte(LATIN1_TO_NATIVE((U8) i),
3126 for (i = range_min; i <= range_max; i++) {
3127 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3134 /* Here, no conversions are necessary, which means that the
3135 * first character in the range is already in 'd' and
3136 * valid, so we can skip overwriting it */
3139 for (i = range_min + 1; i <= range_max; i++) {
3140 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3145 for (i = range_min + 1; i <= range_max; i++) {
3151 /* (Compilers should optimize this out for non-EBCDIC). If the
3152 * original range extended above 255, add in that portion */
3153 if (real_range_max) {
3154 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3155 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3156 if (real_range_max > 0x101)
3157 *d++ = (char) ILLEGAL_UTF8_BYTE;
3158 if (real_range_max > 0x100)
3159 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3163 /* mark the range as done, and continue */
3167 non_portable_endpoint = 0;
3171 } /* End of is a range */
3172 } /* End of transliteration. Joins main code after these else's */
3173 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3176 while (s1 >= start && *s1-- == '\\')
3179 in_charclass = TRUE;
3182 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3185 while (s1 >= start && *s1-- == '\\')
3188 in_charclass = FALSE;
3191 /* skip for regexp comments /(?#comment)/, except for the last
3192 * char, which will be done separately.
3193 * Stop on (?{..}) and friends */
3195 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3197 while (s+1 < send && *s != ')')
3200 else if (!PL_lex_casemods
3201 && ( s[2] == '{' /* This should match regcomp.c */
3202 || (s[2] == '?' && s[3] == '{')))
3208 /* likewise skip #-initiated comments in //x patterns */
3212 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3214 while (s+1 < send && *s != '\n')
3218 /* no further processing of single-quoted regex */
3219 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3220 goto default_action;
3222 /* check for embedded arrays
3223 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3225 else if (*s == '@' && s[1]) {
3226 if (UTF ? isIDFIRST_utf8((U8*)s+1) : isWORDCHAR_A(s[1]))
3228 if (strchr(":'{$", s[1]))
3230 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3231 break; /* in regexp, neither @+ nor @- are interpolated */
3234 /* check for embedded scalars. only stop if we're sure it's a
3237 else if (*s == '$') {
3238 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3240 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3242 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3243 "Possible unintended interpolation of $\\ in regex");
3245 break; /* in regexp, $ might be tail anchor */
3249 /* End of else if chain - OP_TRANS rejoin rest */
3252 if (*s == '\\' && s+1 < send) {
3253 char* e; /* Can be used for ending '}', etc. */
3257 /* warn on \1 - \9 in substitution replacements, but note that \11
3258 * is an octal; and \19 is \1 followed by '9' */
3259 if (PL_lex_inwhat == OP_SUBST
3265 /* diag_listed_as: \%d better written as $%d */
3266 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3271 /* string-change backslash escapes */
3272 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3276 /* In a pattern, process \N, but skip any other backslash escapes.
3277 * This is because we don't want to translate an escape sequence
3278 * into a meta symbol and have the regex compiler use the meta
3279 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3280 * in spite of this, we do have to process \N here while the proper
3281 * charnames handler is in scope. See bugs #56444 and #62056.
3283 * There is a complication because \N in a pattern may also stand
3284 * for 'match a non-nl', and not mean a charname, in which case its
3285 * processing should be deferred to the regex compiler. To be a
3286 * charname it must be followed immediately by a '{', and not look
3287 * like \N followed by a curly quantifier, i.e., not something like
3288 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3290 else if (PL_lex_inpat
3293 || regcurly(s + 1)))
3296 goto default_action;
3302 if ((isALPHANUMERIC(*s)))
3303 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3304 "Unrecognized escape \\%c passed through",
3306 /* default action is to copy the quoted character */
3307 goto default_action;
3310 /* eg. \132 indicates the octal constant 0132 */
3311 case '0': case '1': case '2': case '3':
3312 case '4': case '5': case '6': case '7':
3314 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3316 uv = grok_oct(s, &len, &flags, NULL);
3318 if (len < 3 && s < send && isDIGIT(*s)
3319 && ckWARN(WARN_MISC))
3321 Perl_warner(aTHX_ packWARN(WARN_MISC),
3322 "%s", form_short_octal_warning(s, len));
3325 goto NUM_ESCAPE_INSERT;
3327 /* eg. \o{24} indicates the octal constant \024 */
3332 bool valid = grok_bslash_o(&s, &uv, &error,
3333 TRUE, /* Output warning */
3334 FALSE, /* Not strict */
3335 TRUE, /* Output warnings for
3342 goto NUM_ESCAPE_INSERT;
3345 /* eg. \x24 indicates the hex constant 0x24 */
3350 bool valid = grok_bslash_x(&s, &uv, &error,
3351 TRUE, /* Output warning */
3352 FALSE, /* Not strict */
3353 TRUE, /* Output warnings for
3363 /* Insert oct or hex escaped character. */
3365 /* Here uv is the ordinal of the next character being added */
3366 if (UVCHR_IS_INVARIANT(uv)) {
3370 if (!has_utf8 && uv > 255) {
3371 /* Might need to recode whatever we have accumulated so
3372 * far if it contains any chars variant in utf8 or
3375 SvCUR_set(sv, d - SvPVX_const(sv));
3378 /* See Note on sizing above. */
3379 sv_utf8_upgrade_flags_grow(
3381 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
3382 /* Above-latin1 in string
3383 * implies no encoding */
3384 |SV_UTF8_NO_ENCODING,
3385 UVCHR_SKIP(uv) + (STRLEN)(send - s) + 1);
3386 d = SvPVX(sv) + SvCUR(sv);
3391 /* Usually, there will already be enough room in 'sv'
3392 * since such escapes are likely longer than any UTF-8
3393 * sequence they can end up as. This isn't the case on
3394 * EBCDIC where \x{40000000} contains 12 bytes, and the
3395 * UTF-8 for it contains 14. And, we have to allow for
3396 * a trailing NUL. It probably can't happen on ASCII
3397 * platforms, but be safe */
3398 const STRLEN needed = d - SvPVX(sv) + UVCHR_SKIP(uv)
3400 if (UNLIKELY(needed > SvLEN(sv))) {
3401 SvCUR_set(sv, d - SvPVX_const(sv));
3402 d = sv_grow(sv, needed) + SvCUR(sv);
3405 d = (char*)uvchr_to_utf8((U8*)d, uv);
3406 if (PL_lex_inwhat == OP_TRANS
3407 && PL_sublex_info.sub_op)
3409 PL_sublex_info.sub_op->op_private |=
3410 (PL_lex_repl ? OPpTRANS_FROM_UTF
3419 non_portable_endpoint++;
3424 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3425 * named character, like \N{LATIN SMALL LETTER A}, or a named
3426 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3427 * GRAVE} (except y/// can't handle the latter, croaking). For
3428 * convenience all three forms are referred to as "named
3429 * characters" below.
3431 * For patterns, \N also can mean to match a non-newline. Code
3432 * before this 'switch' statement should already have handled
3433 * this situation, and hence this code only has to deal with
3434 * the named character cases.
3436 * For non-patterns, the named characters are converted to
3437 * their string equivalents. In patterns, named characters are
3438 * not converted to their ultimate forms for the same reasons
3439 * that other escapes aren't. Instead, they are converted to
3440 * the \N{U+...} form to get the value from the charnames that
3441 * is in effect right now, while preserving the fact that it
3442 * was a named character, so that the regex compiler knows
3445 * The structure of this section of code (besides checking for
3446 * errors and upgrading to utf8) is:
3447 * If the named character is of the form \N{U+...}, pass it
3448 * through if a pattern; otherwise convert the code point
3450 * Otherwise must be some \N{NAME}: convert to
3451 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3453 * Transliteration is an exception. The conversion to utf8 is
3454 * only done if the code point requires it to be representable.
3456 * Here, 's' points to the 'N'; the test below is guaranteed to
3457 * succeed if we are being called on a pattern, as we already
3458 * know from a test above that the next character is a '{'. A
3459 * non-pattern \N must mean 'named character', which requires
3463 yyerror("Missing braces on \\N{}");
3468 /* If there is no matching '}', it is an error. */
3469 if (! (e = strchr(s, '}'))) {
3470 if (! PL_lex_inpat) {
3471 yyerror("Missing right brace on \\N{}");
3473 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3478 /* Here it looks like a named character */
3480 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3481 s += 2; /* Skip to next char after the 'U+' */
3484 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3485 /* Check the syntax. */
3488 if (!isXDIGIT(*s)) {
3491 "Invalid hexadecimal number in \\N{U+...}"
3499 else if ((*s == '.' || *s == '_')
3505 /* Pass everything through unchanged.
3506 * +1 is for the '}' */
3507 Copy(orig_s, d, e - orig_s + 1, char);
3508 d += e - orig_s + 1;
3510 else { /* Not a pattern: convert the hex to string */
3511 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3512 | PERL_SCAN_SILENT_ILLDIGIT
3513 | PERL_SCAN_DISALLOW_PREFIX;
3515 uv = grok_hex(s, &len, &flags, NULL);
3516 if (len == 0 || (len != (STRLEN)(e - s)))
3519 /* For non-tr///, if the destination is not in utf8,
3520 * unconditionally recode it to be so. This is
3521 * because \N{} implies Unicode semantics, and scalars
3522 * have to be in utf8 to guarantee those semantics.
3523 * tr/// doesn't care about Unicode rules, so no need
3524 * there to upgrade to UTF-8 for small enough code
3526 if (! has_utf8 && ( uv > 0xFF
3527 || PL_lex_inwhat != OP_TRANS))
3529 SvCUR_set(sv, d - SvPVX_const(sv));
3532 /* See Note on sizing above. */
3533 sv_utf8_upgrade_flags_grow(
3535 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3536 UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
3537 d = SvPVX(sv) + SvCUR(sv);
3541 /* Add the (Unicode) code point to the output. */
3542 if (OFFUNI_IS_INVARIANT(uv)) {
3543 *d++ = (char) LATIN1_TO_NATIVE(uv);
3546 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3550 else /* Here is \N{NAME} but not \N{U+...}. */
3551 if ((res = get_and_check_backslash_N_name(s, e)))
3554 const char *str = SvPV_const(res, len);
3557 if (! len) { /* The name resolved to an empty string */
3558 Copy("\\N{}", d, 4, char);
3562 /* In order to not lose information for the regex
3563 * compiler, pass the result in the specially made
3564 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3565 * the code points in hex of each character
3566 * returned by charnames */
3568 const char *str_end = str + len;
3569 const STRLEN off = d - SvPVX_const(sv);
3571 if (! SvUTF8(res)) {
3572 /* For the non-UTF-8 case, we can determine the
3573 * exact length needed without having to parse
3574 * through the string. Each character takes up
3575 * 2 hex digits plus either a trailing dot or
3577 const char initial_text[] = "\\N{U+";
3578 const STRLEN initial_len = sizeof(initial_text)
3580 d = off + SvGROW(sv, off
3583 /* +1 for trailing NUL */
3586 + (STRLEN)(send - e));
3587 Copy(initial_text, d, initial_len, char);
3589 while (str < str_end) {
3592 my_snprintf(hex_string,
3596 /* The regex compiler is
3597 * expecting Unicode, not
3599 NATIVE_TO_LATIN1(*str));
3600 PERL_MY_SNPRINTF_POST_GUARD(len,
3601 sizeof(hex_string));
3602 Copy(hex_string, d, 3, char);
3606 d--; /* Below, we will overwrite the final
3607 dot with a right brace */
3610 STRLEN char_length; /* cur char's byte length */
3612 /* and the number of bytes after this is
3613 * translated into hex digits */
3614 STRLEN output_length;
3616 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3617 * for max('U+', '.'); and 1 for NUL */
3618 char hex_string[2 * UTF8_MAXBYTES + 5];
3620 /* Get the first character of the result. */
3621 U32 uv = utf8n_to_uvchr((U8 *) str,
3625 /* Convert first code point to Unicode hex,
3626 * including the boiler plate before it. */
3628 my_snprintf(hex_string, sizeof(hex_string),
3630 (unsigned int) NATIVE_TO_UNI(uv));
3632 /* Make sure there is enough space to hold it */
3633 d = off + SvGROW(sv, off
3635 + (STRLEN)(send - e)
3636 + 2); /* '}' + NUL */
3638 Copy(hex_string, d, output_length, char);
3641 /* For each subsequent character, append dot and
3642 * its Unicode code point in hex */
3643 while ((str += char_length) < str_end) {
3644 const STRLEN off = d - SvPVX_const(sv);
3645 U32 uv = utf8n_to_uvchr((U8 *) str,
3650 my_snprintf(hex_string,
3653 (unsigned int) NATIVE_TO_UNI(uv));
3655 d = off + SvGROW(sv, off
3657 + (STRLEN)(send - e)
3658 + 2); /* '}' + NUL */
3659 Copy(hex_string, d, output_length, char);
3664 *d++ = '}'; /* Done. Add the trailing brace */
3667 else { /* Here, not in a pattern. Convert the name to a
3670 if (PL_lex_inwhat == OP_TRANS) {
3671 str = SvPV_const(res, len);
3672 if (len > ((SvUTF8(res))
3676 yyerror(Perl_form(aTHX_
3677 "%.*s must not be a named sequence"
3678 " in transliteration operator",
3679 /* +1 to include the "}" */
3680 (int) (e + 1 - start), start));
3681 goto end_backslash_N;
3684 else if (! SvUTF8(res)) {
3685 /* Make sure \N{} return is UTF-8. This is because
3686 * \N{} implies Unicode semantics, and scalars have to
3687 * be in utf8 to guarantee those semantics; but not
3688 * needed in tr/// */
3689 sv_utf8_upgrade_flags(res, SV_UTF8_NO_ENCODING);
3690 str = SvPV_const(res, len);
3693 /* Upgrade destination to be utf8 if this new
3695 if (! has_utf8 && SvUTF8(res)) {
3696 SvCUR_set(sv, d - SvPVX_const(sv));
3699 /* See Note on sizing above. */
3700 sv_utf8_upgrade_flags_grow(sv,
3701 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3702 len + (STRLEN)(send - s) + 1);
3703 d = SvPVX(sv) + SvCUR(sv);
3705 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3707 /* See Note on sizing above. (NOTE: SvCUR() is not
3708 * set correctly here). */
3709 const STRLEN off = d - SvPVX_const(sv);
3710 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3712 Copy(str, d, len, char);
3718 } /* End \N{NAME} */
3722 backslash_N++; /* \N{} is defined to be Unicode */
3724 s = e + 1; /* Point to just after the '}' */
3727 /* \c is a control character */
3731 *d++ = grok_bslash_c(*s++, 1);
3734 yyerror("Missing control char name in \\c");
3737 non_portable_endpoint++;
3741 /* printf-style backslashes, formfeeds, newlines, etc */
3767 } /* end if (backslash) */
3770 /* If we started with encoded form, or already know we want it,
3771 then encode the next character */
3772 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3775 /* One might think that it is wasted effort in the case of the
3776 * source being utf8 (this_utf8 == TRUE) to take the next character
3777 * in the source, convert it to an unsigned value, and then convert
3778 * it back again. But the source has not been validated here. The
3779 * routine that does the conversion checks for errors like
3782 const UV nextuv = (this_utf8)
3783 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3785 const STRLEN need = UVCHR_SKIP(nextuv);
3787 SvCUR_set(sv, d - SvPVX_const(sv));
3790 /* See Note on sizing above. */
3791 sv_utf8_upgrade_flags_grow(sv,
3792 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3793 need + (STRLEN)(send - s) + 1);
3794 d = SvPVX(sv) + SvCUR(sv);
3796 } else if (need > len) {
3797 /* encoded value larger than old, may need extra space (NOTE:
3798 * SvCUR() is not set correctly here). See Note on sizing
3800 const STRLEN off = d - SvPVX_const(sv);
3801 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3805 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3810 } /* while loop to process each character */
3812 /* terminate the string and set up the sv */
3814 SvCUR_set(sv, d - SvPVX_const(sv));
3815 if (SvCUR(sv) >= SvLEN(sv))
3816 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3817 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3820 if (IN_ENCODING && !has_utf8) {
3821 sv_recode_to_utf8(sv, _get_encoding());
3827 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3828 PL_sublex_info.sub_op->op_private |=
3829 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3833 /* shrink the sv if we allocated more than we used */
3834 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3835 SvPV_shrink_to_cur(sv);
3838 /* return the substring (via pl_yylval) only if we parsed anything */
3841 for (; s2 < s; s2++) {
3843 COPLINE_INC_WITH_HERELINES;
3845 SvREFCNT_inc_simple_void_NN(sv);
3846 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3847 && ! PL_parser->lex_re_reparsing)
3849 const char *const key = PL_lex_inpat ? "qr" : "q";
3850 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3854 if (PL_lex_inwhat == OP_TRANS) {
3857 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3860 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3868 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3871 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3873 LEAVE_with_name("scan_const");
3878 * Returns TRUE if there's more to the expression (e.g., a subscript),
3881 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3883 * ->[ and ->{ return TRUE
3884 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3885 * { and [ outside a pattern are always subscripts, so return TRUE
3886 * if we're outside a pattern and it's not { or [, then return FALSE
3887 * if we're in a pattern and the first char is a {
3888 * {4,5} (any digits around the comma) returns FALSE
3889 * if we're in a pattern and the first char is a [
3891 * [SOMETHING] has a funky algorithm to decide whether it's a
3892 * character class or not. It has to deal with things like
3893 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3894 * anything else returns TRUE
3897 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3900 S_intuit_more(pTHX_ char *s)
3902 PERL_ARGS_ASSERT_INTUIT_MORE;
3904 if (PL_lex_brackets)
3906 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3908 if (*s == '-' && s[1] == '>'
3909 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3910 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3911 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3913 if (*s != '{' && *s != '[')
3918 /* In a pattern, so maybe we have {n,m}. */
3926 /* On the other hand, maybe we have a character class */
3929 if (*s == ']' || *s == '^')
3932 /* this is terrifying, and it works */
3935 const char * const send = strchr(s,']');
3936 unsigned char un_char, last_un_char;
3937 char tmpbuf[sizeof PL_tokenbuf * 4];
3939 if (!send) /* has to be an expression */
3941 weight = 2; /* let's weigh the evidence */
3945 else if (isDIGIT(*s)) {
3947 if (isDIGIT(s[1]) && s[2] == ']')
3953 Zero(seen,256,char);
3955 for (; s < send; s++) {
3956 last_un_char = un_char;
3957 un_char = (unsigned char)*s;
3962 weight -= seen[un_char] * 10;
3963 if (isWORDCHAR_lazy_if(s+1,UTF)) {
3965 char *tmp = PL_bufend;
3966 PL_bufend = (char*)send;
3967 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
3969 len = (int)strlen(tmpbuf);
3970 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3971 UTF ? SVf_UTF8 : 0, SVt_PV))
3978 && strchr("[#!%*<>()-=",s[1]))
3980 if (/*{*/ strchr("])} =",s[2]))
3989 if (strchr("wds]",s[1]))
3991 else if (seen[(U8)'\''] || seen[(U8)'"'])
3993 else if (strchr("rnftbxcav",s[1]))
3995 else if (isDIGIT(s[1])) {
3997 while (s[1] && isDIGIT(s[1]))
4007 if (strchr("aA01! ",last_un_char))
4009 if (strchr("zZ79~",s[1]))
4011 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4012 weight -= 5; /* cope with negative subscript */
4015 if (!isWORDCHAR(last_un_char)
4016 && !(last_un_char == '$' || last_un_char == '@'
4017 || last_un_char == '&')
4018 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4022 if (keyword(d, s - d, 0))
4025 if (un_char == last_un_char + 1)
4027 weight -= seen[un_char];
4032 if (weight >= 0) /* probably a character class */
4042 * Does all the checking to disambiguate
4044 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4045 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4047 * First argument is the stuff after the first token, e.g. "bar".
4049 * Not a method if foo is a filehandle.
4050 * Not a method if foo is a subroutine prototyped to take a filehandle.
4051 * Not a method if it's really "Foo $bar"
4052 * Method if it's "foo $bar"
4053 * Not a method if it's really "print foo $bar"
4054 * Method if it's really "foo package::" (interpreted as package->foo)
4055 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4056 * Not a method if bar is a filehandle or package, but is quoted with
4061 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4063 char *s = start + (*start == '$');
4064 char tmpbuf[sizeof PL_tokenbuf];
4067 /* Mustn't actually add anything to a symbol table.
4068 But also don't want to "initialise" any placeholder
4069 constants that might already be there into full
4070 blown PVGVs with attached PVCV. */
4072 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4074 PERL_ARGS_ASSERT_INTUIT_METHOD;
4076 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4078 if (cv && SvPOK(cv)) {
4079 const char *proto = CvPROTO(cv);
4081 while (*proto && (isSPACE(*proto) || *proto == ';'))
4088 if (*start == '$') {
4089 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4090 || isUPPER(*PL_tokenbuf))
4095 return *s == '(' ? FUNCMETH : METHOD;
4098 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4099 /* start is the beginning of the possible filehandle/object,
4100 * and s is the end of it
4101 * tmpbuf is a copy of it (but with single quotes as double colons)
4104 if (!keyword(tmpbuf, len, 0)) {
4105 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4110 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4111 if (indirgv && GvCVu(indirgv))
4113 /* filehandle or package name makes it a method */
4114 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4116 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4117 return 0; /* no assumptions -- "=>" quotes bareword */
4119 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4120 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4121 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4125 return *s == '(' ? FUNCMETH : METHOD;
4131 /* Encoded script support. filter_add() effectively inserts a
4132 * 'pre-processing' function into the current source input stream.
4133 * Note that the filter function only applies to the current source file
4134 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4136 * The datasv parameter (which may be NULL) can be used to pass
4137 * private data to this instance of the filter. The filter function
4138 * can recover the SV using the FILTER_DATA macro and use it to
4139 * store private buffers and state information.
4141 * The supplied datasv parameter is upgraded to a PVIO type
4142 * and the IoDIRP/IoANY field is used to store the function pointer,
4143 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4144 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4145 * private use must be set using malloc'd pointers.
4149 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4157 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4158 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4160 if (!PL_rsfp_filters)
4161 PL_rsfp_filters = newAV();
4164 SvUPGRADE(datasv, SVt_PVIO);
4165 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4166 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4167 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4168 FPTR2DPTR(void *, IoANY(datasv)),
4169 SvPV_nolen(datasv)));
4170 av_unshift(PL_rsfp_filters, 1);
4171 av_store(PL_rsfp_filters, 0, datasv) ;
4173 !PL_parser->filtered
4174 && PL_parser->lex_flags & LEX_EVALBYTES
4175 && PL_bufptr < PL_bufend
4177 const char *s = PL_bufptr;
4178 while (s < PL_bufend) {
4180 SV *linestr = PL_parser->linestr;
4181 char *buf = SvPVX(linestr);
4182 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4183 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4184 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4185 STRLEN const linestart_pos = PL_parser->linestart - buf;
4186 STRLEN const last_uni_pos =
4187 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4188 STRLEN const last_lop_pos =
4189 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4190 av_push(PL_rsfp_filters, linestr);
4191 PL_parser->linestr =
4192 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4193 buf = SvPVX(PL_parser->linestr);
4194 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4195 PL_parser->bufptr = buf + bufptr_pos;
4196 PL_parser->oldbufptr = buf + oldbufptr_pos;
4197 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4198 PL_parser->linestart = buf + linestart_pos;
4199 if (PL_parser->last_uni)
4200 PL_parser->last_uni = buf + last_uni_pos;
4201 if (PL_parser->last_lop)
4202 PL_parser->last_lop = buf + last_lop_pos;
4203 SvLEN(linestr) = SvCUR(linestr);
4204 SvCUR(linestr) = s-SvPVX(linestr);
4205 PL_parser->filtered = 1;
4215 /* Delete most recently added instance of this filter function. */
4217 Perl_filter_del(pTHX_ filter_t funcp)
4221 PERL_ARGS_ASSERT_FILTER_DEL;
4224 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4225 FPTR2DPTR(void*, funcp)));
4227 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4229 /* if filter is on top of stack (usual case) just pop it off */
4230 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4231 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4232 sv_free(av_pop(PL_rsfp_filters));
4236 /* we need to search for the correct entry and clear it */
4237 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4241 /* Invoke the idxth filter function for the current rsfp. */
4242 /* maxlen 0 = read one text line */
4244 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4248 /* This API is bad. It should have been using unsigned int for maxlen.
4249 Not sure if we want to change the API, but if not we should sanity
4250 check the value here. */
4251 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4253 PERL_ARGS_ASSERT_FILTER_READ;
4255 if (!PL_parser || !PL_rsfp_filters)
4257 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4258 /* Provide a default input filter to make life easy. */
4259 /* Note that we append to the line. This is handy. */
4260 DEBUG_P(PerlIO_printf(Perl_debug_log,
4261 "filter_read %d: from rsfp\n", idx));
4262 if (correct_length) {
4265 const int old_len = SvCUR(buf_sv);
4267 /* ensure buf_sv is large enough */
4268 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4269 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4270 correct_length)) <= 0) {
4271 if (PerlIO_error(PL_rsfp))
4272 return -1; /* error */
4274 return 0 ; /* end of file */
4276 SvCUR_set(buf_sv, old_len + len) ;
4277 SvPVX(buf_sv)[old_len + len] = '\0';
4280 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4281 if (PerlIO_error(PL_rsfp))
4282 return -1; /* error */
4284 return 0 ; /* end of file */
4287 return SvCUR(buf_sv);
4289 /* Skip this filter slot if filter has been deleted */
4290 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4291 DEBUG_P(PerlIO_printf(Perl_debug_log,
4292 "filter_read %d: skipped (filter deleted)\n",
4294 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4296 if (SvTYPE(datasv) != SVt_PVIO) {
4297 if (correct_length) {
4299 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4300 if (!remainder) return 0; /* eof */
4301 if (correct_length > remainder) correct_length = remainder;
4302 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4303 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4306 const char *s = SvEND(datasv);
4307 const char *send = SvPVX(datasv) + SvLEN(datasv);
4315 if (s == send) return 0; /* eof */
4316 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4317 SvCUR_set(datasv, s-SvPVX(datasv));
4319 return SvCUR(buf_sv);
4321 /* Get function pointer hidden within datasv */
4322 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4323 DEBUG_P(PerlIO_printf(Perl_debug_log,
4324 "filter_read %d: via function %p (%s)\n",
4325 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4326 /* Call function. The function is expected to */
4327 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4328 /* Return: <0:error, =0:eof, >0:not eof */
4329 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4333 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4335 PERL_ARGS_ASSERT_FILTER_GETS;
4337 #ifdef PERL_CR_FILTER
4338 if (!PL_rsfp_filters) {
4339 filter_add(S_cr_textfilter,NULL);
4342 if (PL_rsfp_filters) {
4344 SvCUR_set(sv, 0); /* start with empty line */
4345 if (FILTER_READ(0, sv, 0) > 0)
4346 return ( SvPVX(sv) ) ;
4351 return (sv_gets(sv, PL_rsfp, append));
4355 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4359 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4361 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4365 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4366 && (gv = gv_fetchpvn_flags(pkgname,
4368 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4370 return GvHV(gv); /* Foo:: */
4373 /* use constant CLASS => 'MyClass' */
4374 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4375 if (gv && GvCV(gv)) {
4376 SV * const sv = cv_const_sv(GvCV(gv));
4378 return gv_stashsv(sv, 0);
4381 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4386 S_tokenize_use(pTHX_ int is_use, char *s) {
4387 PERL_ARGS_ASSERT_TOKENIZE_USE;
4389 if (PL_expect != XSTATE)
4390 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4391 is_use ? "use" : "no"));
4394 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4395 s = force_version(s, TRUE);
4396 if (*s == ';' || *s == '}'
4397 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4398 NEXTVAL_NEXTTOKE.opval = NULL;
4401 else if (*s == 'v') {
4402 s = force_word(s,WORD,FALSE,TRUE);
4403 s = force_version(s, FALSE);
4407 s = force_word(s,WORD,FALSE,TRUE);
4408 s = force_version(s, FALSE);
4410 pl_yylval.ival = is_use;
4414 static const char* const exp_name[] =
4415 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4416 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4421 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4423 S_word_takes_any_delimeter(char *p, STRLEN len)
4425 return (len == 1 && strchr("msyq", p[0]))
4427 && ((p[0] == 't' && p[1] == 'r')
4428 || (p[0] == 'q' && strchr("qwxr", p[1]))));
4432 S_check_scalar_slice(pTHX_ char *s)
4435 while (*s == ' ' || *s == '\t') s++;
4436 if (*s == 'q' && s[1] == 'w'
4437 && !isWORDCHAR_lazy_if(s+2,UTF))
4439 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4440 s += UTF ? UTF8SKIP(s) : 1;
4441 if (*s == '}' || *s == ']')
4442 pl_yylval.ival = OPpSLICEWARNING;
4448 Works out what to call the token just pulled out of the input
4449 stream. The yacc parser takes care of taking the ops we return and
4450 stitching them into a tree.
4453 The type of the next token
4456 Switch based on the current state:
4457 - if we already built the token before, use it
4458 - if we have a case modifier in a string, deal with that
4459 - handle other cases of interpolation inside a string
4460 - scan the next line if we are inside a format
4461 In the normal state switch on the next character:
4463 if alphabetic, go to key lookup
4464 unrecoginized character - croak
4465 - 0/4/26: handle end-of-line or EOF
4466 - cases for whitespace
4467 - \n and #: handle comments and line numbers
4468 - various operators, brackets and sigils
4471 - 'v': vstrings (or go to key lookup)
4472 - 'x' repetition operator (or go to key lookup)
4473 - other ASCII alphanumerics (key lookup begins here):
4476 scan built-in keyword (but do nothing with it yet)
4477 check for statement label
4478 check for lexical subs
4479 goto just_a_word if there is one
4480 see whether built-in keyword is overridden
4481 switch on keyword number:
4482 - default: just_a_word:
4483 not a built-in keyword; handle bareword lookup
4484 disambiguate between method and sub call
4485 fall back to bareword
4486 - cases for built-in keywords
4494 char *s = PL_bufptr;
4498 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4502 /* orig_keyword, gvp, and gv are initialized here because
4503 * jump to the label just_a_word_zero can bypass their
4504 * initialization later. */
4505 I32 orig_keyword = 0;
4510 SV* tmp = newSVpvs("");
4511 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4512 (IV)CopLINE(PL_curcop),
4513 lex_state_names[PL_lex_state],
4514 exp_name[PL_expect],
4515 pv_display(tmp, s, strlen(s), 0, 60));
4519 /* when we've already built the next token, just pull it out of the queue */
4522 pl_yylval = PL_nextval[PL_nexttoke];
4524 PL_lex_state = PL_lex_defer;
4525 PL_lex_defer = LEX_NORMAL;
4529 next_type = PL_nexttype[PL_nexttoke];
4530 if (next_type & (7<<24)) {
4531 if (next_type & (1<<24)) {
4532 if (PL_lex_brackets > 100)
4533 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4534 PL_lex_brackstack[PL_lex_brackets++] =
4535 (char) ((next_type >> 16) & 0xff);
4537 if (next_type & (2<<24))
4538 PL_lex_allbrackets++;
4539 if (next_type & (4<<24))
4540 PL_lex_allbrackets--;
4541 next_type &= 0xffff;
4543 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4547 switch (PL_lex_state) {
4549 case LEX_INTERPNORMAL:
4552 /* interpolated case modifiers like \L \U, including \Q and \E.
4553 when we get here, PL_bufptr is at the \
4555 case LEX_INTERPCASEMOD:
4557 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4559 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4560 PL_bufptr, PL_bufend, *PL_bufptr);
4562 /* handle \E or end of string */
4563 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4565 if (PL_lex_casemods) {
4566 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4567 PL_lex_casestack[PL_lex_casemods] = '\0';
4569 if (PL_bufptr != PL_bufend
4570 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4571 || oldmod == 'F')) {
4573 PL_lex_state = LEX_INTERPCONCAT;
4575 PL_lex_allbrackets--;
4578 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4579 /* Got an unpaired \E */
4580 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4581 "Useless use of \\E");
4583 if (PL_bufptr != PL_bufend)
4585 PL_lex_state = LEX_INTERPCONCAT;
4589 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4590 "### Saw case modifier\n"); });
4592 if (s[1] == '\\' && s[2] == 'E') {
4594 PL_lex_state = LEX_INTERPCONCAT;
4599 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4600 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4601 if ((*s == 'L' || *s == 'U' || *s == 'F')
4602 && (strchr(PL_lex_casestack, 'L')
4603 || strchr(PL_lex_casestack, 'U')
4604 || strchr(PL_lex_casestack, 'F')))
4606 PL_lex_casestack[--PL_lex_casemods] = '\0';
4607 PL_lex_allbrackets--;
4610 if (PL_lex_casemods > 10)
4611 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4612 PL_lex_casestack[PL_lex_casemods++] = *s;
4613 PL_lex_casestack[PL_lex_casemods] = '\0';
4614 PL_lex_state = LEX_INTERPCONCAT;
4615 NEXTVAL_NEXTTOKE.ival = 0;
4616 force_next((2<<24)|'(');
4618 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4620 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4622 NEXTVAL_NEXTTOKE.ival = OP_LC;
4624 NEXTVAL_NEXTTOKE.ival = OP_UC;
4626 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4628 NEXTVAL_NEXTTOKE.ival = OP_FC;
4630 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4634 if (PL_lex_starts) {
4637 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4638 if (PL_lex_casemods == 1 && PL_lex_inpat)
4641 AopNOASSIGN(OP_CONCAT);
4647 case LEX_INTERPPUSH:
4648 return REPORT(sublex_push());
4650 case LEX_INTERPSTART:
4651 if (PL_bufptr == PL_bufend)
4652 return REPORT(sublex_done());
4653 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4654 "### Interpolated variable\n"); });
4656 /* for /@a/, we leave the joining for the regex engine to do
4657 * (unless we're within \Q etc) */
4658 PL_lex_dojoin = (*PL_bufptr == '@'
4659 && (!PL_lex_inpat || PL_lex_casemods));
4660 PL_lex_state = LEX_INTERPNORMAL;
4661 if (PL_lex_dojoin) {
4662 NEXTVAL_NEXTTOKE.ival = 0;
4664 force_ident("\"", '$');
4665 NEXTVAL_NEXTTOKE.ival = 0;
4667 NEXTVAL_NEXTTOKE.ival = 0;
4668 force_next((2<<24)|'(');
4669 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4672 /* Convert (?{...}) and friends to 'do {...}' */
4673 if (PL_lex_inpat && *PL_bufptr == '(') {
4674 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4676 if (*PL_bufptr != '{')
4678 PL_expect = XTERMBLOCK;
4682 if (PL_lex_starts++) {
4684 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4685 if (!PL_lex_casemods && PL_lex_inpat)
4688 AopNOASSIGN(OP_CONCAT);
4692 case LEX_INTERPENDMAYBE:
4693 if (intuit_more(PL_bufptr)) {
4694 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4700 /* Treat state as LEX_NORMAL if we have no inner lexing scope.
4701 XXX This hack can be removed if we stop setting PL_lex_state to
4702 LEX_KNOWNEXT, as can the hack under LEX_INTREPCONCAT below. */
4703 if (UNLIKELY(!PL_lex_inwhat)) {
4704 PL_lex_state = LEX_NORMAL;
4708 if (PL_lex_dojoin) {
4709 const U8 dojoin_was = PL_lex_dojoin;
4710 PL_lex_dojoin = FALSE;
4711 PL_lex_state = LEX_INTERPCONCAT;
4712 PL_lex_allbrackets--;
4713 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
4715 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
4716 && SvEVALED(PL_lex_repl))
4718 if (PL_bufptr != PL_bufend)
4719 Perl_croak(aTHX_ "Bad evalled substitution pattern");
4722 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4723 re_eval_str. If the here-doc body’s length equals the previous
4724 value of re_eval_start, re_eval_start will now be null. So
4725 check re_eval_str as well. */
4726 if (PL_parser->lex_shared->re_eval_start
4727 || PL_parser->lex_shared->re_eval_str) {
4729 if (*PL_bufptr != ')')
4730 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4732 /* having compiled a (?{..}) expression, return the original
4733 * text too, as a const */
4734 if (PL_parser->lex_shared->re_eval_str) {
4735 sv = PL_parser->lex_shared->re_eval_str;
4736 PL_parser->lex_shared->re_eval_str = NULL;
4738 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4739 SvPV_shrink_to_cur(sv);
4741 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4742 PL_bufptr - PL_parser->lex_shared->re_eval_start);
4743 NEXTVAL_NEXTTOKE.opval =
4744 (OP*)newSVOP(OP_CONST, 0,
4747 PL_parser->lex_shared->re_eval_start = NULL;
4753 case LEX_INTERPCONCAT:
4755 if (PL_lex_brackets)
4756 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4757 (long) PL_lex_brackets);
4759 /* Treat state as LEX_NORMAL when not in an inner lexing scope.
4760 XXX This hack can be removed if we stop setting PL_lex_state to
4762 if (UNLIKELY(!PL_lex_inwhat)) {
4763 PL_lex_state = LEX_NORMAL;
4767 if (PL_bufptr == PL_bufend)
4768 return REPORT(sublex_done());
4770 /* m'foo' still needs to be parsed for possible (?{...}) */
4771 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
4772 SV *sv = newSVsv(PL_linestr);
4774 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
4778 s = scan_const(PL_bufptr);
4780 PL_lex_state = LEX_INTERPCASEMOD;
4782 PL_lex_state = LEX_INTERPSTART;
4785 if (s != PL_bufptr) {
4786 NEXTVAL_NEXTTOKE = pl_yylval;
4789 if (PL_lex_starts++) {
4790 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4791 if (!PL_lex_casemods && PL_lex_inpat)
4794 AopNOASSIGN(OP_CONCAT);
4804 s = scan_formline(PL_bufptr);
4805 if (!PL_lex_formbrack)
4814 /* We really do *not* want PL_linestr ever becoming a COW. */
4815 assert (!SvIsCOW(PL_linestr));
4817 PL_oldoldbufptr = PL_oldbufptr;
4819 PL_parser->saw_infix_sigil = 0;
4825 if (! isUTF8_CHAR((U8 *) s, (U8 *) PL_bufend)) {
4827 SAVESPTR(PL_warnhook);
4828 PL_warnhook = PERL_WARNHOOK_FATAL;
4829 utf8n_to_uvchr((U8*)s, PL_bufend-s, NULL, 0);
4832 if (isIDFIRST_utf8((U8*)s)) {
4836 else if (isALNUMC(*s)) {
4840 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4841 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
4843 SVs_TEMP | SVf_UTF8),
4844 10, UNI_DISPLAY_ISPRINT)
4845 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
4846 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4847 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4848 d = UTF ? (char *) utf8_hop((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4852 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
4853 UTF8fARG(UTF, (s - d), d),
4858 goto fake_eof; /* emulate EOF on ^D or ^Z */
4860 if ((!PL_rsfp || PL_lex_inwhat)
4861 && (!PL_parser->filtered || s+1 < PL_bufend)) {
4865 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
4867 yyerror((const char *)
4869 ? "Format not terminated"
4870 : "Missing right curly or square bracket"));
4872 DEBUG_T( { PerlIO_printf(Perl_debug_log,
4873 "### Tokener got EOF\n");
4877 if (s++ < PL_bufend)
4878 goto retry; /* ignore stray nulls */
4881 if (!PL_in_eval && !PL_preambled) {
4882 PL_preambled = TRUE;
4884 /* Generate a string of Perl code to load the debugger.
4885 * If PERL5DB is set, it will return the contents of that,
4886 * otherwise a compile-time require of perl5db.pl. */
4888 const char * const pdb = PerlEnv_getenv("PERL5DB");
4891 sv_setpv(PL_linestr, pdb);
4892 sv_catpvs(PL_linestr,";");
4894 SETERRNO(0,SS_NORMAL);
4895 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4897 PL_parser->preambling = CopLINE(PL_curcop);
4899 sv_setpvs(PL_linestr,"");
4900 if (PL_preambleav) {
4901 SV **svp = AvARRAY(PL_preambleav);
4902 SV **const end = svp + AvFILLp(PL_preambleav);
4904 sv_catsv(PL_linestr, *svp);
4906 sv_catpvs(PL_linestr, ";");
4908 sv_free(MUTABLE_SV(PL_preambleav));
4909 PL_preambleav = NULL;
4912 sv_catpvs(PL_linestr,
4913 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
4914 if (PL_minus_n || PL_minus_p) {
4915 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
4917 sv_catpvs(PL_linestr,"chomp;");
4920 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4921 || *PL_splitstr == '"')
4922 && strchr(PL_splitstr + 1, *PL_splitstr))
4923 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
4925 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4926 bytes can be used as quoting characters. :-) */
4927 const char *splits = PL_splitstr;
4928 sv_catpvs(PL_linestr, "our @F=split(q\0");
4931 if (*splits == '\\')
4932 sv_catpvn(PL_linestr, splits, 1);
4933 sv_catpvn(PL_linestr, splits, 1);
4934 } while (*splits++);
4935 /* This loop will embed the trailing NUL of
4936 PL_linestr as the last thing it does before
4938 sv_catpvs(PL_linestr, ");");
4942 sv_catpvs(PL_linestr,"our @F=split(' ');");
4945 sv_catpvs(PL_linestr, "\n");
4946 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4947 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4948 PL_last_lop = PL_last_uni = NULL;
4949 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
4950 update_debugger_info(PL_linestr, NULL, 0);
4955 bof = PL_rsfp ? TRUE : FALSE;
4958 fake_eof = LEX_FAKE_EOF;
4960 PL_bufptr = PL_bufend;
4961 COPLINE_INC_WITH_HERELINES;
4962 if (!lex_next_chunk(fake_eof)) {
4963 CopLINE_dec(PL_curcop);
4965 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4967 CopLINE_dec(PL_curcop);
4969 /* If it looks like the start of a BOM or raw UTF-16,
4970 * check if it in fact is. */
4973 || *(U8*)s == BOM_UTF8_FIRST_BYTE
4977 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4978 bof = (offset == (Off_t)SvCUR(PL_linestr));
4979 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4980 /* offset may include swallowed CR */
4982 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
4985 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4986 s = swallow_bom((U8*)s);
4989 if (PL_parser->in_pod) {
4990 /* Incest with pod. */
4991 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
4992 sv_setpvs(PL_linestr, "");
4993 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4994 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
4995 PL_last_lop = PL_last_uni = NULL;
4996 PL_parser->in_pod = 0;
4999 if (PL_rsfp || PL_parser->filtered)
5001 } while (PL_parser->in_pod);
5002 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5003 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5004 PL_last_lop = PL_last_uni = NULL;
5005 if (CopLINE(PL_curcop) == 1) {
5006 while (s < PL_bufend && isSPACE(*s))
5008 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5012 if (*s == '#' && *(s+1) == '!')
5014 #ifdef ALTERNATE_SHEBANG
5016 static char const as[] = ALTERNATE_SHEBANG;
5017 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5018 d = s + (sizeof(as) - 1);
5020 #endif /* ALTERNATE_SHEBANG */
5029 while (*d && !isSPACE(*d))
5033 #ifdef ARG_ZERO_IS_SCRIPT
5034 if (ipathend > ipath) {
5036 * HP-UX (at least) sets argv[0] to the script name,
5037 * which makes $^X incorrect. And Digital UNIX and Linux,
5038 * at least, set argv[0] to the basename of the Perl
5039 * interpreter. So, having found "#!", we'll set it right.
5041 SV* copfilesv = CopFILESV(PL_curcop);
5044 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5046 assert(SvPOK(x) || SvGMAGICAL(x));
5047 if (sv_eq(x, copfilesv)) {
5048 sv_setpvn(x, ipath, ipathend - ipath);
5054 const char *bstart = SvPV_const(copfilesv, blen);
5055 const char * const lstart = SvPV_const(x, llen);
5057 bstart += blen - llen;
5058 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5059 sv_setpvn(x, ipath, ipathend - ipath);
5066 /* Anything to do if no copfilesv? */
5068 TAINT_NOT; /* $^X is always tainted, but that's OK */
5070 #endif /* ARG_ZERO_IS_SCRIPT */
5075 d = instr(s,"perl -");
5077 d = instr(s,"perl");
5078 if (d && d[4] == '6')
5081 /* avoid getting into infinite loops when shebang
5082 * line contains "Perl" rather than "perl" */
5084 for (d = ipathend-4; d >= ipath; --d) {
5085 if (isALPHA_FOLD_EQ(*d, 'p')
5086 && !ibcmp(d, "perl", 4))
5096 #ifdef ALTERNATE_SHEBANG
5098 * If the ALTERNATE_SHEBANG on this system starts with a
5099 * character that can be part of a Perl expression, then if
5100 * we see it but not "perl", we're probably looking at the
5101 * start of Perl code, not a request to hand off to some
5102 * other interpreter. Similarly, if "perl" is there, but
5103 * not in the first 'word' of the line, we assume the line
5104 * contains the start of the Perl program.
5106 if (d && *s != '#') {
5107 const char *c = ipath;
5108 while (*c && !strchr("; \t\r\n\f\v#", *c))
5111 d = NULL; /* "perl" not in first word; ignore */
5113 *s = '#'; /* Don't try to parse shebang line */
5115 #endif /* ALTERNATE_SHEBANG */
5120 && !instr(s,"indir")
5121 && instr(PL_origargv[0],"perl"))
5128 while (s < PL_bufend && isSPACE(*s))
5130 if (s < PL_bufend) {
5131 Newx(newargv,PL_origargc+3,char*);
5133 while (s < PL_bufend && !isSPACE(*s))
5136 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5139 newargv = PL_origargv;
5142 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5144 Perl_croak(aTHX_ "Can't exec %s", ipath);
5147 while (*d && !isSPACE(*d))
5149 while (SPACE_OR_TAB(*d))
5153 const bool switches_done = PL_doswitches;
5154 const U32 oldpdb = PL_perldb;
5155 const bool oldn = PL_minus_n;
5156 const bool oldp = PL_minus_p;
5160 bool baduni = FALSE;
5162 const char *d2 = d1 + 1;
5163 if (parse_unicode_opts((const char **)&d2)
5167 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5168 const char * const m = d1;
5169 while (*d1 && !isSPACE(*d1))
5171 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5174 d1 = moreswitches(d1);
5176 if (PL_doswitches && !switches_done) {
5177 int argc = PL_origargc;
5178 char **argv = PL_origargv;
5181 } while (argc && argv[0][0] == '-' && argv[0][1]);
5182 init_argv_symbols(argc,argv);
5184 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5185 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5186 /* if we have already added "LINE: while (<>) {",
5187 we must not do it again */
5189 sv_setpvs(PL_linestr, "");
5190 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5191 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5192 PL_last_lop = PL_last_uni = NULL;
5193 PL_preambled = FALSE;
5194 if (PERLDB_LINE_OR_SAVESRC)
5195 (void)gv_fetchfile(PL_origfilename);
5202 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5203 PL_lex_state = LEX_FORMLINE;
5204 force_next(FORMRBRACK);
5209 #ifdef PERL_STRICT_CR
5210 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5212 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5214 case ' ': case '\t': case '\f': case '\v':
5219 if (PL_lex_state != LEX_NORMAL
5220 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5222 const bool in_comment = *s == '#';
5223 if (*s == '#' && s == PL_linestart && PL_in_eval
5224 && !PL_rsfp && !PL_parser->filtered) {
5225 /* handle eval qq[#line 1 "foo"\n ...] */
5226 CopLINE_dec(PL_curcop);
5230 while (d < PL_bufend && *d != '\n')
5234 else if (d > PL_bufend)
5235 /* Found by Ilya: feed random input to Perl. */
5236 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5239 if (in_comment && d == PL_bufend
5240 && PL_lex_state == LEX_INTERPNORMAL
5241 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5242 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5245 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5246 PL_lex_state = LEX_FORMLINE;
5247 force_next(FORMRBRACK);
5252 while (s < PL_bufend && *s != '\n')
5260 else if (s > PL_bufend)
5261 /* Found by Ilya: feed random input to Perl. */
5262 Perl_croak(aTHX_ "panic: input overflow");
5266 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5274 while (s < PL_bufend && SPACE_OR_TAB(*s))
5277 if (strnEQ(s,"=>",2)) {
5278 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5279 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5280 OPERATOR('-'); /* unary minus */
5283 case 'r': ftst = OP_FTEREAD; break;
5284 case 'w': ftst = OP_FTEWRITE; break;
5285 case 'x': ftst = OP_FTEEXEC; break;
5286 case 'o': ftst = OP_FTEOWNED; break;
5287 case 'R': ftst = OP_FTRREAD; break;
5288 case 'W': ftst = OP_FTRWRITE; break;
5289 case 'X': ftst = OP_FTREXEC; break;
5290 case 'O': ftst = OP_FTROWNED; break;
5291 case 'e': ftst = OP_FTIS; break;
5292 case 'z': ftst = OP_FTZERO; break;
5293 case 's': ftst = OP_FTSIZE; break;
5294 case 'f': ftst = OP_FTFILE; break;
5295 case 'd': ftst = OP_FTDIR; break;
5296 case 'l': ftst = OP_FTLINK; break;
5297 case 'p': ftst = OP_FTPIPE; break;
5298 case 'S': ftst = OP_FTSOCK; break;
5299 case 'u': ftst = OP_FTSUID; break;
5300 case 'g': ftst = OP_FTSGID; break;
5301 case 'k': ftst = OP_FTSVTX; break;
5302 case 'b': ftst = OP_FTBLK; break;
5303 case 'c': ftst = OP_FTCHR; break;
5304 case 't': ftst = OP_FTTTY; break;
5305 case 'T': ftst = OP_FTTEXT; break;
5306 case 'B': ftst = OP_FTBINARY; break;
5307 case 'M': case 'A': case 'C':
5308 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5310 case 'M': ftst = OP_FTMTIME; break;
5311 case 'A': ftst = OP_FTATIME; break;
5312 case 'C': ftst = OP_FTCTIME; break;
5320 PL_last_uni = PL_oldbufptr;
5321 PL_last_lop_op = (OPCODE)ftst;
5322 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5323 "### Saw file test %c\n", (int)tmp);
5328 /* Assume it was a minus followed by a one-letter named
5329 * subroutine call (or a -bareword), then. */
5330 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5331 "### '-%c' looked like a file test but was not\n",
5338 const char tmp = *s++;
5341 if (PL_expect == XOPERATOR)
5346 else if (*s == '>') {
5349 if (((*s == '$' || *s == '&') && s[1] == '*')
5350 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5351 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5352 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5355 PL_expect = XPOSTDEREF;
5358 if (isIDFIRST_lazy_if(s,UTF)) {
5359 s = force_word(s,METHOD,FALSE,TRUE);
5367 if (PL_expect == XOPERATOR) {
5369 && !PL_lex_allbrackets
5370 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5378 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5380 OPERATOR('-'); /* unary minus */
5386 const char tmp = *s++;
5389 if (PL_expect == XOPERATOR)
5394 if (PL_expect == XOPERATOR) {
5396 && !PL_lex_allbrackets
5397 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5405 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5412 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5413 if (PL_expect != XOPERATOR) {
5414 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5415 PL_expect = XOPERATOR;
5416 force_ident(PL_tokenbuf, '*');
5424 if (*s == '=' && !PL_lex_allbrackets
5425 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5433 && !PL_lex_allbrackets
5434 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5439 PL_parser->saw_infix_sigil = 1;
5444 if (PL_expect == XOPERATOR) {
5446 && !PL_lex_allbrackets
5447 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5452 PL_parser->saw_infix_sigil = 1;
5455 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5456 PL_tokenbuf[0] = '%';
5457 s = scan_ident(s, PL_tokenbuf + 1,
5458 sizeof PL_tokenbuf - 1, FALSE);
5460 if (!PL_tokenbuf[1]) {
5463 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5465 PL_tokenbuf[0] = '@';
5467 PL_expect = XOPERATOR;
5468 force_ident_maybe_lex('%');
5473 bof = FEATURE_BITWISE_IS_ENABLED;
5474 if (bof && s[1] == '.')
5476 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5477 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5483 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5485 if (PL_lex_brackets > 100)
5486 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5487 PL_lex_brackstack[PL_lex_brackets++] = 0;
5488 PL_lex_allbrackets++;
5490 const char tmp = *s++;
5495 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5497 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5500 Perl_ck_warner_d(aTHX_
5501 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5502 "Smartmatch is experimental");
5506 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5508 BCop(OP_SCOMPLEMENT);
5510 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5512 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5519 goto just_a_word_zero_gv;
5525 switch (PL_expect) {
5527 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5529 PL_bufptr = s; /* update in case we back off */
5532 "Use of := for an empty attribute list is not allowed");
5539 PL_expect = XTERMBLOCK;
5543 while (isIDFIRST_lazy_if(s,UTF)) {
5546 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5547 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5548 if (tmp < 0) tmp = -tmp;
5563 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5565 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5566 COPLINE_SET_FROM_MULTI_END;
5568 /* MUST advance bufptr here to avoid bogus
5569 "at end of line" context messages from yyerror().
5571 PL_bufptr = s + len;
5572 yyerror("Unterminated attribute parameter in attribute list");
5576 return REPORT(0); /* EOF indicator */
5580 sv_catsv(sv, PL_lex_stuff);
5581 attrs = op_append_elem(OP_LIST, attrs,
5582 newSVOP(OP_CONST, 0, sv));
5583 SvREFCNT_dec_NN(PL_lex_stuff);
5584 PL_lex_stuff = NULL;
5587 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5589 if (PL_in_my == KEY_our) {
5590 deprecate(":unique");
5593 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5596 /* NOTE: any CV attrs applied here need to be part of
5597 the CVf_BUILTIN_ATTRS define in cv.h! */
5598 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5600 CvLVALUE_on(PL_compcv);
5602 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5604 deprecate(":locked");
5606 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5608 CvMETHOD_on(PL_compcv);
5610 else if (!PL_in_my && len == 5
5611 && strnEQ(SvPVX(sv), "const", len))
5614 Perl_ck_warner_d(aTHX_
5615 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5616 ":const is experimental"
5618 CvANONCONST_on(PL_compcv);
5619 if (!CvANON(PL_compcv))
5620 yyerror(":const is not permitted on named "
5623 /* After we've set the flags, it could be argued that
5624 we don't need to do the attributes.pm-based setting
5625 process, and shouldn't bother appending recognized
5626 flags. To experiment with that, uncomment the
5627 following "else". (Note that's already been
5628 uncommented. That keeps the above-applied built-in
5629 attributes from being intercepted (and possibly
5630 rejected) by a package's attribute routines, but is
5631 justified by the performance win for the common case
5632 of applying only built-in attributes.) */
5634 attrs = op_append_elem(OP_LIST, attrs,
5635 newSVOP(OP_CONST, 0,
5639 if (*s == ':' && s[1] != ':')
5642 break; /* require real whitespace or :'s */
5643 /* XXX losing whitespace on sequential attributes here */
5648 && !(PL_expect == XOPERATOR
5649 ? (*s == '=' || *s == ')')
5650 : (*s == '{' || *s == '(')))
5652 const char q = ((*s == '\'') ? '"' : '\'');
5653 /* If here for an expression, and parsed no attrs, back
5655 if (PL_expect == XOPERATOR && !attrs) {
5659 /* MUST advance bufptr here to avoid bogus "at end of line"
5660 context messages from yyerror().
5663 yyerror( (const char *)
5665 ? Perl_form(aTHX_ "Invalid separator character "
5666 "%c%c%c in attribute list", q, *s, q)
5667 : "Unterminated attribute list" ) );
5675 NEXTVAL_NEXTTOKE.opval = attrs;
5681 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5685 PL_lex_allbrackets--;
5689 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5690 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
5694 PL_lex_allbrackets++;
5697 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5704 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5707 PL_lex_allbrackets--;
5713 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5716 if (PL_lex_brackets <= 0)
5717 /* diag_listed_as: Unmatched right %s bracket */
5718 yyerror("Unmatched right square bracket");
5721 PL_lex_allbrackets--;
5722 if (PL_lex_state == LEX_INTERPNORMAL) {
5723 if (PL_lex_brackets == 0) {
5724 if (*s == '-' && s[1] == '>')
5725 PL_lex_state = LEX_INTERPENDMAYBE;
5726 else if (*s != '[' && *s != '{')
5727 PL_lex_state = LEX_INTERPEND;
5734 if (PL_lex_brackets > 100) {
5735 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5737 switch (PL_expect) {
5740 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5741 PL_lex_allbrackets++;
5742 OPERATOR(HASHBRACK);
5744 while (s < PL_bufend && SPACE_OR_TAB(*s))
5747 PL_tokenbuf[0] = '\0';
5748 if (d < PL_bufend && *d == '-') {
5749 PL_tokenbuf[0] = '-';
5751 while (d < PL_bufend && SPACE_OR_TAB(*d))
5754 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
5755 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5757 while (d < PL_bufend && SPACE_OR_TAB(*d))
5760 const char minus = (PL_tokenbuf[0] == '-');
5761 s = force_word(s + minus, WORD, FALSE, TRUE);
5769 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5770 PL_lex_allbrackets++;
5775 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5776 PL_lex_allbrackets++;
5780 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5781 PL_lex_allbrackets++;
5786 if (PL_oldoldbufptr == PL_last_lop)
5787 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5789 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5790 PL_lex_allbrackets++;
5793 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5795 /* This hack is to get the ${} in the message. */
5797 yyerror("syntax error");
5800 OPERATOR(HASHBRACK);
5802 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5803 /* ${...} or @{...} etc., but not print {...}
5804 * Skip the disambiguation and treat this as a block.
5806 goto block_expectation;
5808 /* This hack serves to disambiguate a pair of curlies
5809 * as being a block or an anon hash. Normally, expectation
5810 * determines that, but in cases where we're not in a
5811 * position to expect anything in particular (like inside
5812 * eval"") we have to resolve the ambiguity. This code
5813 * covers the case where the first term in the curlies is a
5814 * quoted string. Most other cases need to be explicitly
5815 * disambiguated by prepending a "+" before the opening
5816 * curly in order to force resolution as an anon hash.
5818 * XXX should probably propagate the outer expectation
5819 * into eval"" to rely less on this hack, but that could
5820 * potentially break current behavior of eval"".
5824 if (*s == '\'' || *s == '"' || *s == '`') {
5825 /* common case: get past first string, handling escapes */
5826 for (t++; t < PL_bufend && *t != *s;)
5831 else if (*s == 'q') {
5834 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5835 && !isWORDCHAR(*t))))
5837 /* skip q//-like construct */
5839 char open, close, term;
5842 while (t < PL_bufend && isSPACE(*t))
5844 /* check for q => */
5845 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5846 OPERATOR(HASHBRACK);
5850 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5854 for (t++; t < PL_bufend; t++) {
5855 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5857 else if (*t == open)
5861 for (t++; t < PL_bufend; t++) {
5862 if (*t == '\\' && t+1 < PL_bufend)
5864 else if (*t == close && --brackets <= 0)
5866 else if (*t == open)
5873 /* skip plain q word */
5874 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5875 t += UTF ? UTF8SKIP(t) : 1;
5877 else if (isWORDCHAR_lazy_if(t,UTF)) {
5878 t += UTF ? UTF8SKIP(t) : 1;
5879 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
5880 t += UTF ? UTF8SKIP(t) : 1;
5882 while (t < PL_bufend && isSPACE(*t))
5884 /* if comma follows first term, call it an anon hash */
5885 /* XXX it could be a comma expression with loop modifiers */
5886 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
5887 || (*t == '=' && t[1] == '>')))
5888 OPERATOR(HASHBRACK);
5889 if (PL_expect == XREF)
5892 /* If there is an opening brace or 'sub:', treat it
5893 as a term to make ${{...}}{k} and &{sub:attr...}
5894 dwim. Otherwise, treat it as a statement, so
5895 map {no strict; ...} works.
5902 if (strnEQ(s, "sub", 3)) {
5913 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5919 pl_yylval.ival = CopLINE(PL_curcop);
5920 PL_copline = NOLINE; /* invalidate current command line number */
5921 TOKEN(formbrack ? '=' : '{');
5923 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5927 if (PL_lex_brackets <= 0)
5928 /* diag_listed_as: Unmatched right %s bracket */
5929 yyerror("Unmatched right curly bracket");
5931 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
5932 PL_lex_allbrackets--;
5933 if (PL_lex_state == LEX_INTERPNORMAL) {
5934 if (PL_lex_brackets == 0) {
5935 if (PL_expect & XFAKEBRACK) {
5936 PL_expect &= XENUMMASK;
5937 PL_lex_state = LEX_INTERPEND;
5939 return yylex(); /* ignore fake brackets */
5941 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5942 && SvEVALED(PL_lex_repl))
5943 PL_lex_state = LEX_INTERPEND;
5944 else if (*s == '-' && s[1] == '>')
5945 PL_lex_state = LEX_INTERPENDMAYBE;
5946 else if (*s != '[' && *s != '{')
5947 PL_lex_state = LEX_INTERPEND;
5950 if (PL_expect & XFAKEBRACK) {
5951 PL_expect &= XENUMMASK;
5953 return yylex(); /* ignore fake brackets */
5955 force_next(formbrack ? '.' : '}');
5956 if (formbrack) LEAVE;
5957 if (formbrack == 2) { /* means . where arguments were expected */
5963 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
5966 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5967 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5974 if (PL_expect == XOPERATOR) {
5975 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5976 && isIDFIRST_lazy_if(s,UTF))
5978 CopLINE_dec(PL_curcop);
5979 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
5980 CopLINE_inc(PL_curcop);
5983 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
5985 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5986 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5992 PL_parser->saw_infix_sigil = 1;
5993 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
5999 PL_tokenbuf[0] = '&';
6000 s = scan_ident(s - 1, PL_tokenbuf + 1,
6001 sizeof PL_tokenbuf - 1, TRUE);
6002 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6003 if (PL_tokenbuf[1]) {
6004 force_ident_maybe_lex('&');
6013 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6014 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6022 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6024 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6025 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6029 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6033 const char tmp = *s++;
6035 if (!PL_lex_allbrackets
6036 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6044 if (!PL_lex_allbrackets
6045 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6054 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6055 && strchr("+-*/%.^&|<",tmp))
6056 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6057 "Reversed %c= operator",(int)tmp);
6059 if (PL_expect == XSTATE
6061 && (s == PL_linestart+1 || s[-2] == '\n') )
6063 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6064 || PL_lex_state != LEX_NORMAL) {
6069 if (strnEQ(s,"=cut",4)) {
6083 PL_parser->in_pod = 1;
6087 if (PL_expect == XBLOCK) {
6089 #ifdef PERL_STRICT_CR
6090 while (SPACE_OR_TAB(*t))
6092 while (SPACE_OR_TAB(*t) || *t == '\r')
6095 if (*t == '\n' || *t == '#') {
6098 SAVEI8(PL_parser->form_lex_state);
6099 SAVEI32(PL_lex_formbrack);
6100 PL_parser->form_lex_state = PL_lex_state;
6101 PL_lex_formbrack = PL_lex_brackets + 1;
6105 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6114 const char tmp = *s++;
6116 /* was this !=~ where !~ was meant?
6117 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6119 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6120 const char *t = s+1;
6122 while (t < PL_bufend && isSPACE(*t))
6125 if (*t == '/' || *t == '?'
6126 || ((*t == 'm' || *t == 's' || *t == 'y')
6127 && !isWORDCHAR(t[1]))
6128 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6129 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6130 "!=~ should be !~");
6132 if (!PL_lex_allbrackets
6133 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6146 if (PL_expect != XOPERATOR) {
6147 if (s[1] != '<' && !strchr(s,'>'))
6149 if (s[1] == '<' && s[2] != '>')
6150 s = scan_heredoc(s);
6152 s = scan_inputsymbol(s);
6153 PL_expect = XOPERATOR;
6154 TOKEN(sublex_start());
6160 if (*s == '=' && !PL_lex_allbrackets
6161 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6166 SHop(OP_LEFT_SHIFT);
6171 if (!PL_lex_allbrackets
6172 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6180 if (!PL_lex_allbrackets
6181 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6190 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6198 const char tmp = *s++;
6200 if (*s == '=' && !PL_lex_allbrackets
6201 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6206 SHop(OP_RIGHT_SHIFT);
6208 else if (tmp == '=') {
6209 if (!PL_lex_allbrackets
6210 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6219 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6228 if (PL_expect == XOPERATOR) {
6229 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6230 return deprecate_commaless_var_list();
6233 else if (PL_expect == XPOSTDEREF) {
6236 POSTDEREF(DOLSHARP);
6241 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6242 PL_tokenbuf[0] = '@';
6243 s = scan_ident(s + 1, PL_tokenbuf + 1,
6244 sizeof PL_tokenbuf - 1, FALSE);
6245 if (PL_expect == XOPERATOR) {
6247 if (PL_bufptr > s) {
6249 PL_bufptr = PL_oldbufptr;
6251 no_op("Array length", d);
6253 if (!PL_tokenbuf[1])
6255 PL_expect = XOPERATOR;
6256 force_ident_maybe_lex('#');
6260 PL_tokenbuf[0] = '$';
6261 s = scan_ident(s, PL_tokenbuf + 1,
6262 sizeof PL_tokenbuf - 1, FALSE);
6263 if (PL_expect == XOPERATOR) {
6265 if (PL_bufptr > s) {
6267 PL_bufptr = PL_oldbufptr;
6271 if (!PL_tokenbuf[1]) {
6273 yyerror("Final $ should be \\$ or $name");
6279 const char tmp = *s;
6280 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6283 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6284 && intuit_more(s)) {
6286 PL_tokenbuf[0] = '@';
6287 if (ckWARN(WARN_SYNTAX)) {
6290 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6291 t += UTF ? UTF8SKIP(t) : 1;
6293 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6294 while (t < PL_bufend && *t != ']')
6296 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6297 "Multidimensional syntax %"UTF8f" not supported",
6298 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6302 else if (*s == '{') {
6304 PL_tokenbuf[0] = '%';
6305 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6306 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6308 char tmpbuf[sizeof PL_tokenbuf];
6311 } while (isSPACE(*t));
6312 if (isIDFIRST_lazy_if(t,UTF)) {
6314 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6319 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6320 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6321 "You need to quote \"%"UTF8f"\"",
6322 UTF8fARG(UTF, len, tmpbuf));
6328 PL_expect = XOPERATOR;
6329 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6330 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6331 if (!islop || PL_last_lop_op == OP_GREPSTART)
6332 PL_expect = XOPERATOR;
6333 else if (strchr("$@\"'`q", *s))
6334 PL_expect = XTERM; /* e.g. print $fh "foo" */
6335 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6336 PL_expect = XTERM; /* e.g. print $fh &sub */
6337 else if (isIDFIRST_lazy_if(s,UTF)) {
6338 char tmpbuf[sizeof PL_tokenbuf];
6340 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6341 if ((t2 = keyword(tmpbuf, len, 0))) {
6342 /* binary operators exclude handle interpretations */
6354 PL_expect = XTERM; /* e.g. print $fh length() */
6359 PL_expect = XTERM; /* e.g. print $fh subr() */
6362 else if (isDIGIT(*s))
6363 PL_expect = XTERM; /* e.g. print $fh 3 */
6364 else if (*s == '.' && isDIGIT(s[1]))
6365 PL_expect = XTERM; /* e.g. print $fh .3 */
6366 else if ((*s == '?' || *s == '-' || *s == '+')
6367 && !isSPACE(s[1]) && s[1] != '=')
6368 PL_expect = XTERM; /* e.g. print $fh -1 */
6369 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6371 PL_expect = XTERM; /* e.g. print $fh /.../
6372 XXX except DORDOR operator
6374 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6376 PL_expect = XTERM; /* print $fh <<"EOF" */
6379 force_ident_maybe_lex('$');
6383 if (PL_expect == XPOSTDEREF)
6385 PL_tokenbuf[0] = '@';
6386 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6387 if (PL_expect == XOPERATOR) {
6389 if (PL_bufptr > s) {
6391 PL_bufptr = PL_oldbufptr;
6396 if (!PL_tokenbuf[1]) {
6399 if (PL_lex_state == LEX_NORMAL)
6401 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6403 PL_tokenbuf[0] = '%';
6405 /* Warn about @ where they meant $. */
6406 if (*s == '[' || *s == '{') {
6407 if (ckWARN(WARN_SYNTAX)) {
6408 S_check_scalar_slice(aTHX_ s);
6412 PL_expect = XOPERATOR;
6413 force_ident_maybe_lex('@');
6416 case '/': /* may be division, defined-or, or pattern */
6417 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6418 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6419 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6424 else if (PL_expect == XOPERATOR) {
6426 if (*s == '=' && !PL_lex_allbrackets
6427 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6435 /* Disable warning on "study /blah/" */
6436 if (PL_oldoldbufptr == PL_last_uni
6437 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6438 || memNE(PL_last_uni, "study", 5)
6439 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6442 s = scan_pat(s,OP_MATCH);
6443 TERM(sublex_start());
6446 case '?': /* conditional */
6448 if (!PL_lex_allbrackets
6449 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6454 PL_lex_allbrackets++;
6458 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6459 #ifdef PERL_STRICT_CR
6462 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6464 && (s == PL_linestart || s[-1] == '\n') )
6467 formbrack = 2; /* dot seen where arguments expected */
6470 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6474 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6477 if (!PL_lex_allbrackets
6478 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6486 pl_yylval.ival = OPf_SPECIAL;
6492 if (*s == '=' && !PL_lex_allbrackets
6493 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6501 case '0': case '1': case '2': case '3': case '4':
6502 case '5': case '6': case '7': case '8': case '9':
6503 s = scan_num(s, &pl_yylval);
6504 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6505 if (PL_expect == XOPERATOR)
6510 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6513 COPLINE_SET_FROM_MULTI_END;
6514 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6515 if (PL_expect == XOPERATOR) {
6516 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6517 return deprecate_commaless_var_list();
6522 pl_yylval.ival = OP_CONST;
6523 TERM(sublex_start());
6526 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6529 printbuf("### Saw string before %s\n", s);
6531 PerlIO_printf(Perl_debug_log,
6532 "### Saw unterminated string\n");
6534 if (PL_expect == XOPERATOR) {
6535 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6536 return deprecate_commaless_var_list();
6543 pl_yylval.ival = OP_CONST;
6544 /* FIXME. I think that this can be const if char *d is replaced by
6545 more localised variables. */
6546 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6547 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6548 pl_yylval.ival = OP_STRINGIFY;
6552 if (pl_yylval.ival == OP_CONST)
6553 COPLINE_SET_FROM_MULTI_END;
6554 TERM(sublex_start());
6557 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6560 printbuf("### Saw backtick string before %s\n", s);
6562 PerlIO_printf(Perl_debug_log,
6563 "### Saw unterminated backtick string\n");
6565 if (PL_expect == XOPERATOR)
6566 no_op("Backticks",s);
6569 pl_yylval.ival = OP_BACKTICK;
6570 TERM(sublex_start());
6574 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6576 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6578 if (PL_expect == XOPERATOR)
6579 no_op("Backslash",s);
6583 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6584 char *start = s + 2;
6585 while (isDIGIT(*start) || *start == '_')
6587 if (*start == '.' && isDIGIT(start[1])) {
6588 s = scan_num(s, &pl_yylval);
6591 else if ((*start == ':' && start[1] == ':')
6592 || (PL_expect == XSTATE && *start == ':'))
6594 else if (PL_expect == XSTATE) {
6596 while (d < PL_bufend && isSPACE(*d)) d++;
6597 if (*d == ':') goto keylookup;
6599 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6600 if (!isALPHA(*start) && (PL_expect == XTERM
6601 || PL_expect == XREF || PL_expect == XSTATE
6602 || PL_expect == XTERMORDORDOR)) {
6603 GV *const gv = gv_fetchpvn_flags(s, start - s,
6604 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6606 s = scan_num(s, &pl_yylval);
6613 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
6666 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
6668 /* Some keywords can be followed by any delimiter, including ':' */
6669 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
6671 /* x::* is just a word, unless x is "CORE" */
6672 if (!anydelim && *s == ':' && s[1] == ':') {
6673 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
6678 while (d < PL_bufend && isSPACE(*d))
6679 d++; /* no comments skipped here, or s### is misparsed */
6681 /* Is this a word before a => operator? */
6682 if (*d == '=' && d[1] == '>') {
6686 = (OP*)newSVOP(OP_CONST, 0,
6687 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6688 pl_yylval.opval->op_private = OPpCONST_BARE;
6692 /* Check for plugged-in keyword */
6696 char *saved_bufptr = PL_bufptr;
6698 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
6700 if (result == KEYWORD_PLUGIN_DECLINE) {
6701 /* not a plugged-in keyword */
6702 PL_bufptr = saved_bufptr;
6703 } else if (result == KEYWORD_PLUGIN_STMT) {
6704 pl_yylval.opval = o;
6706 if (!PL_nexttoke) PL_expect = XSTATE;
6707 return REPORT(PLUGSTMT);
6708 } else if (result == KEYWORD_PLUGIN_EXPR) {
6709 pl_yylval.opval = o;
6711 if (!PL_nexttoke) PL_expect = XOPERATOR;
6712 return REPORT(PLUGEXPR);
6714 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6719 /* Check for built-in keyword */
6720 tmp = keyword(PL_tokenbuf, len, 0);
6722 /* Is this a label? */
6723 if (!anydelim && PL_expect == XSTATE
6724 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
6726 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6727 pl_yylval.pval[len] = '\0';
6728 pl_yylval.pval[len+1] = UTF ? 1 : 0;
6733 /* Check for lexical sub */
6734 if (PL_expect != XOPERATOR) {
6735 char tmpbuf[sizeof PL_tokenbuf + 1];
6737 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6738 off = pad_findmy_pvn(tmpbuf, len+1, 0);
6739 if (off != NOT_IN_PAD) {
6740 assert(off); /* we assume this is boolean-true below */
6741 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6742 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6743 HEK * const stashname = HvNAME_HEK(stash);
6744 sv = newSVhek(stashname);
6745 sv_catpvs(sv, "::");
6746 sv_catpvn_flags(sv, PL_tokenbuf, len,
6747 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6748 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6758 rv2cv_op = newOP(OP_PADANY, 0);
6759 rv2cv_op->op_targ = off;
6760 cv = find_lexical_cv(off);
6768 if (tmp < 0) { /* second-class keyword? */
6769 GV *ogv = NULL; /* override (winner) */
6770 GV *hgv = NULL; /* hidden (loser) */
6771 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
6773 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6774 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
6776 && (cv = GvCVu(gv)))
6778 if (GvIMPORTED_CV(gv))
6780 else if (! CvMETHOD(cv))
6784 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6787 && (isGV_with_GP(gv)
6788 ? GvCVu(gv) && GvIMPORTED_CV(gv)
6789 : SvPCS_IMPORTED(gv)
6790 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
6798 tmp = 0; /* overridden by import or by GLOBAL */
6801 && -tmp==KEY_lock /* XXX generalizable kludge */
6804 tmp = 0; /* any sub overrides "weak" keyword */
6806 else { /* no override */
6808 if (tmp == KEY_dump) {
6809 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6810 "dump() better written as CORE::dump()");
6814 if (hgv && tmp != KEY_x) /* never ambiguous */
6815 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6816 "Ambiguous call resolved as CORE::%s(), "
6817 "qualify as such or use &",
6822 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
6823 && (!anydelim || *s != '#')) {
6824 /* no override, and not s### either; skipspace is safe here
6825 * check for => on following line */
6827 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
6828 STRLEN soff = s - SvPVX(PL_linestr);
6829 s = skipspace_flags(s, LEX_NO_INCLINE);
6830 arrow = *s == '=' && s[1] == '>';
6831 PL_bufptr = SvPVX(PL_linestr) + bufoff;
6832 s = SvPVX(PL_linestr) + soff;
6840 default: /* not a keyword */
6841 /* Trade off - by using this evil construction we can pull the
6842 variable gv into the block labelled keylookup. If not, then
6843 we have to give it function scope so that the goto from the
6844 earlier ':' case doesn't bypass the initialisation. */
6846 just_a_word_zero_gv:
6858 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
6862 /* Get the rest if it looks like a package qualifier */
6864 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
6866 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
6869 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
6870 UTF8fARG(UTF, len, PL_tokenbuf),
6871 *s == '\'' ? "'" : "::");
6876 if (PL_expect == XOPERATOR) {
6877 if (PL_bufptr == PL_linestart) {
6878 CopLINE_dec(PL_curcop);
6879 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6880 CopLINE_inc(PL_curcop);
6883 no_op("Bareword",s);
6886 /* See if the name is "Foo::",
6887 in which case Foo is a bareword
6888 (and a package name). */
6891 && PL_tokenbuf[len - 2] == ':'
6892 && PL_tokenbuf[len - 1] == ':')
6894 if (ckWARN(WARN_BAREWORD)
6895 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
6896 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
6897 "Bareword \"%"UTF8f"\" refers to nonexistent package",
6898 UTF8fARG(UTF, len, PL_tokenbuf));
6900 PL_tokenbuf[len] = '\0';
6909 /* if we saw a global override before, get the right name */
6912 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6915 SV * const tmp_sv = sv;
6916 sv = newSVpvs("CORE::GLOBAL::");
6917 sv_catsv(sv, tmp_sv);
6918 SvREFCNT_dec(tmp_sv);
6922 /* Presume this is going to be a bareword of some sort. */
6924 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6925 pl_yylval.opval->op_private = OPpCONST_BARE;
6927 /* And if "Foo::", then that's what it certainly is. */
6933 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
6934 const_op->op_private = OPpCONST_BARE;
6936 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
6940 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
6943 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
6946 /* Use this var to track whether intuit_method has been
6947 called. intuit_method returns 0 or > 255. */
6950 /* See if it's the indirect object for a list operator. */
6953 && PL_oldoldbufptr < PL_bufptr
6954 && (PL_oldoldbufptr == PL_last_lop
6955 || PL_oldoldbufptr == PL_last_uni)
6956 && /* NO SKIPSPACE BEFORE HERE! */
6958 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
6961 bool immediate_paren = *s == '(';
6963 /* (Now we can afford to cross potential line boundary.) */
6966 /* Two barewords in a row may indicate method call. */
6968 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$')
6969 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
6974 /* If not a declared subroutine, it's an indirect object. */
6975 /* (But it's an indir obj regardless for sort.) */
6976 /* Also, if "_" follows a filetest operator, it's a bareword */
6979 ( !immediate_paren && (PL_last_lop_op == OP_SORT
6981 && (PL_last_lop_op != OP_MAPSTART
6982 && PL_last_lop_op != OP_GREPSTART))))
6983 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6984 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
6988 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
6993 PL_expect = XOPERATOR;
6996 /* Is this a word before a => operator? */
6997 if (*s == '=' && s[1] == '>' && !pkgname) {
7000 if (gvp || (lex && !off)) {
7001 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7002 /* This is our own scalar, created a few lines
7003 above, so this is safe. */
7005 sv_setpv(sv, PL_tokenbuf);
7006 if (UTF && !IN_BYTES
7007 && is_utf8_string((U8*)PL_tokenbuf, len))
7014 /* If followed by a paren, it's certainly a subroutine. */
7019 while (SPACE_OR_TAB(*d))
7021 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7026 NEXTVAL_NEXTTOKE.opval =
7027 off ? rv2cv_op : pl_yylval.opval;
7029 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7030 else op_free(rv2cv_op), force_next(WORD);
7035 /* If followed by var or block, call it a method (unless sub) */
7037 if ((*s == '$' || *s == '{') && !cv) {
7039 PL_last_lop = PL_oldbufptr;
7040 PL_last_lop_op = OP_METHOD;
7041 if (!PL_lex_allbrackets
7042 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7044 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7046 PL_expect = XBLOCKTERM;
7048 return REPORT(METHOD);
7051 /* If followed by a bareword, see if it looks like indir obj. */
7053 if (tmp == 1 && !orig_keyword
7054 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7055 && (tmp = intuit_method(s, lex ? NULL : sv, cv))) {
7058 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7060 sv_setpvn(sv, PL_tokenbuf, len);
7061 if (UTF && !IN_BYTES
7062 && is_utf8_string((U8*)PL_tokenbuf, len))
7064 else SvUTF8_off(sv);
7067 if (tmp == METHOD && !PL_lex_allbrackets
7068 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7070 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7075 /* Not a method, so call it a subroutine (if defined) */
7078 /* Check for a constant sub */
7079 if ((sv = cv_const_sv_or_av(cv))) {
7082 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7083 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7084 if (SvTYPE(sv) == SVt_PVAV)
7085 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7088 pl_yylval.opval->op_private = 0;
7089 pl_yylval.opval->op_folded = 1;
7090 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7095 op_free(pl_yylval.opval);
7097 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7098 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7099 PL_last_lop = PL_oldbufptr;
7100 PL_last_lop_op = OP_ENTERSUB;
7101 /* Is there a prototype? */
7105 STRLEN protolen = CvPROTOLEN(cv);
7106 const char *proto = CvPROTO(cv);
7108 proto = S_strip_spaces(aTHX_ proto, &protolen);
7111 if ((optional = *proto == ';'))
7114 while (*proto == ';');
7118 *proto == '$' || *proto == '_'
7119 || *proto == '*' || *proto == '+'
7124 *proto == '\\' && proto[1] && proto[2] == '\0'
7127 UNIPROTO(UNIOPSUB,optional);
7128 if (*proto == '\\' && proto[1] == '[') {
7129 const char *p = proto + 2;
7130 while(*p && *p != ']')
7132 if(*p == ']' && !p[1])
7133 UNIPROTO(UNIOPSUB,optional);
7135 if (*proto == '&' && *s == '{') {
7137 sv_setpvs(PL_subname, "__ANON__");
7139 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7140 if (!PL_lex_allbrackets
7141 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7143 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7148 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7150 force_next(off ? PRIVATEREF : WORD);
7151 if (!PL_lex_allbrackets
7152 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7154 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7159 /* Call it a bare word */
7161 if (PL_hints & HINT_STRICT_SUBS)
7162 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7165 /* after "print" and similar functions (corresponding to
7166 * "F? L" in opcode.pl), whatever wasn't already parsed as
7167 * a filehandle should be subject to "strict subs".
7168 * Likewise for the optional indirect-object argument to system
7169 * or exec, which can't be a bareword */
7170 if ((PL_last_lop_op == OP_PRINT
7171 || PL_last_lop_op == OP_PRTF
7172 || PL_last_lop_op == OP_SAY
7173 || PL_last_lop_op == OP_SYSTEM
7174 || PL_last_lop_op == OP_EXEC)
7175 && (PL_hints & HINT_STRICT_SUBS))
7176 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7177 if (lastchar != '-') {
7178 if (ckWARN(WARN_RESERVED)) {
7182 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7184 /* PL_warn_reserved is constant */
7185 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7186 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7196 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7197 && saw_infix_sigil) {
7198 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7199 "Operator or semicolon missing before %c%"UTF8f,
7201 UTF8fARG(UTF, strlen(PL_tokenbuf),
7203 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7204 "Ambiguous use of %c resolved as operator %c",
7205 lastchar, lastchar);
7212 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7217 (OP*)newSVOP(OP_CONST, 0,
7218 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7221 case KEY___PACKAGE__:
7223 (OP*)newSVOP(OP_CONST, 0,
7225 ? newSVhek(HvNAME_HEK(PL_curstash))
7232 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7233 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7236 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7238 gv_init(gv,stash,"DATA",4,0);
7241 GvIOp(gv) = newIO();
7242 IoIFP(GvIOp(gv)) = PL_rsfp;
7243 #if defined(HAS_FCNTL) && defined(F_SETFD) && defined(FD_CLOEXEC)
7245 const int fd = PerlIO_fileno(PL_rsfp);
7247 fcntl(fd,F_SETFD, FD_CLOEXEC);
7251 /* Mark this internal pseudo-handle as clean */
7252 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7253 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7254 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7256 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7257 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7258 /* if the script was opened in binmode, we need to revert
7259 * it to text mode for compatibility; but only iff it has CRs
7260 * XXX this is a questionable hack at best. */
7261 if (PL_bufend-PL_bufptr > 2
7262 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7265 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7266 loc = PerlIO_tell(PL_rsfp);
7267 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7270 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7272 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7273 #endif /* NETWARE */
7275 PerlIO_seek(PL_rsfp, loc, 0);
7279 #ifdef PERLIO_LAYERS
7282 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7283 else if (IN_ENCODING) {
7289 XPUSHs(_get_encoding());
7291 call_method("name", G_SCALAR);
7295 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7296 Perl_form(aTHX_ ":encoding(%"SVf")",
7309 FUN0OP(CvCLONE(PL_compcv)
7310 ? newOP(OP_RUNCV, 0)
7311 : newPVOP(OP_RUNCV,0,NULL));
7320 if (PL_expect == XSTATE) {
7331 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7332 if ((*s == ':' && s[1] == ':')
7333 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7337 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7341 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7342 UTF8fARG(UTF, len, PL_tokenbuf));
7345 else if (tmp == KEY_require || tmp == KEY_do
7347 /* that's a way to remember we saw "CORE::" */
7359 LOP(OP_ACCEPT,XTERM);
7362 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7367 LOP(OP_ATAN2,XTERM);
7373 LOP(OP_BINMODE,XTERM);
7376 LOP(OP_BLESS,XTERM);
7385 /* We have to disambiguate the two senses of
7386 "continue". If the next token is a '{' then
7387 treat it as the start of a continue block;
7388 otherwise treat it as a control operator.
7398 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7408 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7417 if (!PL_cryptseen) {
7418 PL_cryptseen = TRUE;
7422 LOP(OP_CRYPT,XTERM);
7425 LOP(OP_CHMOD,XTERM);
7428 LOP(OP_CHOWN,XTERM);
7431 LOP(OP_CONNECT,XTERM);
7451 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7453 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7454 && !keyword(PL_tokenbuf + 1, len, 0)) {
7457 force_ident_maybe_lex('&');
7462 if (orig_keyword == KEY_do) {
7471 PL_hints |= HINT_BLOCK_SCOPE;
7481 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7482 STR_WITH_LEN("NDBM_File::"),
7483 STR_WITH_LEN("DB_File::"),
7484 STR_WITH_LEN("GDBM_File::"),
7485 STR_WITH_LEN("SDBM_File::"),
7486 STR_WITH_LEN("ODBM_File::"),
7488 LOP(OP_DBMOPEN,XTERM);
7500 pl_yylval.ival = CopLINE(PL_curcop);
7504 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7516 if (*s == '{') { /* block eval */
7517 PL_expect = XTERMBLOCK;
7518 UNIBRACK(OP_ENTERTRY);
7520 else { /* string eval */
7522 UNIBRACK(OP_ENTEREVAL);
7527 UNIBRACK(-OP_ENTEREVAL);
7541 case KEY_endhostent:
7547 case KEY_endservent:
7550 case KEY_endprotoent:
7561 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7563 pl_yylval.ival = CopLINE(PL_curcop);
7565 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
7568 if ((PL_bufend - p) >= 3
7569 && strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7573 else if ((PL_bufend - p) >= 4
7574 && strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7577 /* skip optional package name, as in "for my abc $x (..)" */
7578 if (isIDFIRST_lazy_if(p,UTF)) {
7579 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7583 Perl_croak(aTHX_ "Missing $ on loop variable");
7588 LOP(OP_FORMLINE,XTERM);
7597 LOP(OP_FCNTL,XTERM);
7603 LOP(OP_FLOCK,XTERM);
7606 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7611 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7616 LOP(OP_GREPSTART, XREF);
7633 case KEY_getpriority:
7634 LOP(OP_GETPRIORITY,XTERM);
7636 case KEY_getprotobyname:
7639 case KEY_getprotobynumber:
7640 LOP(OP_GPBYNUMBER,XTERM);
7642 case KEY_getprotoent:
7654 case KEY_getpeername:
7655 UNI(OP_GETPEERNAME);
7657 case KEY_gethostbyname:
7660 case KEY_gethostbyaddr:
7661 LOP(OP_GHBYADDR,XTERM);
7663 case KEY_gethostent:
7666 case KEY_getnetbyname:
7669 case KEY_getnetbyaddr:
7670 LOP(OP_GNBYADDR,XTERM);
7675 case KEY_getservbyname:
7676 LOP(OP_GSBYNAME,XTERM);
7678 case KEY_getservbyport:
7679 LOP(OP_GSBYPORT,XTERM);
7681 case KEY_getservent:
7684 case KEY_getsockname:
7685 UNI(OP_GETSOCKNAME);
7687 case KEY_getsockopt:
7688 LOP(OP_GSOCKOPT,XTERM);
7703 pl_yylval.ival = CopLINE(PL_curcop);
7704 Perl_ck_warner_d(aTHX_
7705 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7706 "given is experimental");
7711 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
7719 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7721 pl_yylval.ival = CopLINE(PL_curcop);
7725 LOP(OP_INDEX,XTERM);
7731 LOP(OP_IOCTL,XTERM);
7759 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7764 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7778 LOP(OP_LISTEN,XTERM);
7787 s = scan_pat(s,OP_MATCH);
7788 TERM(sublex_start());
7791 LOP(OP_MAPSTART, XREF);
7794 LOP(OP_MKDIR,XTERM);
7797 LOP(OP_MSGCTL,XTERM);
7800 LOP(OP_MSGGET,XTERM);
7803 LOP(OP_MSGRCV,XTERM);
7806 LOP(OP_MSGSND,XTERM);
7812 yyerror(Perl_form(aTHX_
7813 "Can't redeclare \"%s\" in \"%s\"",
7814 tmp == KEY_my ? "my" :
7815 tmp == KEY_state ? "state" : "our",
7816 PL_in_my == KEY_my ? "my" :
7817 PL_in_my == KEY_state ? "state" : "our"));
7819 PL_in_my = (U16)tmp;
7821 if (isIDFIRST_lazy_if(s,UTF)) {
7822 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7823 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7825 if (!FEATURE_LEXSUBS_IS_ENABLED)
7827 "Experimental \"%s\" subs not enabled",
7828 tmp == KEY_my ? "my" :
7829 tmp == KEY_state ? "state" : "our");
7830 Perl_ck_warner_d(aTHX_
7831 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
7832 "The lexical_subs feature is experimental");
7835 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7836 if (!PL_in_my_stash) {
7840 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7841 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
7842 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7852 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7857 s = tokenize_use(0, s);
7861 if (*s == '(' || (s = skipspace(s), *s == '('))
7864 if (!PL_lex_allbrackets
7865 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7867 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7874 if (isIDFIRST_lazy_if(s,UTF)) {
7876 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
7878 for (t=d; isSPACE(*t);)
7880 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7882 && !(t[0] == '=' && t[1] == '>')
7883 && !(t[0] == ':' && t[1] == ':')
7884 && !keyword(s, d-s, 0)
7886 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7887 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
7888 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7894 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7896 pl_yylval.ival = OP_OR;
7906 LOP(OP_OPEN_DIR,XTERM);
7909 checkcomma(s,PL_tokenbuf,"filehandle");
7913 checkcomma(s,PL_tokenbuf,"filehandle");
7932 s = force_word(s,WORD,FALSE,TRUE);
7934 s = force_strict_version(s);
7938 LOP(OP_PIPE_OP,XTERM);
7941 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7944 COPLINE_SET_FROM_MULTI_END;
7945 pl_yylval.ival = OP_CONST;
7946 TERM(sublex_start());
7953 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7956 COPLINE_SET_FROM_MULTI_END;
7957 PL_expect = XOPERATOR;
7958 if (SvCUR(PL_lex_stuff)) {
7959 int warned_comma = !ckWARN(WARN_QW);
7960 int warned_comment = warned_comma;
7961 d = SvPV_force(PL_lex_stuff, len);
7963 for (; isSPACE(*d) && len; --len, ++d)
7968 if (!warned_comma || !warned_comment) {
7969 for (; !isSPACE(*d) && len; --len, ++d) {
7970 if (!warned_comma && *d == ',') {
7971 Perl_warner(aTHX_ packWARN(WARN_QW),
7972 "Possible attempt to separate words with commas");
7975 else if (!warned_comment && *d == '#') {
7976 Perl_warner(aTHX_ packWARN(WARN_QW),
7977 "Possible attempt to put comments in qw() list");
7983 for (; !isSPACE(*d) && len; --len, ++d)
7986 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
7987 words = op_append_elem(OP_LIST, words,
7988 newSVOP(OP_CONST, 0, tokeq(sv)));
7993 words = newNULLLIST();
7994 SvREFCNT_dec_NN(PL_lex_stuff);
7995 PL_lex_stuff = NULL;
7996 PL_expect = XOPERATOR;
7997 pl_yylval.opval = sawparens(words);
8002 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8005 pl_yylval.ival = OP_STRINGIFY;
8006 if (SvIVX(PL_lex_stuff) == '\'')
8007 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8008 TERM(sublex_start());
8011 s = scan_pat(s,OP_QR);
8012 TERM(sublex_start());
8015 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8018 pl_yylval.ival = OP_BACKTICK;
8019 TERM(sublex_start());
8027 s = force_version(s, FALSE);
8029 else if (*s != 'v' || !isDIGIT(s[1])
8030 || (s = force_version(s, TRUE), *s == 'v'))
8032 *PL_tokenbuf = '\0';
8033 s = force_word(s,WORD,TRUE,TRUE);
8034 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8035 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8036 GV_ADD | (UTF ? SVf_UTF8 : 0));
8038 yyerror("<> at require-statement should be quotes");
8040 if (orig_keyword == KEY_require) {
8046 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8048 PL_last_uni = PL_oldbufptr;
8049 PL_last_lop_op = OP_REQUIRE;
8051 return REPORT( (int)REQUIRE );
8060 LOP(OP_RENAME,XTERM);
8069 LOP(OP_RINDEX,XTERM);
8078 UNIDOR(OP_READLINE);
8081 UNIDOR(OP_BACKTICK);
8090 LOP(OP_REVERSE,XTERM);
8093 UNIDOR(OP_READLINK);
8100 if (pl_yylval.opval)
8101 TERM(sublex_start());
8103 TOKEN(1); /* force error */
8106 checkcomma(s,PL_tokenbuf,"filehandle");
8116 LOP(OP_SELECT,XTERM);
8122 LOP(OP_SEMCTL,XTERM);
8125 LOP(OP_SEMGET,XTERM);
8128 LOP(OP_SEMOP,XTERM);
8134 LOP(OP_SETPGRP,XTERM);
8136 case KEY_setpriority:
8137 LOP(OP_SETPRIORITY,XTERM);
8139 case KEY_sethostent:
8145 case KEY_setservent:
8148 case KEY_setprotoent:
8158 LOP(OP_SEEKDIR,XTERM);
8160 case KEY_setsockopt:
8161 LOP(OP_SSOCKOPT,XTERM);
8167 LOP(OP_SHMCTL,XTERM);
8170 LOP(OP_SHMGET,XTERM);
8173 LOP(OP_SHMREAD,XTERM);
8176 LOP(OP_SHMWRITE,XTERM);
8179 LOP(OP_SHUTDOWN,XTERM);
8188 LOP(OP_SOCKET,XTERM);
8190 case KEY_socketpair:
8191 LOP(OP_SOCKPAIR,XTERM);
8194 checkcomma(s,PL_tokenbuf,"subroutine name");
8197 s = force_word(s,WORD,TRUE,TRUE);
8201 LOP(OP_SPLIT,XTERM);
8204 LOP(OP_SPRINTF,XTERM);
8207 LOP(OP_SPLICE,XTERM);
8222 LOP(OP_SUBSTR,XTERM);
8228 char * const tmpbuf = PL_tokenbuf + 1;
8229 expectation attrful;
8230 bool have_name, have_proto;
8231 const int key = tmp;
8232 SV *format_name = NULL;
8237 if (isIDFIRST_lazy_if(s,UTF)
8239 || (*s == ':' && s[1] == ':'))
8243 attrful = XATTRBLOCK;
8244 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8246 if (key == KEY_format)
8247 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8249 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8251 PL_tokenbuf, len + 1, 0
8253 sv_setpvn(PL_subname, tmpbuf, len);
8255 sv_setsv(PL_subname,PL_curstname);
8256 sv_catpvs(PL_subname,"::");
8257 sv_catpvn(PL_subname,tmpbuf,len);
8259 if (SvUTF8(PL_linestr))
8260 SvUTF8_on(PL_subname);
8267 if (key == KEY_my || key == KEY_our || key==KEY_state)
8270 /* diag_listed_as: Missing name in "%s sub" */
8272 "Missing name in \"%s\"", PL_bufptr);
8274 PL_expect = XTERMBLOCK;
8275 attrful = XATTRTERM;
8276 sv_setpvs(PL_subname,"?");
8280 if (key == KEY_format) {
8282 NEXTVAL_NEXTTOKE.opval
8283 = (OP*)newSVOP(OP_CONST,0, format_name);
8284 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8290 /* Look for a prototype */
8291 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8292 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8293 COPLINE_SET_FROM_MULTI_END;
8295 Perl_croak(aTHX_ "Prototype not terminated");
8296 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8304 if (*s == ':' && s[1] != ':')
8305 PL_expect = attrful;
8306 else if ((*s != '{' && *s != '(') && key != KEY_format) {
8307 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8308 key == KEY_DESTROY || key == KEY_BEGIN ||
8309 key == KEY_UNITCHECK || key == KEY_CHECK ||
8310 key == KEY_INIT || key == KEY_END ||
8311 key == KEY_my || key == KEY_state ||
8314 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8315 else if (*s != ';' && *s != '}')
8316 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8320 NEXTVAL_NEXTTOKE.opval =
8321 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8322 PL_lex_stuff = NULL;
8327 sv_setpvs(PL_subname, "__ANON__");
8329 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8332 force_ident_maybe_lex('&');
8337 LOP(OP_SYSTEM,XREF);
8340 LOP(OP_SYMLINK,XTERM);
8343 LOP(OP_SYSCALL,XTERM);
8346 LOP(OP_SYSOPEN,XTERM);
8349 LOP(OP_SYSSEEK,XTERM);
8352 LOP(OP_SYSREAD,XTERM);
8355 LOP(OP_SYSWRITE,XTERM);
8360 TERM(sublex_start());
8381 LOP(OP_TRUNCATE,XTERM);
8393 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8395 pl_yylval.ival = CopLINE(PL_curcop);
8399 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8401 pl_yylval.ival = CopLINE(PL_curcop);
8405 LOP(OP_UNLINK,XTERM);
8411 LOP(OP_UNPACK,XTERM);
8414 LOP(OP_UTIME,XTERM);
8420 LOP(OP_UNSHIFT,XTERM);
8423 s = tokenize_use(1, s);
8433 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8435 pl_yylval.ival = CopLINE(PL_curcop);
8436 Perl_ck_warner_d(aTHX_
8437 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8438 "when is experimental");
8442 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8444 pl_yylval.ival = CopLINE(PL_curcop);
8448 PL_hints |= HINT_BLOCK_SCOPE;
8455 LOP(OP_WAITPID,XTERM);
8461 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8462 * we use the same number on EBCDIC */
8463 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8467 if (PL_expect == XOPERATOR) {
8468 if (*s == '=' && !PL_lex_allbrackets
8469 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8479 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8481 pl_yylval.ival = OP_XOR;
8490 Looks up an identifier in the pad or in a package
8493 PRIVATEREF if this is a lexical name.
8494 WORD if this belongs to a package.
8497 if we're in a my declaration
8498 croak if they tried to say my($foo::bar)
8499 build the ops for a my() declaration
8500 if it's an access to a my() variable
8501 build ops for access to a my() variable
8502 if in a dq string, and they've said @foo and we can't find @foo
8504 build ops for a bareword
8508 S_pending_ident(pTHX)
8511 const char pit = (char)pl_yylval.ival;
8512 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8513 /* All routes through this function want to know if there is a colon. */
8514 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8516 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8517 "### Pending identifier '%s'\n", PL_tokenbuf); });
8519 /* if we're in a my(), we can't allow dynamics here.
8520 $foo'bar has already been turned into $foo::bar, so
8521 just check for colons.
8523 if it's a legal name, the OP is a PADANY.
8526 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8528 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8529 "variable %s in \"our\"",
8530 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8531 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8535 /* "my" variable %s can't be in a package */
8536 /* PL_no_myglob is constant */
8537 GCC_DIAG_IGNORE(-Wformat-nonliteral);
8538 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8539 PL_in_my == KEY_my ? "my" : "state",
8540 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
8542 UTF ? SVf_UTF8 : 0);
8546 pl_yylval.opval = newOP(OP_PADANY, 0);
8547 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8548 UTF ? SVf_UTF8 : 0);
8554 build the ops for accesses to a my() variable.
8559 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8561 if (tmp != NOT_IN_PAD) {
8562 /* might be an "our" variable" */
8563 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8564 /* build ops for a bareword */
8565 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8566 HEK * const stashname = HvNAME_HEK(stash);
8567 SV * const sym = newSVhek(stashname);
8568 sv_catpvs(sym, "::");
8569 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
8570 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8571 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8575 ((PL_tokenbuf[0] == '$') ? SVt_PV
8576 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8581 pl_yylval.opval = newOP(OP_PADANY, 0);
8582 pl_yylval.opval->op_targ = tmp;
8588 Whine if they've said @foo in a doublequoted string,
8589 and @foo isn't a variable we can find in the symbol
8592 if (ckWARN(WARN_AMBIGUOUS)
8594 && PL_lex_state != LEX_NORMAL
8595 && !PL_lex_brackets)
8597 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8598 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8599 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
8600 /* DO NOT warn for @- and @+ */
8601 && !( PL_tokenbuf[2] == '\0'
8602 && ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8605 /* Downgraded from fatal to warning 20000522 mjd */
8606 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8607 "Possible unintended interpolation of %"UTF8f
8609 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8613 /* build ops for a bareword */
8614 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8615 newSVpvn_flags(PL_tokenbuf + 1,
8617 UTF ? SVf_UTF8 : 0 ));
8618 pl_yylval.opval->op_private = OPpCONST_ENTERED;
8620 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8621 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
8622 | ( UTF ? SVf_UTF8 : 0 ),
8623 ((PL_tokenbuf[0] == '$') ? SVt_PV
8624 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8630 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
8632 PERL_ARGS_ASSERT_CHECKCOMMA;
8634 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
8635 if (ckWARN(WARN_SYNTAX)) {
8638 for (w = s+2; *w && level; w++) {
8646 /* the list of chars below is for end of statements or
8647 * block / parens, boolean operators (&&, ||, //) and branch
8648 * constructs (or, and, if, until, unless, while, err, for).
8649 * Not a very solid hack... */
8650 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
8651 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8652 "%s (...) interpreted as function",name);
8655 while (s < PL_bufend && isSPACE(*s))
8659 while (s < PL_bufend && isSPACE(*s))
8661 if (isIDFIRST_lazy_if(s,UTF)) {
8662 const char * const w = s;
8663 s += UTF ? UTF8SKIP(s) : 1;
8664 while (isWORDCHAR_lazy_if(s,UTF))
8665 s += UTF ? UTF8SKIP(s) : 1;
8666 while (s < PL_bufend && isSPACE(*s))
8671 if (keyword(w, s - w, 0))
8674 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8675 if (gv && GvCVu(gv))
8679 Copy(w, tmpbuf+1, s - w, char);
8681 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
8682 if (off != NOT_IN_PAD) return;
8684 Perl_croak(aTHX_ "No comma allowed after %s", what);
8689 /* S_new_constant(): do any overload::constant lookup.
8691 Either returns sv, or mortalizes/frees sv and returns a new SV*.
8692 Best used as sv=new_constant(..., sv, ...).
8693 If s, pv are NULL, calls subroutine with one argument,
8694 and <type> is used with error messages only.
8695 <type> is assumed to be well formed UTF-8 */
8698 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8699 SV *sv, SV *pv, const char *type, STRLEN typelen)
8702 HV * table = GvHV(PL_hintgv); /* ^H */
8707 const char *why1 = "", *why2 = "", *why3 = "";
8709 PERL_ARGS_ASSERT_NEW_CONSTANT;
8710 /* We assume that this is true: */
8711 if (*key == 'c') { assert (strEQ(key, "charnames")); }
8714 /* charnames doesn't work well if there have been errors found */
8715 if (PL_error_count > 0 && *key == 'c')
8717 SvREFCNT_dec_NN(sv);
8718 return &PL_sv_undef;
8721 sv_2mortal(sv); /* Parent created it permanently */
8723 || ! (PL_hints & HINT_LOCALIZE_HH)
8724 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8729 /* Here haven't found what we're looking for. If it is charnames,
8730 * perhaps it needs to be loaded. Try doing that before giving up */
8732 Perl_load_module(aTHX_
8734 newSVpvs("_charnames"),
8735 /* version parameter; no need to specify it, as if
8736 * we get too early a version, will fail anyway,
8737 * not being able to find '_charnames' */
8742 assert(sp == PL_stack_sp);
8743 table = GvHV(PL_hintgv);
8745 && (PL_hints & HINT_LOCALIZE_HH)
8746 && (cvp = hv_fetch(table, key, keylen, FALSE))
8752 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8753 msg = Perl_form(aTHX_
8754 "Constant(%.*s) unknown",
8755 (int)(type ? typelen : len),
8761 why3 = "} is not defined";
8764 msg = Perl_form(aTHX_
8765 /* The +3 is for '\N{'; -4 for that, plus '}' */
8766 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
8770 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
8771 (int)(type ? typelen : len),
8772 (type ? type: s), why1, why2, why3);
8775 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
8776 return SvREFCNT_inc_simple_NN(sv);
8781 pv = newSVpvn_flags(s, len, SVs_TEMP);
8783 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
8785 typesv = &PL_sv_undef;
8787 PUSHSTACKi(PERLSI_OVERLOAD);
8799 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
8803 /* Check the eval first */
8804 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
8806 const char * errstr;
8807 sv_catpvs(errsv, "Propagated");
8808 errstr = SvPV_const(errsv, errlen);
8809 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
8811 res = SvREFCNT_inc_simple_NN(sv);
8815 SvREFCNT_inc_simple_void_NN(res);
8824 why1 = "Call to &{$^H{";
8826 why3 = "}} did not return a defined value";
8828 (void)sv_2mortal(sv);
8835 PERL_STATIC_INLINE void
8836 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
8837 PERL_ARGS_ASSERT_PARSE_IDENT;
8841 Perl_croak(aTHX_ "%s", ident_too_long);
8842 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
8843 /* The UTF-8 case must come first, otherwise things
8844 * like c\N{COMBINING TILDE} would start failing, as the
8845 * isWORDCHAR_A case below would gobble the 'c' up.
8848 char *t = *s + UTF8SKIP(*s);
8849 while (isIDCONT_utf8((U8*)t))
8851 if (*d + (t - *s) > e)
8852 Perl_croak(aTHX_ "%s", ident_too_long);
8853 Copy(*s, *d, t - *s, char);
8857 else if ( isWORDCHAR_A(**s) ) {
8860 } while (isWORDCHAR_A(**s) && *d < e);
8862 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
8867 else if (allow_package && **s == ':' && (*s)[1] == ':'
8868 /* Disallow things like Foo::$bar. For the curious, this is
8869 * the code path that triggers the "Bad name after" warning
8870 * when looking for barewords.
8872 && (*s)[2] != '$') {
8882 /* Returns a NUL terminated string, with the length of the string written to
8886 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
8889 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8890 bool is_utf8 = cBOOL(UTF);
8892 PERL_ARGS_ASSERT_SCAN_WORD;
8894 parse_ident(&s, &d, e, allow_package, is_utf8);
8900 /* Is the byte 'd' a legal single character identifier name? 'u' is true
8901 * iff Unicode semantics are to be used. The legal ones are any of:
8902 * a) all ASCII characters except:
8903 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
8905 * The final case currently doesn't get this far in the program, so we
8906 * don't test for it. If that were to change, it would be ok to allow it.
8907 * c) When not under Unicode rules, any upper Latin1 character
8908 * d) Otherwise, when unicode rules are used, all XIDS characters.
8910 * Because all ASCII characters have the same representation whether
8911 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
8912 * '{' without knowing if is UTF-8 or not.
8913 * EBCDIC already uses the rules that ASCII platforms will use after the
8914 * deprecation cycle; see comment below about the deprecation. */
8916 # define VALID_LEN_ONE_IDENT(s, is_utf8) \
8917 (isGRAPH_A(*(s)) || ((is_utf8) \
8918 ? isIDFIRST_utf8((U8*) (s)) \
8920 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
8922 # define VALID_LEN_ONE_IDENT(s, is_utf8) \
8923 (isGRAPH_A(*(s)) || ((is_utf8) \
8924 ? isIDFIRST_utf8((U8*) (s)) \
8925 : ! isASCII_utf8((U8*) (s))))
8929 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
8931 I32 herelines = PL_parser->herelines;
8932 SSize_t bracket = -1;
8935 char * const e = d + destlen - 3; /* two-character token, ending NUL */
8936 bool is_utf8 = cBOOL(UTF);
8937 I32 orig_copline = 0, tmp_copline = 0;
8939 PERL_ARGS_ASSERT_SCAN_IDENT;
8941 if (isSPACE(*s) || !*s)
8944 while (isDIGIT(*s)) {
8946 Perl_croak(aTHX_ "%s", ident_too_long);
8950 else { /* See if it is a "normal" identifier */
8951 parse_ident(&s, &d, e, 1, is_utf8);
8956 /* Either a digit variable, or parse_ident() found an identifier
8957 (anything valid as a bareword), so job done and return. */
8958 if (PL_lex_state != LEX_NORMAL)
8959 PL_lex_state = LEX_INTERPENDMAYBE;
8963 /* Here, it is not a run-of-the-mill identifier name */
8965 if (*s == '$' && s[1]
8966 && (isIDFIRST_lazy_if(s+1,is_utf8)
8967 || isDIGIT_A((U8)s[1])
8970 || strnEQ(s+1,"::",2)) )
8972 /* Dereferencing a value in a scalar variable.
8973 The alternatives are different syntaxes for a scalar variable.
8974 Using ' as a leading package separator isn't allowed. :: is. */
8977 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
8979 bracket = s - SvPVX(PL_linestr);
8981 orig_copline = CopLINE(PL_curcop);
8982 if (s < PL_bufend && isSPACE(*s)) {
8986 if ((s <= PL_bufend - (is_utf8)
8989 && VALID_LEN_ONE_IDENT(s, is_utf8))
8991 /* Deprecate all non-graphic characters. Include SHY as a non-graphic,
8992 * because often it has no graphic representation. (We can't get to
8993 * here with SHY when 'is_utf8' is true, so no need to include a UTF-8
8996 ? ! isGRAPH_utf8( (U8*) s)
8997 : (! isGRAPH_L1( (U8) *s)
8998 || UNLIKELY((U8) *(s) == LATIN1_TO_NATIVE(0xAD))))
9000 deprecate("literal non-graphic characters in variable names");
9004 const STRLEN skip = UTF8SKIP(s);
9007 for ( i = 0; i < skip; i++ )
9015 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9016 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9020 /* Warn about ambiguous code after unary operators if {...} notation isn't
9021 used. There's no difference in ambiguity; it's merely a heuristic
9022 about when not to warn. */
9023 else if (ck_uni && bracket == -1)
9025 if (bracket != -1) {
9026 /* If we were processing {...} notation then... */
9027 if (isIDFIRST_lazy_if(d,is_utf8)) {
9028 /* if it starts as a valid identifier, assume that it is one.
9029 (the later check for } being at the expected point will trap
9030 cases where this doesn't pan out.) */
9031 d += is_utf8 ? UTF8SKIP(d) : 1;
9032 parse_ident(&s, &d, e, 1, is_utf8);
9034 tmp_copline = CopLINE(PL_curcop);
9035 if (s < PL_bufend && isSPACE(*s)) {
9038 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9039 /* ${foo[0]} and ${foo{bar}} notation. */
9040 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9041 const char * const brack =
9043 ((*s == '[') ? "[...]" : "{...}");
9044 orig_copline = CopLINE(PL_curcop);
9045 CopLINE_set(PL_curcop, tmp_copline);
9046 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9047 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9048 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9049 funny, dest, brack, funny, dest, brack);
9050 CopLINE_set(PL_curcop, orig_copline);
9053 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9054 PL_lex_allbrackets++;
9058 /* Handle extended ${^Foo} variables
9059 * 1999-02-27 mjd-perl-patch@plover.com */
9060 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9064 while (isWORDCHAR(*s) && d < e) {
9068 Perl_croak(aTHX_ "%s", ident_too_long);
9073 tmp_copline = CopLINE(PL_curcop);
9074 if (s < PL_bufend && isSPACE(*s)) {
9078 /* Expect to find a closing } after consuming any trailing whitespace.
9082 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9083 PL_lex_state = LEX_INTERPEND;
9086 if (PL_lex_state == LEX_NORMAL) {
9087 if (ckWARN(WARN_AMBIGUOUS)
9088 && (keyword(dest, d - dest, 0)
9089 || get_cvn_flags(dest, d - dest, is_utf8
9093 SV *tmp = newSVpvn_flags( dest, d - dest,
9094 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9097 orig_copline = CopLINE(PL_curcop);
9098 CopLINE_set(PL_curcop, tmp_copline);
9099 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9100 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9101 funny, SVfARG(tmp), funny, SVfARG(tmp));
9102 CopLINE_set(PL_curcop, orig_copline);
9107 /* Didn't find the closing } at the point we expected, so restore
9108 state such that the next thing to process is the opening { and */
9109 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9110 CopLINE_set(PL_curcop, orig_copline);
9111 PL_parser->herelines = herelines;
9115 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9116 PL_lex_state = LEX_INTERPEND;
9121 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9123 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9124 * found in the parse starting at 's', based on the subset that are valid
9125 * in this context input to this routine in 'valid_flags'. Advances s.
9126 * Returns TRUE if the input should be treated as a valid flag, so the next
9127 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9128 * upon first call on the current regex. This routine will set it to any
9129 * charset modifier found. The caller shouldn't change it. This way,
9130 * another charset modifier encountered in the parse can be detected as an
9131 * error, as we have decided to allow only one */
9134 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9136 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9137 if (isWORDCHAR_lazy_if(*s, UTF)) {
9138 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9139 UTF ? SVf_UTF8 : 0);
9141 /* Pretend that it worked, so will continue processing before
9150 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9151 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9152 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9153 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9154 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9155 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9156 case LOCALE_PAT_MOD:
9158 goto multiple_charsets;
9160 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9163 case UNICODE_PAT_MOD:
9165 goto multiple_charsets;
9167 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9170 case ASCII_RESTRICT_PAT_MOD:
9172 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9176 /* Error if previous modifier wasn't an 'a', but if it was, see
9177 * if, and accept, a second occurrence (only) */
9179 || get_regex_charset(*pmfl)
9180 != REGEX_ASCII_RESTRICTED_CHARSET)
9182 goto multiple_charsets;
9184 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9188 case DEPENDS_PAT_MOD:
9190 goto multiple_charsets;
9192 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9201 if (*charset != c) {
9202 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9204 else if (c == 'a') {
9205 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9206 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9209 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9212 /* Pretend that it worked, so will continue processing before dieing */
9218 S_scan_pat(pTHX_ char *start, I32 type)
9222 const char * const valid_flags =
9223 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9224 char charset = '\0'; /* character set modifier */
9225 unsigned int x_mod_count = 0;
9227 PERL_ARGS_ASSERT_SCAN_PAT;
9229 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9231 Perl_croak(aTHX_ "Search pattern not terminated");
9233 pm = (PMOP*)newPMOP(type, 0);
9234 if (PL_multi_open == '?') {
9235 /* This is the only point in the code that sets PMf_ONCE: */
9236 pm->op_pmflags |= PMf_ONCE;
9238 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9239 allows us to restrict the list needed by reset to just the ??
9241 assert(type != OP_TRANS);
9243 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9246 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9249 elements = mg->mg_len / sizeof(PMOP**);
9250 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9251 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9252 mg->mg_len = elements * sizeof(PMOP**);
9253 PmopSTASH_set(pm,PL_curstash);
9257 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9258 * anon CV. False positives like qr/[(?{]/ are harmless */
9260 if (type == OP_QR) {
9262 char *e, *p = SvPV(PL_lex_stuff, len);
9264 for (; p < e; p++) {
9265 if (p[0] == '(' && p[1] == '?'
9266 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9268 pm->op_pmflags |= PMf_HAS_CV;
9272 pm->op_pmflags |= PMf_IS_QR;
9275 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9276 &s, &charset, &x_mod_count))
9278 /* issue a warning if /c is specified,but /g is not */
9279 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9281 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9282 "Use of /c modifier is meaningless without /g" );
9285 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9287 PL_lex_op = (OP*)pm;
9288 pl_yylval.ival = OP_MATCH;
9293 S_scan_subst(pTHX_ char *start)
9300 char charset = '\0'; /* character set modifier */
9301 unsigned int x_mod_count = 0;
9304 PERL_ARGS_ASSERT_SCAN_SUBST;
9306 pl_yylval.ival = OP_NULL;
9308 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9311 Perl_croak(aTHX_ "Substitution pattern not terminated");
9315 first_start = PL_multi_start;
9316 first_line = CopLINE(PL_curcop);
9317 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9319 SvREFCNT_dec_NN(PL_lex_stuff);
9320 PL_lex_stuff = NULL;
9321 Perl_croak(aTHX_ "Substitution replacement not terminated");
9323 PL_multi_start = first_start; /* so whole substitution is taken together */
9325 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9329 if (*s == EXEC_PAT_MOD) {
9333 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9334 &s, &charset, &x_mod_count))
9340 STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
9342 if ((pm->op_pmflags & PMf_CONTINUE)) {
9343 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9347 SV * const repl = newSVpvs("");
9350 pm->op_pmflags |= PMf_EVAL;
9353 sv_catpvs(repl, "eval ");
9355 sv_catpvs(repl, "do ");
9357 sv_catpvs(repl, "{");
9358 sv_catsv(repl, PL_sublex_info.repl);
9359 sv_catpvs(repl, "}");
9361 SvREFCNT_dec(PL_sublex_info.repl);
9362 PL_sublex_info.repl = repl;
9364 if (CopLINE(PL_curcop) != first_line) {
9365 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9366 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9367 CopLINE(PL_curcop) - first_line;
9368 CopLINE_set(PL_curcop, first_line);
9371 PL_lex_op = (OP*)pm;
9372 pl_yylval.ival = OP_SUBST;
9377 S_scan_trans(pTHX_ char *start)
9384 bool nondestruct = 0;
9387 PERL_ARGS_ASSERT_SCAN_TRANS;
9389 pl_yylval.ival = OP_NULL;
9391 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9393 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9397 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9399 SvREFCNT_dec_NN(PL_lex_stuff);
9400 PL_lex_stuff = NULL;
9401 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9404 complement = del = squash = 0;
9408 complement = OPpTRANS_COMPLEMENT;
9411 del = OPpTRANS_DELETE;
9414 squash = OPpTRANS_SQUASH;
9426 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9427 o->op_private &= ~OPpTRANS_ALL;
9428 o->op_private |= del|squash|complement|
9429 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9430 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9433 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9440 Takes a pointer to the first < in <<FOO.
9441 Returns a pointer to the byte following <<FOO.
9443 This function scans a heredoc, which involves different methods
9444 depending on whether we are in a string eval, quoted construct, etc.
9445 This is because PL_linestr could containing a single line of input, or
9446 a whole string being evalled, or the contents of the current quote-
9449 The two basic methods are:
9450 - Steal lines from the input stream
9451 - Scan the heredoc in PL_linestr and remove it therefrom
9453 In a file scope or filtered eval, the first method is used; in a
9454 string eval, the second.
9456 In a quote-like operator, we have to choose between the two,
9457 depending on where we can find a newline. We peek into outer lex-
9458 ing scopes until we find one with a newline in it. If we reach the
9459 outermost lexing scope and it is a file, we use the stream method.
9460 Otherwise it is treated as an eval.
9464 S_scan_heredoc(pTHX_ char *s)
9466 I32 op_type = OP_SCALAR;
9473 const bool infile = PL_rsfp || PL_parser->filtered;
9474 const line_t origline = CopLINE(PL_curcop);
9475 LEXSHARED *shared = PL_parser->lex_shared;
9477 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9480 d = PL_tokenbuf + 1;
9481 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
9482 *PL_tokenbuf = '\n';
9484 while (SPACE_OR_TAB(*peek))
9486 if (*peek == '`' || *peek == '\'' || *peek =='"') {
9489 s = delimcpy(d, e, s, PL_bufend, term, &len);
9491 Perl_croak(aTHX_ "Unterminated delimiter for here document");
9497 /* <<\FOO is equivalent to <<'FOO' */
9501 if (!isWORDCHAR_lazy_if(s,UTF))
9502 deprecate("bare << to mean <<\"\"");
9504 while (isWORDCHAR_lazy_if(peek,UTF)) {
9505 peek += UTF ? UTF8SKIP(peek) : 1;
9507 len = (peek - s >= e - d) ? (e - d) : (peek - s);
9508 Copy(s, d, len, char);
9512 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
9513 Perl_croak(aTHX_ "Delimiter for here document is too long");
9516 len = d - PL_tokenbuf;
9518 #ifndef PERL_STRICT_CR
9519 d = strchr(s, '\r');
9521 char * const olds = s;
9523 while (s < PL_bufend) {
9529 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9538 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9543 tmpstr = newSV_type(SVt_PVIV);
9547 SvIV_set(tmpstr, -1);
9549 else if (term == '`') {
9550 op_type = OP_BACKTICK;
9551 SvIV_set(tmpstr, '\\');
9554 PL_multi_start = origline + 1 + PL_parser->herelines;
9555 PL_multi_open = PL_multi_close = '<';
9556 /* inside a string eval or quote-like operator */
9557 if (!infile || PL_lex_inwhat) {
9560 char * const olds = s;
9561 PERL_CONTEXT * const cx = CX_CUR();
9562 /* These two fields are not set until an inner lexing scope is
9563 entered. But we need them set here. */
9564 shared->ls_bufptr = s;
9565 shared->ls_linestr = PL_linestr;
9567 /* Look for a newline. If the current buffer does not have one,
9568 peek into the line buffer of the parent lexing scope, going
9569 up as many levels as necessary to find one with a newline
9572 while (!(s = (char *)memchr(
9573 (void *)shared->ls_bufptr, '\n',
9574 SvEND(shared->ls_linestr)-shared->ls_bufptr
9576 shared = shared->ls_prev;
9577 /* shared is only null if we have gone beyond the outermost
9578 lexing scope. In a file, we will have broken out of the
9579 loop in the previous iteration. In an eval, the string buf-
9580 fer ends with "\n;", so the while condition above will have
9581 evaluated to false. So shared can never be null. Or so you
9582 might think. Odd syntax errors like s;@{<<; can gobble up
9583 the implicit semicolon at the end of a flie, causing the
9584 file handle to be closed even when we are not in a string
9585 eval. So shared may be null in that case. */
9586 if (UNLIKELY(!shared))
9588 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9589 most lexing scope. In a file, shared->ls_linestr at that
9590 level is just one line, so there is no body to steal. */
9591 if (infile && !shared->ls_prev) {
9596 else { /* eval or we've already hit EOF */
9597 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9601 linestr = shared->ls_linestr;
9602 bufend = SvEND(linestr);
9604 while (s < bufend - len + 1
9605 && memNE(s,PL_tokenbuf,len) )
9608 ++PL_parser->herelines;
9610 if (s >= bufend - len + 1) {
9613 sv_setpvn(tmpstr,d+1,s-d);
9615 /* the preceding stmt passes a newline */
9616 PL_parser->herelines++;
9618 /* s now points to the newline after the heredoc terminator.
9619 d points to the newline before the body of the heredoc.
9622 /* We are going to modify linestr in place here, so set
9623 aside copies of the string if necessary for re-evals or
9625 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9626 check shared->re_eval_str. */
9627 if (shared->re_eval_start || shared->re_eval_str) {
9628 /* Set aside the rest of the regexp */
9629 if (!shared->re_eval_str)
9630 shared->re_eval_str =
9631 newSVpvn(shared->re_eval_start,
9632 bufend - shared->re_eval_start);
9633 shared->re_eval_start -= s-d;
9636 && CxTYPE(cx) == CXt_EVAL
9637 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
9638 && cx->blk_eval.cur_text == linestr)
9640 cx->blk_eval.cur_text = newSVsv(linestr);
9641 SvSCREAM_on(cx->blk_eval.cur_text);
9643 /* Copy everything from s onwards back to d. */
9644 Move(s,d,bufend-s + 1,char);
9645 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
9646 /* Setting PL_bufend only applies when we have not dug deeper
9647 into other scopes, because sublex_done sets PL_bufend to
9648 SvEND(PL_linestr). */
9649 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
9655 char *oldbufptr_save;
9657 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9658 term = PL_tokenbuf[1];
9660 linestr_save = PL_linestr; /* must restore this afterwards */
9661 d = s; /* and this */
9662 oldbufptr_save = PL_oldbufptr;
9663 PL_linestr = newSVpvs("");
9664 PL_bufend = SvPVX(PL_linestr);
9666 PL_bufptr = PL_bufend;
9667 CopLINE_set(PL_curcop,
9668 origline + 1 + PL_parser->herelines);
9669 if (!lex_next_chunk(LEX_NO_TERM)
9670 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
9671 /* Simply freeing linestr_save might seem simpler here, as it
9672 does not matter what PL_linestr points to, since we are
9673 about to croak; but in a quote-like op, linestr_save
9674 will have been prospectively freed already, via
9675 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
9676 restore PL_linestr. */
9677 SvREFCNT_dec_NN(PL_linestr);
9678 PL_linestr = linestr_save;
9679 PL_oldbufptr = oldbufptr_save;
9682 CopLINE_set(PL_curcop, origline);
9683 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9684 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
9685 /* ^That should be enough to avoid this needing to grow: */
9686 sv_catpvs(PL_linestr, "\n\0");
9687 assert(s == SvPVX(PL_linestr));
9688 PL_bufend = SvEND(PL_linestr);
9691 PL_parser->herelines++;
9692 PL_last_lop = PL_last_uni = NULL;
9693 #ifndef PERL_STRICT_CR
9694 if (PL_bufend - PL_linestart >= 2) {
9695 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
9696 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
9698 PL_bufend[-2] = '\n';
9700 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
9702 else if (PL_bufend[-1] == '\r')
9703 PL_bufend[-1] = '\n';
9705 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9706 PL_bufend[-1] = '\n';
9708 if (*s == term && PL_bufend-s >= len
9709 && memEQ(s,PL_tokenbuf + 1,len)) {
9710 SvREFCNT_dec(PL_linestr);
9711 PL_linestr = linestr_save;
9712 PL_linestart = SvPVX(linestr_save);
9713 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
9714 PL_oldbufptr = oldbufptr_save;
9719 sv_catsv(tmpstr,PL_linestr);
9723 PL_multi_end = origline + PL_parser->herelines;
9724 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
9725 SvPV_shrink_to_cur(tmpstr);
9728 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
9730 else if (IN_ENCODING)
9731 sv_recode_to_utf8(tmpstr, _get_encoding());
9733 PL_lex_stuff = tmpstr;
9734 pl_yylval.ival = op_type;
9738 SvREFCNT_dec(tmpstr);
9739 CopLINE_set(PL_curcop, origline);
9740 missingterm(PL_tokenbuf + 1);
9744 takes: current position in input buffer
9745 returns: new position in input buffer
9746 side-effects: pl_yylval and lex_op are set.
9751 <<>> read from ARGV without magic open
9752 <FH> read from filehandle
9753 <pkg::FH> read from package qualified filehandle
9754 <pkg'FH> read from package qualified filehandle
9755 <$fh> read from filehandle in $fh
9761 S_scan_inputsymbol(pTHX_ char *start)
9763 char *s = start; /* current position in buffer */
9766 bool nomagicopen = FALSE;
9767 char *d = PL_tokenbuf; /* start of temp holding space */
9768 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9770 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9772 end = strchr(s, '\n');
9775 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
9782 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
9784 /* die if we didn't have space for the contents of the <>,
9785 or if it didn't end, or if we see a newline
9788 if (len >= (I32)sizeof PL_tokenbuf)
9789 Perl_croak(aTHX_ "Excessively long <> operator");
9791 Perl_croak(aTHX_ "Unterminated <> operator");
9796 Remember, only scalar variables are interpreted as filehandles by
9797 this code. Anything more complex (e.g., <$fh{$num}>) will be
9798 treated as a glob() call.
9799 This code makes use of the fact that except for the $ at the front,
9800 a scalar variable and a filehandle look the same.
9802 if (*d == '$' && d[1]) d++;
9804 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
9805 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
9806 d += UTF ? UTF8SKIP(d) : 1;
9808 /* If we've tried to read what we allow filehandles to look like, and
9809 there's still text left, then it must be a glob() and not a getline.
9810 Use scan_str to pull out the stuff between the <> and treat it
9811 as nothing more than a string.
9814 if (d - PL_tokenbuf != len) {
9815 pl_yylval.ival = OP_GLOB;
9816 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
9818 Perl_croak(aTHX_ "Glob not terminated");
9822 bool readline_overriden = FALSE;
9824 /* we're in a filehandle read situation */
9827 /* turn <> into <ARGV> */
9829 Copy("ARGV",d,5,char);
9831 /* Check whether readline() is overriden */
9832 if ((gv_readline = gv_override("readline",8)))
9833 readline_overriden = TRUE;
9835 /* if <$fh>, create the ops to turn the variable into a
9839 /* try to find it in the pad for this block, otherwise find
9840 add symbol table ops
9842 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
9843 if (tmp != NOT_IN_PAD) {
9844 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9845 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9846 HEK * const stashname = HvNAME_HEK(stash);
9847 SV * const sym = sv_2mortal(newSVhek(stashname));
9848 sv_catpvs(sym, "::");
9854 OP * const o = newOP(OP_PADSV, 0);
9856 PL_lex_op = readline_overriden
9857 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9858 op_append_elem(OP_LIST, o,
9859 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9860 : (OP*)newUNOP(OP_READLINE, 0, o);
9868 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
9870 PL_lex_op = readline_overriden
9871 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9872 op_append_elem(OP_LIST,
9873 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9874 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9875 : (OP*)newUNOP(OP_READLINE, 0,
9876 newUNOP(OP_RV2SV, 0,
9877 newGVOP(OP_GV, 0, gv)));
9879 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9880 pl_yylval.ival = OP_NULL;
9883 /* If it's none of the above, it must be a literal filehandle
9884 (<Foo::BAR> or <FOO>) so build a simple readline OP */
9886 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9887 PL_lex_op = readline_overriden
9888 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
9889 op_append_elem(OP_LIST,
9890 newGVOP(OP_GV, 0, gv),
9891 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9892 : (OP*)newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
9893 pl_yylval.ival = OP_NULL;
9903 start position in buffer
9904 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
9905 only if they are of the open/close form
9906 keep_delims preserve the delimiters around the string
9907 re_reparse compiling a run-time /(?{})/:
9908 collapse // to /, and skip encoding src
9909 delimp if non-null, this is set to the position of
9910 the closing delimiter, or just after it if
9911 the closing and opening delimiters differ
9912 (i.e., the opening delimiter of a substitu-
9914 returns: position to continue reading from buffer
9915 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9916 updates the read buffer.
9918 This subroutine pulls a string out of the input. It is called for:
9919 q single quotes q(literal text)
9920 ' single quotes 'literal text'
9921 qq double quotes qq(interpolate $here please)
9922 " double quotes "interpolate $here please"
9923 qx backticks qx(/bin/ls -l)
9924 ` backticks `/bin/ls -l`
9925 qw quote words @EXPORT_OK = qw( func() $spam )
9926 m// regexp match m/this/
9927 s/// regexp substitute s/this/that/
9928 tr/// string transliterate tr/this/that/
9929 y/// string transliterate y/this/that/
9930 ($*@) sub prototypes sub foo ($)
9931 (stuff) sub attr parameters sub foo : attr(stuff)
9932 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9934 In most of these cases (all but <>, patterns and transliterate)
9935 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9936 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9937 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9940 It skips whitespace before the string starts, and treats the first
9941 character as the delimiter. If the delimiter is one of ([{< then
9942 the corresponding "close" character )]}> is used as the closing
9943 delimiter. It allows quoting of delimiters, and if the string has
9944 balanced delimiters ([{<>}]) it allows nesting.
9946 On success, the SV with the resulting string is put into lex_stuff or,
9947 if that is already non-NULL, into lex_repl. The second case occurs only
9948 when parsing the RHS of the special constructs s/// and tr/// (y///).
9949 For convenience, the terminating delimiter character is stuffed into
9954 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
9958 SV *sv; /* scalar value: string */
9959 const char *tmps; /* temp string, used for delimiter matching */
9960 char *s = start; /* current position in the buffer */
9961 char term; /* terminating character */
9962 char *to; /* current position in the sv's data */
9963 I32 brackets = 1; /* bracket nesting level */
9964 bool has_utf8 = FALSE; /* is there any utf8 content? */
9965 I32 termcode; /* terminating char. code */
9966 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
9967 STRLEN termlen; /* length of terminating string */
9968 int last_off = 0; /* last position for nesting bracket */
9971 PERL_ARGS_ASSERT_SCAN_STR;
9973 /* skip space before the delimiter */
9978 /* mark where we are, in case we need to report errors */
9981 /* after skipping whitespace, the next character is the terminator */
9984 termcode = termstr[0] = term;
9988 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
9989 Copy(s, termstr, termlen, U8);
9990 if (!UTF8_IS_INVARIANT(term))
9994 /* mark where we are */
9995 PL_multi_start = CopLINE(PL_curcop);
9996 PL_multi_open = term;
9997 herelines = PL_parser->herelines;
9999 /* find corresponding closing delimiter */
10000 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10001 termcode = termstr[0] = term = tmps[5];
10003 PL_multi_close = term;
10005 if (PL_multi_open == PL_multi_close) {
10006 keep_bracketed_quoted = FALSE;
10009 /* create a new SV to hold the contents. 79 is the SV's initial length.
10010 What a random number. */
10011 sv = newSV_type(SVt_PVIV);
10013 SvIV_set(sv, termcode);
10014 (void)SvPOK_only(sv); /* validate pointer */
10016 /* move past delimiter and try to read a complete string */
10018 sv_catpvn(sv, s, termlen);
10021 if (IN_ENCODING && !UTF && !re_reparse) {
10025 int offset = s - SvPVX_const(PL_linestr);
10026 const bool found = sv_cat_decode(sv, _get_encoding(), PL_linestr,
10027 &offset, (char*)termstr, termlen);
10031 if (SvIsCOW(PL_linestr)) {
10032 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10033 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10034 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10035 char *buf = SvPVX(PL_linestr);
10036 bufend_pos = PL_parser->bufend - buf;
10037 bufptr_pos = PL_parser->bufptr - buf;
10038 oldbufptr_pos = PL_parser->oldbufptr - buf;
10039 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10040 linestart_pos = PL_parser->linestart - buf;
10041 last_uni_pos = PL_parser->last_uni
10042 ? PL_parser->last_uni - buf
10044 last_lop_pos = PL_parser->last_lop
10045 ? PL_parser->last_lop - buf
10047 re_eval_start_pos =
10048 PL_parser->lex_shared->re_eval_start ?
10049 PL_parser->lex_shared->re_eval_start - buf : 0;
10052 sv_force_normal(PL_linestr);
10054 buf = SvPVX(PL_linestr);
10055 PL_parser->bufend = buf + bufend_pos;
10056 PL_parser->bufptr = buf + bufptr_pos;
10057 PL_parser->oldbufptr = buf + oldbufptr_pos;
10058 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10059 PL_parser->linestart = buf + linestart_pos;
10060 if (PL_parser->last_uni)
10061 PL_parser->last_uni = buf + last_uni_pos;
10062 if (PL_parser->last_lop)
10063 PL_parser->last_lop = buf + last_lop_pos;
10064 if (PL_parser->lex_shared->re_eval_start)
10065 PL_parser->lex_shared->re_eval_start =
10066 buf + re_eval_start_pos;
10069 ns = SvPVX_const(PL_linestr) + offset;
10070 svlast = SvEND(sv) - 1;
10072 for (; s < ns; s++) {
10073 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10074 COPLINE_INC_WITH_HERELINES;
10077 goto read_more_line;
10079 /* handle quoted delimiters */
10080 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10082 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10084 if ((svlast-1 - t) % 2) {
10085 if (!keep_bracketed_quoted) {
10086 *(svlast-1) = term;
10088 SvCUR_set(sv, SvCUR(sv) - 1);
10093 if (PL_multi_open == PL_multi_close) {
10099 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10100 /* At here, all closes are "was quoted" one,
10101 so we don't check PL_multi_close. */
10103 if (!keep_bracketed_quoted && *(t+1) == PL_multi_open)
10108 else if (*t == PL_multi_open)
10116 SvCUR_set(sv, w - SvPVX_const(sv));
10118 last_off = w - SvPVX(sv);
10119 if (--brackets <= 0)
10124 if (!keep_delims) {
10125 SvCUR_set(sv, SvCUR(sv) - 1);
10131 /* extend sv if need be */
10132 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10133 /* set 'to' to the next character in the sv's string */
10134 to = SvPVX(sv)+SvCUR(sv);
10136 /* if open delimiter is the close delimiter read unbridle */
10137 if (PL_multi_open == PL_multi_close) {
10138 for (; s < PL_bufend; s++,to++) {
10139 /* embedded newlines increment the current line number */
10140 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10141 COPLINE_INC_WITH_HERELINES;
10142 /* handle quoted delimiters */
10143 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10144 if (!keep_bracketed_quoted
10146 || (re_reparse && s[1] == '\\'))
10149 else /* any other quotes are simply copied straight through */
10152 /* terminate when run out of buffer (the for() condition), or
10153 have found the terminator */
10154 else if (*s == term) {
10157 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10160 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10166 /* if the terminator isn't the same as the start character (e.g.,
10167 matched brackets), we have to allow more in the quoting, and
10168 be prepared for nested brackets.
10171 /* read until we run out of string, or we find the terminator */
10172 for (; s < PL_bufend; s++,to++) {
10173 /* embedded newlines increment the line count */
10174 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10175 COPLINE_INC_WITH_HERELINES;
10176 /* backslashes can escape the open or closing characters */
10177 if (*s == '\\' && s+1 < PL_bufend) {
10178 if (!keep_bracketed_quoted
10179 && ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10186 /* allow nested opens and closes */
10187 else if (*s == PL_multi_close && --brackets <= 0)
10189 else if (*s == PL_multi_open)
10191 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10196 /* terminate the copied string and update the sv's end-of-string */
10198 SvCUR_set(sv, to - SvPVX_const(sv));
10201 * this next chunk reads more into the buffer if we're not done yet
10205 break; /* handle case where we are done yet :-) */
10207 #ifndef PERL_STRICT_CR
10208 if (to - SvPVX_const(sv) >= 2) {
10209 if ( (to[-2] == '\r' && to[-1] == '\n')
10210 || (to[-2] == '\n' && to[-1] == '\r'))
10214 SvCUR_set(sv, to - SvPVX_const(sv));
10216 else if (to[-1] == '\r')
10219 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10224 /* if we're out of file, or a read fails, bail and reset the current
10225 line marker so we can report where the unterminated string began
10227 COPLINE_INC_WITH_HERELINES;
10228 PL_bufptr = PL_bufend;
10229 if (!lex_next_chunk(0)) {
10231 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10237 /* at this point, we have successfully read the delimited string */
10239 if (!IN_ENCODING || UTF || re_reparse) {
10242 sv_catpvn(sv, s, termlen);
10245 if (has_utf8 || (IN_ENCODING && !re_reparse))
10248 PL_multi_end = CopLINE(PL_curcop);
10249 CopLINE_set(PL_curcop, PL_multi_start);
10250 PL_parser->herelines = herelines;
10252 /* if we allocated too much space, give some back */
10253 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10254 SvLEN_set(sv, SvCUR(sv) + 1);
10255 SvPV_renew(sv, SvLEN(sv));
10258 /* decide whether this is the first or second quoted string we've read
10263 PL_sublex_info.repl = sv;
10266 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10272 takes: pointer to position in buffer
10273 returns: pointer to new position in buffer
10274 side-effects: builds ops for the constant in pl_yylval.op
10276 Read a number in any of the formats that Perl accepts:
10278 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10279 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10280 0b[01](_?[01])* binary integers
10281 0[0-7](_?[0-7])* octal integers
10282 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
10283 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
10285 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10288 If it reads a number without a decimal point or an exponent, it will
10289 try converting the number to an integer and see if it can do so
10290 without loss of precision.
10294 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10296 const char *s = start; /* current position in buffer */
10297 char *d; /* destination in temp buffer */
10298 char *e; /* end of temp buffer */
10299 NV nv; /* number read, as a double */
10300 SV *sv = NULL; /* place to put the converted number */
10301 bool floatit; /* boolean: int or float? */
10302 const char *lastub = NULL; /* position of last underbar */
10303 static const char* const number_too_long = "Number too long";
10304 /* Hexadecimal floating point.
10306 * In many places (where we have quads and NV is IEEE 754 double)
10307 * we can fit the mantissa bits of a NV into an unsigned quad.
10308 * (Note that UVs might not be quads even when we have quads.)
10309 * This will not work everywhere, though (either no quads, or
10310 * using long doubles), in which case we have to resort to NV,
10311 * which will probably mean horrible loss of precision due to
10312 * multiple fp operations. */
10313 bool hexfp = FALSE;
10314 int total_bits = 0;
10315 int significant_bits = 0;
10316 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10317 # define HEXFP_UQUAD
10318 Uquad_t hexfp_uquad = 0;
10319 int hexfp_frac_bits = 0;
10324 NV hexfp_mult = 1.0;
10325 UV high_non_zero = 0; /* highest digit */
10326 int non_zero_integer_digits = 0;
10328 PERL_ARGS_ASSERT_SCAN_NUM;
10330 /* We use the first character to decide what type of number this is */
10334 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10336 /* if it starts with a 0, it could be an octal number, a decimal in
10337 0.13 disguise, or a hexadecimal number, or a binary number. */
10341 u holds the "number so far"
10342 shift the power of 2 of the base
10343 (hex == 4, octal == 3, binary == 1)
10344 overflowed was the number more than we can hold?
10346 Shift is used when we add a digit. It also serves as an "are
10347 we in octal/hex/binary?" indicator to disallow hex characters
10348 when in octal mode.
10353 bool overflowed = FALSE;
10354 bool just_zero = TRUE; /* just plain 0 or binary number? */
10355 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10356 static const char* const bases[5] =
10357 { "", "binary", "", "octal", "hexadecimal" };
10358 static const char* const Bases[5] =
10359 { "", "Binary", "", "Octal", "Hexadecimal" };
10360 static const char* const maxima[5] =
10362 "0b11111111111111111111111111111111",
10366 const char *base, *Base, *max;
10368 /* check for hex */
10369 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10373 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10378 /* check for a decimal in disguise */
10379 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10381 /* so it must be octal */
10388 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10389 "Misplaced _ in number");
10393 base = bases[shift];
10394 Base = Bases[shift];
10395 max = maxima[shift];
10397 /* read the rest of the number */
10399 /* x is used in the overflow test,
10400 b is the digit we're adding on. */
10405 /* if we don't mention it, we're done */
10409 /* _ are ignored -- but warned about if consecutive */
10411 if (lastub && s == lastub + 1)
10412 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10413 "Misplaced _ in number");
10417 /* 8 and 9 are not octal */
10418 case '8': case '9':
10420 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10424 case '2': case '3': case '4':
10425 case '5': case '6': case '7':
10427 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10430 case '0': case '1':
10431 b = *s++ & 15; /* ASCII digit -> value of digit */
10435 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10436 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
10437 /* make sure they said 0x */
10440 b = (*s++ & 7) + 9;
10442 /* Prepare to put the digit we have onto the end
10443 of the number so far. We check for overflows.
10449 x = u << shift; /* make room for the digit */
10451 total_bits += shift;
10453 if ((x >> shift) != u
10454 && !(PL_hints & HINT_NEW_BINARY)) {
10457 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10458 "Integer overflow in %s number",
10461 u = x | b; /* add the digit to the end */
10464 n *= nvshift[shift];
10465 /* If an NV has not enough bits in its
10466 * mantissa to represent an UV this summing of
10467 * small low-order numbers is a waste of time
10468 * (because the NV cannot preserve the
10469 * low-order bits anyway): we could just
10470 * remember when did we overflow and in the
10471 * end just multiply n by the right
10476 if (high_non_zero == 0 && b > 0)
10480 non_zero_integer_digits++;
10482 /* this could be hexfp, but peek ahead
10483 * to avoid matching ".." */
10484 if (UNLIKELY(HEXFP_PEEK(s))) {
10492 /* if we get here, we had success: make a scalar value from
10497 /* final misplaced underbar check */
10498 if (s[-1] == '_') {
10499 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10502 if (UNLIKELY(HEXFP_PEEK(s))) {
10503 /* Do sloppy (on the underbars) but quick detection
10504 * (and value construction) for hexfp, the decimal
10505 * detection will shortly be more thorough with the
10506 * underbar checks. */
10508 significant_bits = non_zero_integer_digits * shift;
10511 #else /* HEXFP_NV */
10514 /* Ignore the leading zero bits of
10515 * the high (first) non-zero digit. */
10516 if (high_non_zero) {
10517 if (high_non_zero < 0x8)
10518 significant_bits--;
10519 if (high_non_zero < 0x4)
10520 significant_bits--;
10521 if (high_non_zero < 0x2)
10522 significant_bits--;
10529 bool accumulate = TRUE;
10530 for (h++; (isXDIGIT(*h) || *h == '_'); h++) {
10531 if (isXDIGIT(*h)) {
10532 U8 b = XDIGIT_VALUE(*h);
10533 significant_bits += shift;
10536 if (significant_bits < NV_MANT_DIG) {
10537 /* We are in the long "run" of xdigits,
10538 * accumulate the full four bits. */
10539 hexfp_uquad <<= shift;
10541 hexfp_frac_bits += shift;
10543 /* We are at a hexdigit either at,
10544 * or straddling, the edge of mantissa.
10545 * We will try grabbing as many as
10546 * possible bits. */
10548 significant_bits - NV_MANT_DIG;
10551 hexfp_uquad <<= tail;
10552 hexfp_uquad |= b >> (shift - tail);
10553 hexfp_frac_bits += tail;
10555 /* Ignore the trailing zero bits
10556 * of the last non-zero xdigit.
10558 * The assumption here is that if
10559 * one has input of e.g. the xdigit
10560 * eight (0x8), there is only one
10561 * bit being input, not the full
10562 * four bits. Conversely, if one
10563 * specifies a zero xdigit, the
10564 * assumption is that one really
10565 * wants all those bits to be zero. */
10567 if ((b & 0x1) == 0x0) {
10568 significant_bits--;
10569 if ((b & 0x2) == 0x0) {
10570 significant_bits--;
10571 if ((b & 0x4) == 0x0) {
10572 significant_bits--;
10578 accumulate = FALSE;
10581 /* Keep skipping the xdigits, and
10582 * accumulating the significant bits,
10583 * but do not shift the uquad
10584 * (which would catastrophically drop
10585 * high-order bits) or accumulate the
10586 * xdigits anymore. */
10588 #else /* HEXFP_NV */
10592 hexfp_nv += b * nv_mult;
10594 accumulate = FALSE;
10598 if (significant_bits >= NV_MANT_DIG)
10599 accumulate = FALSE;
10603 if ((total_bits > 0 || significant_bits > 0) &&
10604 isALPHA_FOLD_EQ(*h, 'p')) {
10605 bool negexp = FALSE;
10609 else if (*h == '-') {
10615 while (isDIGIT(*h) || *h == '_') {
10618 hexfp_exp += *h - '0';
10621 && -hexfp_exp < NV_MIN_EXP - 1) {
10622 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10623 "Hexadecimal float: exponent underflow");
10629 && hexfp_exp > NV_MAX_EXP - 1) {
10630 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10631 "Hexadecimal float: exponent overflow");
10639 hexfp_exp = -hexfp_exp;
10641 hexfp_exp -= hexfp_frac_bits;
10643 hexfp_mult = pow(2.0, hexfp_exp);
10651 if (n > 4294967295.0)
10652 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10653 "%s number > %s non-portable",
10659 if (u > 0xffffffff)
10660 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10661 "%s number > %s non-portable",
10666 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
10667 sv = new_constant(start, s - start, "integer",
10668 sv, NULL, NULL, 0);
10669 else if (PL_hints & HINT_NEW_BINARY)
10670 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
10675 handle decimal numbers.
10676 we're also sent here when we read a 0 as the first digit
10678 case '1': case '2': case '3': case '4': case '5':
10679 case '6': case '7': case '8': case '9': case '.':
10682 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
10691 /* read next group of digits and _ and copy into d */
10694 || UNLIKELY(hexfp && isXDIGIT(*s)))
10696 /* skip underscores, checking for misplaced ones
10700 if (lastub && s == lastub + 1)
10701 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10702 "Misplaced _ in number");
10706 /* check for end of fixed-length buffer */
10708 Perl_croak(aTHX_ "%s", number_too_long);
10709 /* if we're ok, copy the character */
10714 /* final misplaced underbar check */
10715 if (lastub && s == lastub + 1) {
10716 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
10719 /* read a decimal portion if there is one. avoid
10720 3..5 being interpreted as the number 3. followed
10723 if (*s == '.' && s[1] != '.') {
10728 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10729 "Misplaced _ in number");
10733 /* copy, ignoring underbars, until we run out of digits.
10737 || UNLIKELY(hexfp && isXDIGIT(*s));
10740 /* fixed length buffer check */
10742 Perl_croak(aTHX_ "%s", number_too_long);
10744 if (lastub && s == lastub + 1)
10745 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10746 "Misplaced _ in number");
10752 /* fractional part ending in underbar? */
10753 if (s[-1] == '_') {
10754 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10755 "Misplaced _ in number");
10757 if (*s == '.' && isDIGIT(s[1])) {
10758 /* oops, it's really a v-string, but without the "v" */
10764 /* read exponent part, if present */
10765 if ((isALPHA_FOLD_EQ(*s, 'e')
10766 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
10767 && strchr("+-0123456789_", s[1]))
10771 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
10772 ditto for p (hexfloats) */
10773 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
10774 /* At least some Mach atof()s don't grok 'E' */
10777 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
10784 /* stray preinitial _ */
10786 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10787 "Misplaced _ in number");
10791 /* allow positive or negative exponent */
10792 if (*s == '+' || *s == '-')
10795 /* stray initial _ */
10797 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10798 "Misplaced _ in number");
10802 /* read digits of exponent */
10803 while (isDIGIT(*s) || *s == '_') {
10806 Perl_croak(aTHX_ "%s", number_too_long);
10810 if (((lastub && s == lastub + 1)
10811 || (!isDIGIT(s[1]) && s[1] != '_')))
10812 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10813 "Misplaced _ in number");
10821 We try to do an integer conversion first if no characters
10822 indicating "float" have been found.
10827 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
10829 if (flags == IS_NUMBER_IN_UV) {
10831 sv = newSViv(uv); /* Prefer IVs over UVs. */
10834 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10835 if (uv <= (UV) IV_MIN)
10836 sv = newSViv(-(IV)uv);
10843 STORE_LC_NUMERIC_UNDERLYING_SET_STANDARD();
10844 /* terminate the string */
10846 if (UNLIKELY(hexfp)) {
10847 # ifdef NV_MANT_DIG
10848 if (significant_bits > NV_MANT_DIG)
10849 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
10850 "Hexadecimal float: mantissa overflow");
10853 nv = hexfp_uquad * hexfp_mult;
10854 #else /* HEXFP_NV */
10855 nv = hexfp_nv * hexfp_mult;
10858 nv = Atof(PL_tokenbuf);
10860 RESTORE_LC_NUMERIC_UNDERLYING();
10865 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10866 const char *const key = floatit ? "float" : "integer";
10867 const STRLEN keylen = floatit ? 5 : 7;
10868 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10869 key, keylen, sv, NULL, NULL, 0);
10873 /* if it starts with a v, it could be a v-string */
10876 sv = newSV(5); /* preallocate storage space */
10877 ENTER_with_name("scan_vstring");
10879 s = scan_vstring(s, PL_bufend, sv);
10880 SvREFCNT_inc_simple_void_NN(sv);
10881 LEAVE_with_name("scan_vstring");
10885 /* make the op for the constant and return */
10888 lvalp->opval = newSVOP(OP_CONST, 0, sv);
10890 lvalp->opval = NULL;
10896 S_scan_formline(pTHX_ char *s)
10900 SV * const stuff = newSVpvs("");
10901 bool needargs = FALSE;
10902 bool eofmt = FALSE;
10904 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10906 while (!needargs) {
10909 #ifdef PERL_STRICT_CR
10910 while (SPACE_OR_TAB(*t))
10913 while (SPACE_OR_TAB(*t) || *t == '\r')
10916 if (*t == '\n' || t == PL_bufend) {
10921 eol = (char *) memchr(s,'\n',PL_bufend-s);
10925 for (t = s; t < eol; t++) {
10926 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10928 goto enough; /* ~~ must be first line in formline */
10930 if (*t == '@' || *t == '^')
10934 sv_catpvn(stuff, s, eol-s);
10935 #ifndef PERL_STRICT_CR
10936 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10937 char *end = SvPVX(stuff) + SvCUR(stuff);
10940 SvCUR_set(stuff, SvCUR(stuff) - 1);
10948 if ((PL_rsfp || PL_parser->filtered)
10949 && PL_parser->form_lex_state == LEX_NORMAL) {
10951 PL_bufptr = PL_bufend;
10952 COPLINE_INC_WITH_HERELINES;
10953 got_some = lex_next_chunk(0);
10954 CopLINE_dec(PL_curcop);
10962 if (!SvCUR(stuff) || needargs)
10963 PL_lex_state = PL_parser->form_lex_state;
10964 if (SvCUR(stuff)) {
10965 PL_expect = XSTATE;
10967 const char *s2 = s;
10968 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
10972 PL_expect = XTERMBLOCK;
10973 NEXTVAL_NEXTTOKE.ival = 0;
10976 NEXTVAL_NEXTTOKE.ival = 0;
10977 force_next(FORMLBRACK);
10980 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
10982 else if (IN_ENCODING)
10983 sv_recode_to_utf8(stuff, _get_encoding());
10985 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
10989 SvREFCNT_dec(stuff);
10991 PL_lex_formbrack = 0;
10997 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
10999 const I32 oldsavestack_ix = PL_savestack_ix;
11000 CV* const outsidecv = PL_compcv;
11002 SAVEI32(PL_subline);
11003 save_item(PL_subname);
11004 SAVESPTR(PL_compcv);
11006 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11007 CvFLAGS(PL_compcv) |= flags;
11009 PL_subline = CopLINE(PL_curcop);
11010 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11011 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11012 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11013 if (outsidecv && CvPADLIST(outsidecv))
11014 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11016 return oldsavestack_ix;
11020 S_yywarn(pTHX_ const char *const s, U32 flags)
11022 PERL_ARGS_ASSERT_YYWARN;
11024 PL_in_eval |= EVAL_WARNONLY;
11025 yyerror_pv(s, flags);
11030 Perl_yyerror(pTHX_ const char *const s)
11032 PERL_ARGS_ASSERT_YYERROR;
11033 return yyerror_pvn(s, strlen(s), 0);
11037 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11039 PERL_ARGS_ASSERT_YYERROR_PV;
11040 return yyerror_pvn(s, strlen(s), flags);
11044 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11046 const char *context = NULL;
11049 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11050 int yychar = PL_parser->yychar;
11052 PERL_ARGS_ASSERT_YYERROR_PVN;
11054 if (!yychar || (yychar == ';' && !PL_rsfp))
11055 sv_catpvs(where_sv, "at EOF");
11056 else if ( PL_oldoldbufptr
11057 && PL_bufptr > PL_oldoldbufptr
11058 && PL_bufptr - PL_oldoldbufptr < 200
11059 && PL_oldoldbufptr != PL_oldbufptr
11060 && PL_oldbufptr != PL_bufptr)
11064 The code below is removed for NetWare because it abends/crashes on NetWare
11065 when the script has error such as not having the closing quotes like:
11066 if ($var eq "value)
11067 Checking of white spaces is anyway done in NetWare code.
11070 while (isSPACE(*PL_oldoldbufptr))
11073 context = PL_oldoldbufptr;
11074 contlen = PL_bufptr - PL_oldoldbufptr;
11076 else if ( PL_oldbufptr
11077 && PL_bufptr > PL_oldbufptr
11078 && PL_bufptr - PL_oldbufptr < 200
11079 && PL_oldbufptr != PL_bufptr) {
11082 The code below is removed for NetWare because it abends/crashes on NetWare
11083 when the script has error such as not having the closing quotes like:
11084 if ($var eq "value)
11085 Checking of white spaces is anyway done in NetWare code.
11088 while (isSPACE(*PL_oldbufptr))
11091 context = PL_oldbufptr;
11092 contlen = PL_bufptr - PL_oldbufptr;
11094 else if (yychar > 255)
11095 sv_catpvs(where_sv, "next token ???");
11096 else if (yychar == YYEMPTY) {
11097 if ( PL_lex_state == LEX_NORMAL
11098 || (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11099 sv_catpvs(where_sv, "at end of line");
11100 else if (PL_lex_inpat)
11101 sv_catpvs(where_sv, "within pattern");
11103 sv_catpvs(where_sv, "within string");
11106 sv_catpvs(where_sv, "next char ");
11108 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11109 else if (isPRINT_LC(yychar)) {
11110 const char string = yychar;
11111 sv_catpvn(where_sv, &string, 1);
11114 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11116 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11117 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11118 OutCopFILE(PL_curcop),
11119 (IV)(PL_parser->preambling == NOLINE
11120 ? CopLINE(PL_curcop)
11121 : PL_parser->preambling));
11123 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11124 UTF8fARG(UTF, contlen, context));
11126 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11127 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11128 Perl_sv_catpvf(aTHX_ msg,
11129 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11130 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11133 if (PL_in_eval & EVAL_WARNONLY) {
11134 PL_in_eval &= ~EVAL_WARNONLY;
11135 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11139 if (PL_error_count >= 10) {
11141 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11142 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11143 SVfARG(errsv), OutCopFILE(PL_curcop));
11145 Perl_croak(aTHX_ "%s has too many errors.\n",
11146 OutCopFILE(PL_curcop));
11149 PL_in_my_stash = NULL;
11154 S_swallow_bom(pTHX_ U8 *s)
11156 const STRLEN slen = SvCUR(PL_linestr);
11158 PERL_ARGS_ASSERT_SWALLOW_BOM;
11162 if (s[1] == 0xFE) {
11163 /* UTF-16 little-endian? (or UTF-32LE?) */
11164 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11165 /* diag_listed_as: Unsupported script encoding %s */
11166 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11167 #ifndef PERL_NO_UTF16_FILTER
11168 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11170 if (PL_bufend > (char*)s) {
11171 s = add_utf16_textfilter(s, TRUE);
11174 /* diag_listed_as: Unsupported script encoding %s */
11175 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11180 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11181 #ifndef PERL_NO_UTF16_FILTER
11182 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11184 if (PL_bufend > (char *)s) {
11185 s = add_utf16_textfilter(s, FALSE);
11188 /* diag_listed_as: Unsupported script encoding %s */
11189 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11193 case BOM_UTF8_FIRST_BYTE: {
11194 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11195 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11196 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11197 s += len + 1; /* UTF-8 */
11204 if (s[2] == 0xFE && s[3] == 0xFF) {
11205 /* UTF-32 big-endian */
11206 /* diag_listed_as: Unsupported script encoding %s */
11207 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11210 else if (s[2] == 0 && s[3] != 0) {
11213 * are a good indicator of UTF-16BE. */
11214 #ifndef PERL_NO_UTF16_FILTER
11215 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11216 s = add_utf16_textfilter(s, FALSE);
11218 /* diag_listed_as: Unsupported script encoding %s */
11219 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11226 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11229 * are a good indicator of UTF-16LE. */
11230 #ifndef PERL_NO_UTF16_FILTER
11231 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11232 s = add_utf16_textfilter(s, TRUE);
11234 /* diag_listed_as: Unsupported script encoding %s */
11235 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11243 #ifndef PERL_NO_UTF16_FILTER
11245 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11247 SV *const filter = FILTER_DATA(idx);
11248 /* We re-use this each time round, throwing the contents away before we
11250 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11251 SV *const utf8_buffer = filter;
11252 IV status = IoPAGE(filter);
11253 const bool reverse = cBOOL(IoLINES(filter));
11256 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11258 /* As we're automatically added, at the lowest level, and hence only called
11259 from this file, we can be sure that we're not called in block mode. Hence
11260 don't bother writing code to deal with block mode. */
11262 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11265 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11267 DEBUG_P(PerlIO_printf(Perl_debug_log,
11268 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11269 FPTR2DPTR(void *, S_utf16_textfilter),
11270 reverse ? 'l' : 'b', idx, maxlen, status,
11271 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11278 /* First, look in our buffer of existing UTF-8 data: */
11279 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11283 } else if (status == 0) {
11285 IoPAGE(filter) = 0;
11286 nl = SvEND(utf8_buffer);
11289 STRLEN got = nl - SvPVX(utf8_buffer);
11290 /* Did we have anything to append? */
11292 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11293 /* Everything else in this code works just fine if SVp_POK isn't
11294 set. This, however, needs it, and we need it to work, else
11295 we loop infinitely because the buffer is never consumed. */
11296 sv_chop(utf8_buffer, nl);
11300 /* OK, not a complete line there, so need to read some more UTF-16.
11301 Read an extra octect if the buffer currently has an odd number. */
11305 if (SvCUR(utf16_buffer) >= 2) {
11306 /* Location of the high octet of the last complete code point.
11307 Gosh, UTF-16 is a pain. All the benefits of variable length,
11308 *coupled* with all the benefits of partial reads and
11310 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11311 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11313 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11317 /* We have the first half of a surrogate. Read more. */
11318 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11321 status = FILTER_READ(idx + 1, utf16_buffer,
11322 160 + (SvCUR(utf16_buffer) & 1));
11323 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11324 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11327 IoPAGE(filter) = status;
11332 chars = SvCUR(utf16_buffer) >> 1;
11333 have = SvCUR(utf8_buffer);
11334 SvGROW(utf8_buffer, have + chars * 3 + 1);
11337 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11338 (U8*)SvPVX_const(utf8_buffer) + have,
11339 chars * 2, &newlen);
11341 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11342 (U8*)SvPVX_const(utf8_buffer) + have,
11343 chars * 2, &newlen);
11345 SvCUR_set(utf8_buffer, have + newlen);
11348 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11349 it's private to us, and utf16_to_utf8{,reversed} take a
11350 (pointer,length) pair, rather than a NUL-terminated string. */
11351 if(SvCUR(utf16_buffer) & 1) {
11352 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11353 SvCUR_set(utf16_buffer, 1);
11355 SvCUR_set(utf16_buffer, 0);
11358 DEBUG_P(PerlIO_printf(Perl_debug_log,
11359 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11361 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11362 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11367 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11369 SV *filter = filter_add(S_utf16_textfilter, NULL);
11371 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11373 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11374 sv_setpvs(filter, "");
11375 IoLINES(filter) = reversed;
11376 IoPAGE(filter) = 1; /* Not EOF */
11378 /* Sadly, we have to return a valid pointer, come what may, so we have to
11379 ignore any error return from this. */
11380 SvCUR_set(PL_linestr, 0);
11381 if (FILTER_READ(0, PL_linestr, 0)) {
11382 SvUTF8_on(PL_linestr);
11384 SvUTF8_on(PL_linestr);
11386 PL_bufend = SvEND(PL_linestr);
11387 return (U8*)SvPVX(PL_linestr);
11392 Returns a pointer to the next character after the parsed
11393 vstring, as well as updating the passed in sv.
11395 Function must be called like
11397 sv = sv_2mortal(newSV(5));
11398 s = scan_vstring(s,e,sv);
11400 where s and e are the start and end of the string.
11401 The sv should already be large enough to store the vstring
11402 passed in, for performance reasons.
11404 This function may croak if fatal warnings are enabled in the
11405 calling scope, hence the sv_2mortal in the example (to prevent
11406 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11412 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11414 const char *pos = s;
11415 const char *start = s;
11417 PERL_ARGS_ASSERT_SCAN_VSTRING;
11419 if (*pos == 'v') pos++; /* get past 'v' */
11420 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11422 if ( *pos != '.') {
11423 /* this may not be a v-string if followed by => */
11424 const char *next = pos;
11425 while (next < e && isSPACE(*next))
11427 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11428 /* return string not v-string */
11429 sv_setpvn(sv,(char *)s,pos-s);
11430 return (char *)pos;
11434 if (!isALPHA(*pos)) {
11435 U8 tmpbuf[UTF8_MAXBYTES+1];
11438 s++; /* get past 'v' */
11443 /* this is atoi() that tolerates underscores */
11446 const char *end = pos;
11448 while (--end >= s) {
11450 const UV orev = rev;
11451 rev += (*end - '0') * mult;
11454 /* diag_listed_as: Integer overflow in %s number */
11455 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11456 "Integer overflow in decimal number");
11460 /* Append native character for the rev point */
11461 tmpend = uvchr_to_utf8(tmpbuf, rev);
11462 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11463 if (!UVCHR_IS_INVARIANT(rev))
11465 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11471 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11475 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11482 Perl_keyword_plugin_standard(pTHX_
11483 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11485 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11486 PERL_UNUSED_CONTEXT;
11487 PERL_UNUSED_ARG(keyword_ptr);
11488 PERL_UNUSED_ARG(keyword_len);
11489 PERL_UNUSED_ARG(op_ptr);
11490 return KEYWORD_PLUGIN_DECLINE;
11493 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11495 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11497 SAVEI32(PL_lex_brackets);
11498 if (PL_lex_brackets > 100)
11499 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11500 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
11501 SAVEI32(PL_lex_allbrackets);
11502 PL_lex_allbrackets = 0;
11503 SAVEI8(PL_lex_fakeeof);
11504 PL_lex_fakeeof = (U8)fakeeof;
11505 if(yyparse(gramtype) && !PL_parser->error_count)
11506 qerror(Perl_mess(aTHX_ "Parse error"));
11509 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
11511 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
11515 SAVEVPTR(PL_eval_root);
11516 PL_eval_root = NULL;
11517 parse_recdescent(gramtype, fakeeof);
11523 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11525 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11528 if (flags & ~PARSE_OPTIONAL)
11529 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11530 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11531 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11532 if (!PL_parser->error_count)
11533 qerror(Perl_mess(aTHX_ "Parse error"));
11534 exprop = newOP(OP_NULL, 0);
11540 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
11542 Parse a Perl arithmetic expression. This may contain operators of precedence
11543 down to the bit shift operators. The expression must be followed (and thus
11544 terminated) either by a comparison or lower-precedence operator or by
11545 something that would normally terminate an expression such as semicolon.
11546 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11547 otherwise it is mandatory. It is up to the caller to ensure that the
11548 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11549 the source of the code to be parsed and the lexical context for the
11552 The op tree representing the expression is returned. If an optional
11553 expression is absent, a null pointer is returned, otherwise the pointer
11556 If an error occurs in parsing or compilation, in most cases a valid op
11557 tree is returned anyway. The error is reflected in the parser state,
11558 normally resulting in a single exception at the top level of parsing
11559 which covers all the compilation errors that occurred. Some compilation
11560 errors, however, will throw an exception immediately.
11566 Perl_parse_arithexpr(pTHX_ U32 flags)
11568 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11572 =for apidoc Amx|OP *|parse_termexpr|U32 flags
11574 Parse a Perl term expression. This may contain operators of precedence
11575 down to the assignment operators. The expression must be followed (and thus
11576 terminated) either by a comma or lower-precedence operator or by
11577 something that would normally terminate an expression such as semicolon.
11578 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11579 otherwise it is mandatory. It is up to the caller to ensure that the
11580 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11581 the source of the code to be parsed and the lexical context for the
11584 The op tree representing the expression is returned. If an optional
11585 expression is absent, a null pointer is returned, otherwise the pointer
11588 If an error occurs in parsing or compilation, in most cases a valid op
11589 tree is returned anyway. The error is reflected in the parser state,
11590 normally resulting in a single exception at the top level of parsing
11591 which covers all the compilation errors that occurred. Some compilation
11592 errors, however, will throw an exception immediately.
11598 Perl_parse_termexpr(pTHX_ U32 flags)
11600 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11604 =for apidoc Amx|OP *|parse_listexpr|U32 flags
11606 Parse a Perl list expression. This may contain operators of precedence
11607 down to the comma operator. The expression must be followed (and thus
11608 terminated) either by a low-precedence logic operator such as C<or> or by
11609 something that would normally terminate an expression such as semicolon.
11610 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
11611 otherwise it is mandatory. It is up to the caller to ensure that the
11612 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11613 the source of the code to be parsed and the lexical context for the
11616 The op tree representing the expression is returned. If an optional
11617 expression is absent, a null pointer is returned, otherwise the pointer
11620 If an error occurs in parsing or compilation, in most cases a valid op
11621 tree is returned anyway. The error is reflected in the parser state,
11622 normally resulting in a single exception at the top level of parsing
11623 which covers all the compilation errors that occurred. Some compilation
11624 errors, however, will throw an exception immediately.
11630 Perl_parse_listexpr(pTHX_ U32 flags)
11632 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11636 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
11638 Parse a single complete Perl expression. This allows the full
11639 expression grammar, including the lowest-precedence operators such
11640 as C<or>. The expression must be followed (and thus terminated) by a
11641 token that an expression would normally be terminated by: end-of-file,
11642 closing bracketing punctuation, semicolon, or one of the keywords that
11643 signals a postfix expression-statement modifier. If C<flags> has the
11644 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
11645 mandatory. It is up to the caller to ensure that the dynamic parser
11646 state (L</PL_parser> et al) is correctly set to reflect the source of
11647 the code to be parsed and the lexical context for the expression.
11649 The op tree representing the expression is returned. If an optional
11650 expression is absent, a null pointer is returned, otherwise the pointer
11653 If an error occurs in parsing or compilation, in most cases a valid op
11654 tree is returned anyway. The error is reflected in the parser state,
11655 normally resulting in a single exception at the top level of parsing
11656 which covers all the compilation errors that occurred. Some compilation
11657 errors, however, will throw an exception immediately.
11663 Perl_parse_fullexpr(pTHX_ U32 flags)
11665 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11669 =for apidoc Amx|OP *|parse_block|U32 flags
11671 Parse a single complete Perl code block. This consists of an opening
11672 brace, a sequence of statements, and a closing brace. The block
11673 constitutes a lexical scope, so C<my> variables and various compile-time
11674 effects can be contained within it. It is up to the caller to ensure
11675 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11676 reflect the source of the code to be parsed and the lexical context for
11679 The op tree representing the code block is returned. This is always a
11680 real op, never a null pointer. It will normally be a C<lineseq> list,
11681 including C<nextstate> or equivalent ops. No ops to construct any kind
11682 of runtime scope are included by virtue of it being a block.
11684 If an error occurs in parsing or compilation, in most cases a valid op
11685 tree (most likely null) is returned anyway. The error is reflected in
11686 the parser state, normally resulting in a single exception at the top
11687 level of parsing which covers all the compilation errors that occurred.
11688 Some compilation errors, however, will throw an exception immediately.
11690 The C<flags> parameter is reserved for future use, and must always
11697 Perl_parse_block(pTHX_ U32 flags)
11700 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
11701 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
11705 =for apidoc Amx|OP *|parse_barestmt|U32 flags
11707 Parse a single unadorned Perl statement. This may be a normal imperative
11708 statement or a declaration that has compile-time effect. It does not
11709 include any label or other affixture. It is up to the caller to ensure
11710 that the dynamic parser state (L</PL_parser> et al) is correctly set to
11711 reflect the source of the code to be parsed and the lexical context for
11714 The op tree representing the statement is returned. This may be a
11715 null pointer if the statement is null, for example if it was actually
11716 a subroutine definition (which has compile-time side effects). If not
11717 null, it will be ops directly implementing the statement, suitable to
11718 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11719 equivalent op (except for those embedded in a scope contained entirely
11720 within the statement).
11722 If an error occurs in parsing or compilation, in most cases a valid op
11723 tree (most likely null) is returned anyway. The error is reflected in
11724 the parser state, normally resulting in a single exception at the top
11725 level of parsing which covers all the compilation errors that occurred.
11726 Some compilation errors, however, will throw an exception immediately.
11728 The C<flags> parameter is reserved for future use, and must always
11735 Perl_parse_barestmt(pTHX_ U32 flags)
11738 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
11739 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
11743 =for apidoc Amx|SV *|parse_label|U32 flags
11745 Parse a single label, possibly optional, of the type that may prefix a
11746 Perl statement. It is up to the caller to ensure that the dynamic parser
11747 state (L</PL_parser> et al) is correctly set to reflect the source of
11748 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
11749 label is optional, otherwise it is mandatory.
11751 The name of the label is returned in the form of a fresh scalar. If an
11752 optional label is absent, a null pointer is returned.
11754 If an error occurs in parsing, which can only occur if the label is
11755 mandatory, a valid label is returned anyway. The error is reflected in
11756 the parser state, normally resulting in a single exception at the top
11757 level of parsing which covers all the compilation errors that occurred.
11763 Perl_parse_label(pTHX_ U32 flags)
11765 if (flags & ~PARSE_OPTIONAL)
11766 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11767 if (PL_lex_state == LEX_KNOWNEXT) {
11768 PL_parser->yychar = yylex();
11769 if (PL_parser->yychar == LABEL) {
11770 char * const lpv = pl_yylval.pval;
11771 STRLEN llen = strlen(lpv);
11772 PL_parser->yychar = YYEMPTY;
11773 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
11780 STRLEN wlen, bufptr_pos;
11783 if (!isIDFIRST_lazy_if(s, UTF))
11785 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
11786 if (word_takes_any_delimeter(s, wlen))
11788 bufptr_pos = s - SvPVX(PL_linestr);
11790 lex_read_space(LEX_KEEP_PREVIOUS);
11792 s = SvPVX(PL_linestr) + bufptr_pos;
11793 if (t[0] == ':' && t[1] != ':') {
11794 PL_oldoldbufptr = PL_oldbufptr;
11797 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
11801 if (flags & PARSE_OPTIONAL) {
11804 qerror(Perl_mess(aTHX_ "Parse error"));
11805 return newSVpvs("x");
11812 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
11814 Parse a single complete Perl statement. This may be a normal imperative
11815 statement or a declaration that has compile-time effect, and may include
11816 optional labels. It is up to the caller to ensure that the dynamic
11817 parser state (L</PL_parser> et al) is correctly set to reflect the source
11818 of the code to be parsed and the lexical context for the statement.
11820 The op tree representing the statement is returned. This may be a
11821 null pointer if the statement is null, for example if it was actually
11822 a subroutine definition (which has compile-time side effects). If not
11823 null, it will be the result of a L</newSTATEOP> call, normally including
11824 a C<nextstate> or equivalent op.
11826 If an error occurs in parsing or compilation, in most cases a valid op
11827 tree (most likely null) is returned anyway. The error is reflected in
11828 the parser state, normally resulting in a single exception at the top
11829 level of parsing which covers all the compilation errors that occurred.
11830 Some compilation errors, however, will throw an exception immediately.
11832 The C<flags> parameter is reserved for future use, and must always
11839 Perl_parse_fullstmt(pTHX_ U32 flags)
11842 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
11843 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
11847 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
11849 Parse a sequence of zero or more Perl statements. These may be normal
11850 imperative statements, including optional labels, or declarations
11851 that have compile-time effect, or any mixture thereof. The statement
11852 sequence ends when a closing brace or end-of-file is encountered in a
11853 place where a new statement could have validly started. It is up to
11854 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11855 is correctly set to reflect the source of the code to be parsed and the
11856 lexical context for the statements.
11858 The op tree representing the statement sequence is returned. This may
11859 be a null pointer if the statements were all null, for example if there
11860 were no statements or if there were only subroutine definitions (which
11861 have compile-time side effects). If not null, it will be a C<lineseq>
11862 list, normally including C<nextstate> or equivalent ops.
11864 If an error occurs in parsing or compilation, in most cases a valid op
11865 tree is returned anyway. The error is reflected in the parser state,
11866 normally resulting in a single exception at the top level of parsing
11867 which covers all the compilation errors that occurred. Some compilation
11868 errors, however, will throw an exception immediately.
11870 The C<flags> parameter is reserved for future use, and must always
11877 Perl_parse_stmtseq(pTHX_ U32 flags)
11882 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11883 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
11884 c = lex_peek_unichar(0);
11885 if (c != -1 && c != /*{*/'}')
11886 qerror(Perl_mess(aTHX_ "Parse error"));
11890 #define lex_token_boundary() S_lex_token_boundary(aTHX)
11892 S_lex_token_boundary(pTHX)
11894 PL_oldoldbufptr = PL_oldbufptr;
11895 PL_oldbufptr = PL_bufptr;
11898 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
11900 S_parse_opt_lexvar(pTHX)
11905 lex_token_boundary();
11906 sigil = lex_read_unichar(0);
11907 if (lex_peek_unichar(0) == '#') {
11908 qerror(Perl_mess(aTHX_ "Parse error"));
11912 c = lex_peek_unichar(0);
11913 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
11916 d = PL_tokenbuf + 1;
11917 PL_tokenbuf[0] = (char)sigil;
11918 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
11920 if (d == PL_tokenbuf+1)
11922 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
11923 OPf_MOD | (OPpLVAL_INTRO<<8));
11924 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
11929 Perl_parse_subsignature(pTHX)
11932 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
11933 OP *initops = NULL;
11935 c = lex_peek_unichar(0);
11936 while (c != /*(*/')') {
11940 if (prev_type == 2)
11941 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11942 var = parse_opt_lexvar();
11944 newBINOP(OP_AELEM, 0,
11945 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
11947 newSVOP(OP_CONST, 0, newSViv(pos))) :
11950 c = lex_peek_unichar(0);
11952 lex_token_boundary();
11953 lex_read_unichar(0);
11955 c = lex_peek_unichar(0);
11956 if (c == ',' || c == /*(*/')') {
11958 qerror(Perl_mess(aTHX_ "Optional parameter "
11959 "lacks default expression"));
11961 OP *defexpr = parse_termexpr(0);
11962 if (defexpr->op_type == OP_UNDEF
11963 && !(defexpr->op_flags & OPf_KIDS))
11969 scalar(newUNOP(OP_RV2AV, 0,
11970 newGVOP(OP_GV, 0, PL_defgv))),
11971 newSVOP(OP_CONST, 0, newSViv(pos+1)));
11973 newCONDOP(0, ifop, expr, defexpr) :
11974 newLOGOP(OP_OR, 0, ifop, defexpr);
11979 if (prev_type == 1)
11980 qerror(Perl_mess(aTHX_ "Mandatory parameter "
11981 "follows optional parameter"));
11983 min_arity = pos + 1;
11985 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
11987 initops = op_append_list(OP_LINESEQ, initops,
11988 newSTATEOP(0, NULL, expr));
11994 if (prev_type == 2)
11995 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
11996 var = parse_opt_lexvar();
11998 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
11999 newBINOP(OP_BIT_AND, 0,
12000 scalar(newUNOP(OP_RV2AV, 0,
12001 newGVOP(OP_GV, 0, PL_defgv))),
12002 newSVOP(OP_CONST, 0, newSViv(1))),
12003 op_convert_list(OP_DIE, 0,
12004 op_convert_list(OP_SPRINTF, 0,
12005 op_append_list(OP_LIST,
12006 newSVOP(OP_CONST, 0,
12007 newSVpvs("Odd name/value argument for subroutine at %s line %d.\n")),
12009 op_append_list(OP_LIST,
12010 newSVOP(OP_CONST, 0, newSViv(1)),
12011 newSVOP(OP_CONST, 0, newSViv(2))),
12012 newOP(OP_CALLER, 0))))));
12013 if (pos != min_arity)
12014 chkop = newLOGOP(OP_AND, 0,
12016 scalar(newUNOP(OP_RV2AV, 0,
12017 newGVOP(OP_GV, 0, PL_defgv))),
12018 newSVOP(OP_CONST, 0, newSViv(pos))),
12020 initops = op_append_list(OP_LINESEQ,
12021 newSTATEOP(0, NULL, chkop),
12026 op_prepend_elem(OP_ASLICE,
12027 newOP(OP_PUSHMARK, 0),
12028 newLISTOP(OP_ASLICE, 0,
12030 newSVOP(OP_CONST, 0, newSViv(pos)),
12031 newUNOP(OP_AV2ARYLEN, 0,
12032 ref(newUNOP(OP_RV2AV, 0,
12033 newGVOP(OP_GV, 0, PL_defgv)),
12035 ref(newUNOP(OP_RV2AV, 0,
12036 newGVOP(OP_GV, 0, PL_defgv)),
12038 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12039 initops = op_append_list(OP_LINESEQ, initops,
12040 newSTATEOP(0, NULL,
12041 newASSIGNOP(OPf_STACKED, var, 0, slice)));
12048 qerror(Perl_mess(aTHX_ "Parse error"));
12052 c = lex_peek_unichar(0);
12054 case /*(*/')': break;
12057 lex_token_boundary();
12058 lex_read_unichar(0);
12060 c = lex_peek_unichar(0);
12061 } while (c == ',');
12067 if (min_arity != 0) {
12068 initops = op_append_list(OP_LINESEQ,
12069 newSTATEOP(0, NULL,
12072 scalar(newUNOP(OP_RV2AV, 0,
12073 newGVOP(OP_GV, 0, PL_defgv))),
12074 newSVOP(OP_CONST, 0, newSViv(min_arity))),
12075 op_convert_list(OP_DIE, 0,
12076 op_convert_list(OP_SPRINTF, 0,
12077 op_append_list(OP_LIST,
12078 newSVOP(OP_CONST, 0,
12079 newSVpvs("Too few arguments for subroutine at %s line %d.\n")),
12081 op_append_list(OP_LIST,
12082 newSVOP(OP_CONST, 0, newSViv(1)),
12083 newSVOP(OP_CONST, 0, newSViv(2))),
12084 newOP(OP_CALLER, 0))))))),
12087 if (max_arity != -1) {
12088 initops = op_append_list(OP_LINESEQ,
12089 newSTATEOP(0, NULL,
12092 scalar(newUNOP(OP_RV2AV, 0,
12093 newGVOP(OP_GV, 0, PL_defgv))),
12094 newSVOP(OP_CONST, 0, newSViv(max_arity))),
12095 op_convert_list(OP_DIE, 0,
12096 op_convert_list(OP_SPRINTF, 0,
12097 op_append_list(OP_LIST,
12098 newSVOP(OP_CONST, 0,
12099 newSVpvs("Too many arguments for subroutine at %s line %d.\n")),
12101 op_append_list(OP_LIST,
12102 newSVOP(OP_CONST, 0, newSViv(1)),
12103 newSVOP(OP_CONST, 0, newSViv(2))),
12104 newOP(OP_CALLER, 0))))))),
12111 * ex: set ts=8 sts=4 sw=4 et: