3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
185 #include "keywords.h"
187 /* CLINE is a macro that ensures PL_copline has a sane value */
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
192 # define SKIPSPACE0(s) skipspace0(s)
193 # define SKIPSPACE1(s) skipspace1(s)
194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195 # define PEEKSPACE(s) skipspace2(s,0)
197 # define SKIPSPACE0(s) skipspace(s)
198 # define SKIPSPACE1(s) skipspace(s)
199 # define SKIPSPACE2(s,tsv) skipspace(s)
200 # define PEEKSPACE(s) skipspace(s)
204 * Convenience functions to return different tokens and prime the
205 * lexer for the next token. They all take an argument.
207 * TOKEN : generic token (used for '(', DOLSHARP, etc)
208 * OPERATOR : generic operator
209 * AOPERATOR : assignment operator
210 * PREBLOCK : beginning the block after an if, while, foreach, ...
211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212 * PREREF : *EXPR where EXPR is not a simple identifier
213 * TERM : expression term
214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
215 * LOOPX : loop exiting command (goto, last, dump, etc)
216 * FTST : file test operator
217 * FUN0 : zero-argument function
218 * FUN0OP : zero-argument function, with its op created in this file
219 * FUN1 : not used, except for not, which isn't a UNIOP
220 * BOop : bitwise or or xor
222 * SHop : shift operator
223 * PWop : power operator
224 * PMop : pattern-matching operator
225 * Aop : addition-level operator
226 * Mop : multiplication-level operator
227 * Eop : equality-testing operator
228 * Rop : relational operator <= != gt
230 * Also see LOP and lop() below.
233 #ifdef DEBUGGING /* Serve -DT. */
234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
236 # define REPORT(retval) (retval)
239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
262 /* This bit of chicanery makes a unary function followed by
263 * a parenthesis into a function with one argument, highest precedence.
264 * The UNIDOR macro is for unary functions that can be followed by the //
265 * operator (such as C<shift // 0>).
267 #define UNI3(f,x,have_x) { \
268 pl_yylval.ival = f; \
269 if (have_x) PL_expect = x; \
271 PL_last_uni = PL_oldbufptr; \
272 PL_last_lop_op = f; \
274 return REPORT( (int)FUNC1 ); \
276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 #define UNI(f) UNI3(f,XTERM,1)
279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
280 #define UNIPROTO(f,optional) { \
281 if (optional) PL_last_uni = PL_oldbufptr; \
285 #define UNIBRACK(f) UNI3(f,0,0)
287 /* grandfather return to old style */
290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
292 pl_yylval.ival = (f); \
298 #define COPLINE_INC_WITH_HERELINES \
300 CopLINE_inc(PL_curcop); \
301 if (PL_parser->herelines) \
302 CopLINE(PL_curcop) += PL_parser->herelines, \
303 PL_parser->herelines = 0; \
305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
306 * is no sublex_push to follow. */
307 #define COPLINE_SET_FROM_MULTI_END \
309 CopLINE_set(PL_curcop, PL_multi_end); \
310 if (PL_multi_end != PL_multi_start) \
311 PL_parser->herelines = 0; \
317 /* how to interpret the pl_yylval associated with the token */
321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 static struct debug_tokens {
328 enum token_type type;
330 } const debug_tokens[] =
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
355 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
363 { IF, TOKENTYPE_IVAL, "IF" },
364 { LABEL, TOKENTYPE_PVAL, "LABEL" },
365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
370 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
371 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
372 { MY, TOKENTYPE_IVAL, "MY" },
373 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
374 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
375 { OROP, TOKENTYPE_IVAL, "OROP" },
376 { OROR, TOKENTYPE_NONE, "OROR" },
377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
378 { PEG, TOKENTYPE_NONE, "PEG" },
379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
384 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
385 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
386 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
387 { PREINC, TOKENTYPE_NONE, "PREINC" },
388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
390 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
391 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
394 { SUB, TOKENTYPE_NONE, "SUB" },
395 { THING, TOKENTYPE_OPVAL, "THING" },
396 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
401 { USE, TOKENTYPE_IVAL, "USE" },
402 { WHEN, TOKENTYPE_IVAL, "WHEN" },
403 { WHILE, TOKENTYPE_IVAL, "WHILE" },
404 { WORD, TOKENTYPE_OPVAL, "WORD" },
405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
406 { 0, TOKENTYPE_NONE, NULL }
409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
416 PERL_ARGS_ASSERT_TOKEREPORT;
419 const char *name = NULL;
420 enum token_type type = TOKENTYPE_NONE;
421 const struct debug_tokens *p;
422 SV* const report = newSVpvs("<== ");
424 for (p = debug_tokens; p->token; p++) {
425 if (p->token == (int)rv) {
432 Perl_sv_catpv(aTHX_ report, name);
433 else if ((char)rv > ' ' && (char)rv <= '~')
435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
437 sv_catpvs(report, " (pending identifier)");
440 sv_catpvs(report, "EOF");
442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
449 case TOKENTYPE_OPNUM:
450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
451 PL_op_name[lvalp->ival]);
454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
456 case TOKENTYPE_OPVAL:
458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
459 PL_op_name[lvalp->opval->op_type]);
460 if (lvalp->opval->op_type == OP_CONST) {
461 Perl_sv_catpvf(aTHX_ report, " %s",
462 SvPEEK(cSVOPx_sv(lvalp->opval)));
467 sv_catpvs(report, "(opval=null)");
470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
476 /* print the buffer with suitable escapes */
479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
481 SV* const tmp = newSVpvs("");
483 PERL_ARGS_ASSERT_PRINTBUF;
485 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
486 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494 S_deprecate_commaless_var_list(pTHX) {
496 deprecate("comma-less variable list");
497 return REPORT(','); /* grandfather non-comma-format format */
503 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
504 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
508 S_ao(pTHX_ int toketype)
511 if (*PL_bufptr == '=') {
513 if (toketype == ANDAND)
514 pl_yylval.ival = OP_ANDASSIGN;
515 else if (toketype == OROR)
516 pl_yylval.ival = OP_ORASSIGN;
517 else if (toketype == DORDOR)
518 pl_yylval.ival = OP_DORASSIGN;
526 * When Perl expects an operator and finds something else, no_op
527 * prints the warning. It always prints "<something> found where
528 * operator expected. It prints "Missing semicolon on previous line?"
529 * if the surprise occurs at the start of the line. "do you need to
530 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
531 * where the compiler doesn't know if foo is a method call or a function.
532 * It prints "Missing operator before end of line" if there's nothing
533 * after the missing operator, or "... before <...>" if there is something
534 * after the missing operator.
538 S_no_op(pTHX_ const char *const what, char *s)
541 char * const oldbp = PL_bufptr;
542 const bool is_first = (PL_oldbufptr == PL_linestart);
544 PERL_ARGS_ASSERT_NO_OP;
550 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
551 if (ckWARN_d(WARN_SYNTAX)) {
553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
554 "\t(Missing semicolon on previous line?)\n");
555 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
557 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
558 t += UTF ? UTF8SKIP(t) : 1)
560 if (t < PL_bufptr && isSPACE(*t))
561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
562 "\t(Do you need to predeclare %"UTF8f"?)\n",
563 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
568 "\t(Missing operator before %"UTF8f"?)\n",
569 UTF8fARG(UTF, s - oldbp, oldbp));
577 * Complain about missing quote/regexp/heredoc terminator.
578 * If it's called with NULL then it cauterizes the line buffer.
579 * If we're in a delimited string and the delimiter is a control
580 * character, it's reformatted into a two-char sequence like ^C.
585 S_missingterm(pTHX_ char *s)
591 char * const nl = strrchr(s,'\n');
595 else if ((U8) PL_multi_close < 32) {
597 tmpbuf[1] = (char)toCTRL(PL_multi_close);
602 *tmpbuf = (char)PL_multi_close;
606 q = strchr(s,'"') ? '\'' : '"';
607 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
613 * Check whether the named feature is enabled.
616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
619 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
621 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
623 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
625 if (namelen > MAX_FEATURE_LEN)
627 memcpy(&he_name[8], name, namelen);
629 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
630 REFCOUNTED_HE_EXISTS));
634 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
635 * utf16-to-utf8-reversed.
638 #ifdef PERL_CR_FILTER
642 const char *s = SvPVX_const(sv);
643 const char * const e = s + SvCUR(sv);
645 PERL_ARGS_ASSERT_STRIP_RETURN;
647 /* outer loop optimized to do nothing if there are no CR-LFs */
649 if (*s++ == '\r' && *s == '\n') {
650 /* hit a CR-LF, need to copy the rest */
654 if (*s == '\r' && s[1] == '\n')
665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
667 const I32 count = FILTER_READ(idx+1, sv, maxlen);
668 if (count > 0 && !maxlen)
675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
677 Creates and initialises a new lexer/parser state object, supplying
678 a context in which to lex and parse from a new source of Perl code.
679 A pointer to the new state object is placed in L</PL_parser>. An entry
680 is made on the save stack so that upon unwinding the new state object
681 will be destroyed and the former value of L</PL_parser> will be restored.
682 Nothing else need be done to clean up the parsing context.
684 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
685 non-null, provides a string (in SV form) containing code to be parsed.
686 A copy of the string is made, so subsequent modification of I<line>
687 does not affect parsing. I<rsfp>, if non-null, provides an input stream
688 from which code will be read to be parsed. If both are non-null, the
689 code in I<line> comes first and must consist of complete lines of input,
690 and I<rsfp> supplies the remainder of the source.
692 The I<flags> parameter is reserved for future use. Currently it is only
693 used by perl internally, so extensions should always pass zero.
698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
699 can share filters with the current parser.
700 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
701 caller, hence isn't owned by the parser, so shouldn't be closed on parser
702 destruction. This is used to handle the case of defaulting to reading the
703 script from the standard input because no filename was given on the command
704 line (without getting confused by situation where STDIN has been closed, so
705 the script handle is opened on fd 0) */
708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
711 const char *s = NULL;
712 yy_parser *parser, *oparser;
713 if (flags && flags & ~LEX_START_FLAGS)
714 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
716 /* create and initialise a parser */
718 Newxz(parser, 1, yy_parser);
719 parser->old_parser = oparser = PL_parser;
722 parser->stack = NULL;
724 parser->stack_size = 0;
726 /* on scope exit, free this parser and restore any outer one */
728 parser->saved_curcop = PL_curcop;
730 /* initialise lexer state */
733 parser->curforce = -1;
735 parser->nexttoke = 0;
737 parser->error_count = oparser ? oparser->error_count : 0;
738 parser->copline = parser->preambling = NOLINE;
739 parser->lex_state = LEX_NORMAL;
740 parser->expect = XSTATE;
742 parser->rsfp_filters =
743 !(flags & LEX_START_SAME_FILTER) || !oparser
745 : MUTABLE_AV(SvREFCNT_inc(
746 oparser->rsfp_filters
747 ? oparser->rsfp_filters
748 : (oparser->rsfp_filters = newAV())
751 Newx(parser->lex_brackstack, 120, char);
752 Newx(parser->lex_casestack, 12, char);
753 *parser->lex_casestack = '\0';
754 Newxz(parser->lex_shared, 1, LEXSHARED);
758 s = SvPV_const(line, len);
759 parser->linestr = flags & LEX_START_COPIED
760 ? SvREFCNT_inc_simple_NN(line)
761 : newSVpvn_flags(s, len, SvUTF8(line));
762 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
764 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
766 parser->oldoldbufptr =
769 parser->linestart = SvPVX(parser->linestr);
770 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
771 parser->last_lop = parser->last_uni = NULL;
773 assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
774 |LEX_DONT_CLOSE_RSFP));
775 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
776 |LEX_DONT_CLOSE_RSFP));
778 parser->in_pod = parser->filtered = 0;
782 /* delete a parser object */
785 Perl_parser_free(pTHX_ const yy_parser *parser)
787 PERL_ARGS_ASSERT_PARSER_FREE;
789 PL_curcop = parser->saved_curcop;
790 SvREFCNT_dec(parser->linestr);
792 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
793 PerlIO_clearerr(parser->rsfp);
794 else if (parser->rsfp && (!parser->old_parser ||
795 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
796 PerlIO_close(parser->rsfp);
797 SvREFCNT_dec(parser->rsfp_filters);
798 SvREFCNT_dec(parser->lex_stuff);
799 SvREFCNT_dec(parser->sublex_info.repl);
801 Safefree(parser->lex_brackstack);
802 Safefree(parser->lex_casestack);
803 Safefree(parser->lex_shared);
804 PL_parser = parser->old_parser;
809 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
812 I32 nexttoke = parser->lasttoke;
814 I32 nexttoke = parser->nexttoke;
816 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
819 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
821 && parser->nexttoke[nexttoke].next_val.opval
822 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
823 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
824 op_free(parser->nexttoke[nexttoke].next_val.opval);
825 parser->nexttoke[nexttoke].next_val.opval = NULL;
828 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
829 && parser->nextval[nexttoke].opval
830 && parser->nextval[nexttoke].opval->op_slabbed
831 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
832 op_free(parser->nextval[nexttoke].opval);
833 parser->nextval[nexttoke].opval = NULL;
841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
843 Buffer scalar containing the chunk currently under consideration of the
844 text currently being lexed. This is always a plain string scalar (for
845 which C<SvPOK> is true). It is not intended to be used as a scalar by
846 normal scalar means; instead refer to the buffer directly by the pointer
847 variables described below.
849 The lexer maintains various C<char*> pointers to things in the
850 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
851 reallocated, all of these pointers must be updated. Don't attempt to
852 do this manually, but rather use L</lex_grow_linestr> if you need to
853 reallocate the buffer.
855 The content of the text chunk in the buffer is commonly exactly one
856 complete line of input, up to and including a newline terminator,
857 but there are situations where it is otherwise. The octets of the
858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
859 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
860 flag on this scalar, which may disagree with it.
862 For direct examination of the buffer, the variable
863 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
865 of these pointers is usually preferable to examination of the scalar
866 through normal scalar means.
868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
870 Direct pointer to the end of the chunk of text currently being lexed, the
871 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
872 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
873 always located at the end of the buffer, and does not count as part of
874 the buffer's contents.
876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
878 Points to the current position of lexing inside the lexer buffer.
879 Characters around this point may be freely examined, within
880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
881 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
884 Lexing code (whether in the Perl core or not) moves this pointer past
885 the characters that it consumes. It is also expected to perform some
886 bookkeeping whenever a newline character is consumed. This movement
887 can be more conveniently performed by the function L</lex_read_to>,
888 which handles newlines appropriately.
890 Interpretation of the buffer's octets can be abstracted out by
891 using the slightly higher-level functions L</lex_peek_unichar> and
892 L</lex_read_unichar>.
894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
896 Points to the start of the current line inside the lexer buffer.
897 This is useful for indicating at which column an error occurred, and
898 not much else. This must be updated by any lexing code that consumes
899 a newline; the function L</lex_read_to> handles this detail.
905 =for apidoc Amx|bool|lex_bufutf8
907 Indicates whether the octets in the lexer buffer
908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
909 of Unicode characters. If not, they should be interpreted as Latin-1
910 characters. This is analogous to the C<SvUTF8> flag for scalars.
912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
913 contains valid UTF-8. Lexing code must be robust in the face of invalid
916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
917 is significant, but not the whole story regarding the input character
918 encoding. Normally, when a file is being read, the scalar contains octets
919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
920 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
923 is in effect. This logic may change in the future; use this function
924 instead of implementing the logic yourself.
930 Perl_lex_bufutf8(pTHX)
936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
939 at least I<len> octets (including terminating C<NUL>). Returns a
940 pointer to the reallocated buffer. This is necessary before making
941 any direct modification of the buffer that would increase its length.
942 L</lex_stuff_pvn> provides a more convenient way to insert text into
945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
946 this function updates all of the lexer's variables that point directly
953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
957 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
958 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
959 linestr = PL_parser->linestr;
960 buf = SvPVX(linestr);
961 if (len <= SvLEN(linestr))
963 bufend_pos = PL_parser->bufend - buf;
964 bufptr_pos = PL_parser->bufptr - buf;
965 oldbufptr_pos = PL_parser->oldbufptr - buf;
966 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
967 linestart_pos = PL_parser->linestart - buf;
968 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
969 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
970 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
971 PL_parser->lex_shared->re_eval_start - buf : 0;
973 buf = sv_grow(linestr, len);
975 PL_parser->bufend = buf + bufend_pos;
976 PL_parser->bufptr = buf + bufptr_pos;
977 PL_parser->oldbufptr = buf + oldbufptr_pos;
978 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
979 PL_parser->linestart = buf + linestart_pos;
980 if (PL_parser->last_uni)
981 PL_parser->last_uni = buf + last_uni_pos;
982 if (PL_parser->last_lop)
983 PL_parser->last_lop = buf + last_lop_pos;
984 if (PL_parser->lex_shared->re_eval_start)
985 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
994 reallocating the buffer if necessary. This means that lexing code that
995 runs later will see the characters as if they had appeared in the input.
996 It is not recommended to do this as part of normal parsing, and most
997 uses of this facility run the risk of the inserted characters being
998 interpreted in an unintended manner.
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1016 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017 if (flags & ~(LEX_STUFF_UTF8))
1018 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1020 if (flags & LEX_STUFF_UTF8) {
1023 STRLEN highhalf = 0; /* Count of variants */
1024 const char *p, *e = pv+len;
1025 for (p = pv; p != e; p++) {
1026 if (! UTF8_IS_INVARIANT(*p)) {
1032 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033 bufptr = PL_parser->bufptr;
1034 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035 SvCUR_set(PL_parser->linestr,
1036 SvCUR(PL_parser->linestr) + len+highhalf);
1037 PL_parser->bufend += len+highhalf;
1038 for (p = pv; p != e; p++) {
1040 if (! UTF8_IS_INVARIANT(c)) {
1041 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1044 *bufptr++ = (char)c;
1049 if (flags & LEX_STUFF_UTF8) {
1050 STRLEN highhalf = 0;
1051 const char *p, *e = pv+len;
1052 for (p = pv; p != e; p++) {
1054 if (UTF8_IS_ABOVE_LATIN1(c)) {
1055 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056 "non-Latin-1 character into Latin-1 input");
1057 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1060 } else if (! UTF8_IS_INVARIANT(c)) {
1061 /* malformed UTF-8 */
1063 SAVESPTR(PL_warnhook);
1064 PL_warnhook = PERL_WARNHOOK_FATAL;
1065 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1071 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072 bufptr = PL_parser->bufptr;
1073 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074 SvCUR_set(PL_parser->linestr,
1075 SvCUR(PL_parser->linestr) + len-highhalf);
1076 PL_parser->bufend += len-highhalf;
1079 if (UTF8_IS_INVARIANT(*p)) {
1085 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1091 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092 bufptr = PL_parser->bufptr;
1093 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095 PL_parser->bufend += len;
1096 Copy(pv, bufptr, len, char);
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary. This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul. These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>. The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1126 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127 lex_stuff_pvn(pv, strlen(pv), flags);
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary. This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1141 The string to be inserted is the string value of I<sv>. The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1155 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1157 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1159 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1170 This is not the normal way to consume lexed text. For that, use
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1181 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182 buf = PL_parser->bufptr;
1184 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187 bufend = PL_parser->bufend;
1189 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190 unstuff_len = ptr - buf;
1191 Move(ptr, buf, bufend+1-ptr, char);
1192 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193 PL_parser->bufend = bufend - unstuff_len;
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1212 Perl_lex_read_to(pTHX_ char *ptr)
1215 PERL_ARGS_ASSERT_LEX_READ_TO;
1216 s = PL_parser->bufptr;
1217 if (ptr < s || ptr > PL_parser->bufend)
1218 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219 for (; s != ptr; s++)
1221 COPLINE_INC_WITH_HERELINES;
1222 PL_parser->linestart = s+1;
1224 PL_parser->bufptr = ptr;
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>. The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately. I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it. However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1252 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253 buf = SvPVX(PL_parser->linestr);
1255 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1258 if (ptr > PL_parser->bufptr)
1259 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260 discard_len = ptr - buf;
1261 if (PL_parser->oldbufptr < ptr)
1262 PL_parser->oldbufptr = ptr;
1263 if (PL_parser->oldoldbufptr < ptr)
1264 PL_parser->oldoldbufptr = ptr;
1265 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266 PL_parser->last_uni = NULL;
1267 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268 PL_parser->last_lop = NULL;
1269 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271 PL_parser->bufend -= discard_len;
1272 PL_parser->bufptr -= discard_len;
1273 PL_parser->oldbufptr -= discard_len;
1274 PL_parser->oldoldbufptr -= discard_len;
1275 if (PL_parser->last_uni)
1276 PL_parser->last_uni -= discard_len;
1277 if (PL_parser->last_lop)
1278 PL_parser->last_lop -= discard_len;
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more. It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded. If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM 0x40000000
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1311 STRLEN old_bufend_pos, new_bufend_pos;
1312 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314 bool got_some_for_debugger = 0;
1316 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318 linestr = PL_parser->linestr;
1319 buf = SvPVX(linestr);
1320 if (!(flags & LEX_KEEP_PREVIOUS) &&
1321 PL_parser->bufptr == PL_parser->bufend) {
1322 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1324 if (PL_parser->last_uni != PL_parser->bufend)
1325 PL_parser->last_uni = NULL;
1326 if (PL_parser->last_lop != PL_parser->bufend)
1327 PL_parser->last_lop = NULL;
1328 last_uni_pos = last_lop_pos = 0;
1332 old_bufend_pos = PL_parser->bufend - buf;
1333 bufptr_pos = PL_parser->bufptr - buf;
1334 oldbufptr_pos = PL_parser->oldbufptr - buf;
1335 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1336 linestart_pos = PL_parser->linestart - buf;
1337 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1338 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1340 if (flags & LEX_FAKE_EOF) {
1342 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1344 } else if (filter_gets(linestr, old_bufend_pos)) {
1346 got_some_for_debugger = 1;
1347 } else if (flags & LEX_NO_TERM) {
1350 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1351 sv_setpvs(linestr, "");
1353 /* End of real input. Close filehandle (unless it was STDIN),
1354 * then add implicit termination.
1356 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1357 PerlIO_clearerr(PL_parser->rsfp);
1358 else if (PL_parser->rsfp)
1359 (void)PerlIO_close(PL_parser->rsfp);
1360 PL_parser->rsfp = NULL;
1361 PL_parser->in_pod = PL_parser->filtered = 0;
1363 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1366 if (!PL_in_eval && PL_minus_p) {
1368 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1369 PL_minus_n = PL_minus_p = 0;
1370 } else if (!PL_in_eval && PL_minus_n) {
1371 sv_catpvs(linestr, /*{*/";}");
1374 sv_catpvs(linestr, ";");
1377 buf = SvPVX(linestr);
1378 new_bufend_pos = SvCUR(linestr);
1379 PL_parser->bufend = buf + new_bufend_pos;
1380 PL_parser->bufptr = buf + bufptr_pos;
1381 PL_parser->oldbufptr = buf + oldbufptr_pos;
1382 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1383 PL_parser->linestart = buf + linestart_pos;
1384 if (PL_parser->last_uni)
1385 PL_parser->last_uni = buf + last_uni_pos;
1386 if (PL_parser->last_lop)
1387 PL_parser->last_lop = buf + last_lop_pos;
1388 if (PL_parser->preambling != NOLINE) {
1389 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1390 PL_parser->preambling = NOLINE;
1392 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1393 PL_curstash != PL_debstash) {
1394 /* debugger active and we're not compiling the debugger code,
1395 * so store the line into the debugger's array of lines
1397 update_debugger_info(NULL, buf+old_bufend_pos,
1398 new_bufend_pos-old_bufend_pos);
1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1406 Looks ahead one (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the next character,
1408 or -1 if lexing has reached the end of the input text. To consume the
1409 peeked character, use L</lex_read_unichar>.
1411 If the next character is in (or extends into) the next chunk of input
1412 text, the next chunk will be read in. Normally the current chunk will be
1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1414 then the current chunk will not be discarded.
1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1417 is encountered, an exception is generated.
1423 Perl_lex_peek_unichar(pTHX_ U32 flags)
1427 if (flags & ~(LEX_KEEP_PREVIOUS))
1428 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1429 s = PL_parser->bufptr;
1430 bufend = PL_parser->bufend;
1436 if (!lex_next_chunk(flags))
1438 s = PL_parser->bufptr;
1439 bufend = PL_parser->bufend;
1442 if (UTF8_IS_INVARIANT(head))
1444 if (UTF8_IS_START(head)) {
1445 len = UTF8SKIP(&head);
1446 while ((STRLEN)(bufend-s) < len) {
1447 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1453 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1454 if (retlen == (STRLEN)-1) {
1455 /* malformed UTF-8 */
1457 SAVESPTR(PL_warnhook);
1458 PL_warnhook = PERL_WARNHOOK_FATAL;
1459 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1465 if (!lex_next_chunk(flags))
1467 s = PL_parser->bufptr;
1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1476 Reads the next (Unicode) character in the text currently being lexed.
1477 Returns the codepoint (unsigned integer value) of the character read,
1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1479 if lexing has reached the end of the input text. To non-destructively
1480 examine the next character, use L</lex_peek_unichar> instead.
1482 If the next character is in (or extends into) the next chunk of input
1483 text, the next chunk will be read in. Normally the current chunk will be
1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1485 then the current chunk will not be discarded.
1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1488 is encountered, an exception is generated.
1494 Perl_lex_read_unichar(pTHX_ U32 flags)
1497 if (flags & ~(LEX_KEEP_PREVIOUS))
1498 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1499 c = lex_peek_unichar(flags);
1502 COPLINE_INC_WITH_HERELINES;
1504 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1506 ++(PL_parser->bufptr);
1512 =for apidoc Amx|void|lex_read_space|U32 flags
1514 Reads optional spaces, in Perl style, in the text currently being
1515 lexed. The spaces may include ordinary whitespace characters and
1516 Perl-style comments. C<#line> directives are processed if encountered.
1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1518 at a non-space character (or the end of the input text).
1520 If spaces extend into the next chunk of input text, the next chunk will
1521 be read in. Normally the current chunk will be discarded at the same
1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1523 chunk will not be discarded.
1528 #define LEX_NO_INCLINE 0x40000000
1529 #define LEX_NO_NEXT_CHUNK 0x80000000
1532 Perl_lex_read_space(pTHX_ U32 flags)
1535 const bool can_incline = !(flags & LEX_NO_INCLINE);
1536 bool need_incline = 0;
1537 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1538 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1541 sv_free(PL_skipwhite);
1542 PL_skipwhite = NULL;
1545 PL_skipwhite = newSVpvs("");
1546 #endif /* PERL_MAD */
1547 s = PL_parser->bufptr;
1548 bufend = PL_parser->bufend;
1554 } while (!(c == '\n' || (c == 0 && s == bufend)));
1555 } else if (c == '\n') {
1558 PL_parser->linestart = s;
1564 } else if (isSPACE(c)) {
1566 } else if (c == 0 && s == bufend) {
1571 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1572 #endif /* PERL_MAD */
1573 if (flags & LEX_NO_NEXT_CHUNK)
1575 PL_parser->bufptr = s;
1576 l = CopLINE(PL_curcop);
1577 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1578 got_more = lex_next_chunk(flags);
1579 CopLINE_set(PL_curcop, l);
1580 s = PL_parser->bufptr;
1581 bufend = PL_parser->bufend;
1584 if (can_incline && need_incline && PL_parser->rsfp) {
1594 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1595 #endif /* PERL_MAD */
1596 PL_parser->bufptr = s;
1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1603 This function performs syntax checking on a prototype, C<proto>.
1604 If C<warn> is true, any illegal characters or mismatched brackets
1605 will trigger illegalproto warnings, declaring that they were
1606 detected in the prototype for C<name>.
1608 The return value is C<true> if this is a valid prototype, and
1609 C<false> if it is not, regardless of whether C<warn> was C<true> or
1612 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1621 STRLEN len, origlen;
1622 char *p = proto ? SvPV(proto, len) : NULL;
1623 bool bad_proto = FALSE;
1624 bool in_brackets = FALSE;
1625 bool after_slash = FALSE;
1626 char greedy_proto = ' ';
1627 bool proto_after_greedy_proto = FALSE;
1628 bool must_be_last = FALSE;
1629 bool underscore = FALSE;
1630 bool bad_proto_after_underscore = FALSE;
1632 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1638 for (; len--; p++) {
1641 proto_after_greedy_proto = TRUE;
1643 if (!strchr(";@%", *p))
1644 bad_proto_after_underscore = TRUE;
1647 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1654 in_brackets = FALSE;
1655 else if ((*p == '@' || *p == '%') &&
1658 must_be_last = TRUE;
1667 after_slash = FALSE;
1672 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1675 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1676 origlen, UNI_DISPLAY_ISPRINT)
1677 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1679 if (proto_after_greedy_proto)
1680 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1681 "Prototype after '%c' for %"SVf" : %s",
1682 greedy_proto, SVfARG(name), p);
1684 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1685 "Missing ']' in prototype for %"SVf" : %s",
1688 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1689 "Illegal character in prototype for %"SVf" : %s",
1691 if (bad_proto_after_underscore)
1692 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1693 "Illegal character after '_' in prototype for %"SVf" : %s",
1697 return (! (proto_after_greedy_proto || bad_proto) );
1702 * This subroutine has nothing to do with tilting, whether at windmills
1703 * or pinball tables. Its name is short for "increment line". It
1704 * increments the current line number in CopLINE(PL_curcop) and checks
1705 * to see whether the line starts with a comment of the form
1706 * # line 500 "foo.pm"
1707 * If so, it sets the current line number and file to the values in the comment.
1711 S_incline(pTHX_ const char *s)
1719 PERL_ARGS_ASSERT_INCLINE;
1721 COPLINE_INC_WITH_HERELINES;
1722 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1723 && s+1 == PL_bufend && *s == ';') {
1724 /* fake newline in string eval */
1725 CopLINE_dec(PL_curcop);
1730 while (SPACE_OR_TAB(*s))
1732 if (strnEQ(s, "line", 4))
1736 if (SPACE_OR_TAB(*s))
1740 while (SPACE_OR_TAB(*s))
1748 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1750 while (SPACE_OR_TAB(*s))
1752 if (*s == '"' && (t = strchr(s+1, '"'))) {
1758 while (!isSPACE(*t))
1762 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1764 if (*e != '\n' && *e != '\0')
1765 return; /* false alarm */
1767 line_num = atoi(n)-1;
1770 const STRLEN len = t - s;
1772 if (!PL_rsfp && !PL_parser->filtered) {
1773 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1774 * to *{"::_<newfilename"} */
1775 /* However, the long form of evals is only turned on by the
1776 debugger - usually they're "(eval %lu)" */
1777 GV * const cfgv = CopFILEGV(PL_curcop);
1780 STRLEN tmplen2 = len;
1784 if (tmplen2 + 2 <= sizeof smallbuf)
1787 Newx(tmpbuf2, tmplen2 + 2, char);
1792 memcpy(tmpbuf2 + 2, s, tmplen2);
1795 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1797 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1798 /* adjust ${"::_<newfilename"} to store the new file name */
1799 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1800 /* The line number may differ. If that is the case,
1801 alias the saved lines that are in the array.
1802 Otherwise alias the whole array. */
1803 if (CopLINE(PL_curcop) == line_num) {
1804 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1805 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1807 else if (GvAV(cfgv)) {
1808 AV * const av = GvAV(cfgv);
1809 const I32 start = CopLINE(PL_curcop)+1;
1810 I32 items = AvFILLp(av) - start;
1812 AV * const av2 = GvAVn(gv2);
1813 SV **svp = AvARRAY(av) + start;
1814 I32 l = (I32)line_num+1;
1816 av_store(av2, l++, SvREFCNT_inc(*svp++));
1821 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1824 CopFILE_free(PL_curcop);
1825 CopFILE_setn(PL_curcop, s, len);
1827 CopLINE_set(PL_curcop, line_num);
1830 #define skipspace(s) skipspace_flags(s, 0)
1833 /* skip space before PL_thistoken */
1836 S_skipspace0(pTHX_ char *s)
1838 PERL_ARGS_ASSERT_SKIPSPACE0;
1845 PL_thiswhite = newSVpvs("");
1846 sv_catsv(PL_thiswhite, PL_skipwhite);
1847 sv_free(PL_skipwhite);
1850 PL_realtokenstart = s - SvPVX(PL_linestr);
1854 /* skip space after PL_thistoken */
1857 S_skipspace1(pTHX_ char *s)
1859 const char *start = s;
1860 I32 startoff = start - SvPVX(PL_linestr);
1862 PERL_ARGS_ASSERT_SKIPSPACE1;
1867 start = SvPVX(PL_linestr) + startoff;
1868 if (!PL_thistoken && PL_realtokenstart >= 0) {
1869 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1870 PL_thistoken = newSVpvn(tstart, start - tstart);
1872 PL_realtokenstart = -1;
1875 PL_nextwhite = newSVpvs("");
1876 sv_catsv(PL_nextwhite, PL_skipwhite);
1877 sv_free(PL_skipwhite);
1884 S_skipspace2(pTHX_ char *s, SV **svp)
1887 const I32 startoff = s - SvPVX(PL_linestr);
1889 PERL_ARGS_ASSERT_SKIPSPACE2;
1892 if (!PL_madskills || !svp)
1894 start = SvPVX(PL_linestr) + startoff;
1895 if (!PL_thistoken && PL_realtokenstart >= 0) {
1896 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1897 PL_thistoken = newSVpvn(tstart, start - tstart);
1898 PL_realtokenstart = -1;
1902 *svp = newSVpvs("");
1903 sv_setsv(*svp, PL_skipwhite);
1904 sv_free(PL_skipwhite);
1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1915 AV *av = CopFILEAVx(PL_curcop);
1918 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1920 sv = *av_fetch(av, 0, 1);
1921 SvUPGRADE(sv, SVt_PVMG);
1923 if (!SvPOK(sv)) sv_setpvs(sv,"");
1925 sv_catsv(sv, orig_sv);
1927 sv_catpvn(sv, buf, len);
1932 if (PL_parser->preambling == NOLINE)
1933 av_store(av, CopLINE(PL_curcop), sv);
1939 * Called to gobble the appropriate amount and type of whitespace.
1940 * Skips comments as well.
1944 S_skipspace_flags(pTHX_ char *s, U32 flags)
1948 #endif /* PERL_MAD */
1949 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1952 sv_free(PL_skipwhite);
1953 PL_skipwhite = NULL;
1955 #endif /* PERL_MAD */
1956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1957 while (s < PL_bufend && SPACE_OR_TAB(*s))
1960 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1962 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1963 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1964 LEX_NO_NEXT_CHUNK : 0));
1966 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1967 if (PL_linestart > PL_bufptr)
1968 PL_bufptr = PL_linestart;
1973 PL_skipwhite = newSVpvn(start, s-start);
1974 #endif /* PERL_MAD */
1980 * Check the unary operators to ensure there's no ambiguity in how they're
1981 * used. An ambiguous piece of code would be:
1983 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1984 * the +5 is its argument.
1994 if (PL_oldoldbufptr != PL_last_uni)
1996 while (isSPACE(*PL_last_uni))
1999 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2001 if ((t = strchr(s, '(')) && t < PL_bufptr)
2004 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2005 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2006 (int)(s - PL_last_uni), PL_last_uni);
2010 * LOP : macro to build a list operator. Its behaviour has been replaced
2011 * with a subroutine, S_lop() for which LOP is just another name.
2014 #define LOP(f,x) return lop(f,x,s)
2018 * Build a list operator (or something that might be one). The rules:
2019 * - if we have a next token, then it's a list operator [why?]
2020 * - if the next thing is an opening paren, then it's a function
2021 * - else it's a list operator
2025 S_lop(pTHX_ I32 f, int x, char *s)
2029 PERL_ARGS_ASSERT_LOP;
2035 PL_last_lop = PL_oldbufptr;
2036 PL_last_lop_op = (OPCODE)f;
2045 return REPORT(FUNC);
2048 return REPORT(FUNC);
2051 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2052 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2053 return REPORT(LSTOP);
2060 * Sets up for an eventual force_next(). start_force(0) basically does
2061 * an unshift, while start_force(-1) does a push. yylex removes items
2066 S_start_force(pTHX_ int where)
2070 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2071 where = PL_lasttoke;
2072 assert(PL_curforce < 0 || PL_curforce == where);
2073 if (PL_curforce != where) {
2074 for (i = PL_lasttoke; i > where; --i) {
2075 PL_nexttoke[i] = PL_nexttoke[i-1];
2079 if (PL_curforce < 0) /* in case of duplicate start_force() */
2080 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2081 PL_curforce = where;
2084 curmad('^', newSVpvs(""));
2085 CURMAD('_', PL_nextwhite);
2090 S_curmad(pTHX_ char slot, SV *sv)
2096 if (PL_curforce < 0)
2097 where = &PL_thismad;
2099 where = &PL_nexttoke[PL_curforce].next_mad;
2105 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2107 else if (PL_encoding) {
2108 sv_recode_to_utf8(sv, PL_encoding);
2113 /* keep a slot open for the head of the list? */
2114 if (slot != '_' && *where && (*where)->mad_key == '^') {
2115 (*where)->mad_key = slot;
2116 sv_free(MUTABLE_SV(((*where)->mad_val)));
2117 (*where)->mad_val = (void*)sv;
2120 addmad(newMADsv(slot, sv), where, 0);
2123 # define start_force(where) NOOP
2124 # define curmad(slot, sv) NOOP
2129 * When the lexer realizes it knows the next token (for instance,
2130 * it is reordering tokens for the parser) then it can call S_force_next
2131 * to know what token to return the next time the lexer is called. Caller
2132 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2133 * and possibly PL_expect to ensure the lexer handles the token correctly.
2137 S_force_next(pTHX_ I32 type)
2142 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2143 tokereport(type, &NEXTVAL_NEXTTOKE);
2147 if (PL_curforce < 0)
2148 start_force(PL_lasttoke);
2149 PL_nexttoke[PL_curforce].next_type = type;
2150 if (PL_lex_state != LEX_KNOWNEXT)
2151 PL_lex_defer = PL_lex_state;
2152 PL_lex_state = LEX_KNOWNEXT;
2153 PL_lex_expect = PL_expect;
2156 PL_nexttype[PL_nexttoke] = type;
2158 if (PL_lex_state != LEX_KNOWNEXT) {
2159 PL_lex_defer = PL_lex_state;
2160 PL_lex_expect = PL_expect;
2161 PL_lex_state = LEX_KNOWNEXT;
2169 * This subroutine handles postfix deref syntax after the arrow has already
2170 * been emitted. @* $* etc. are emitted as two separate token right here.
2171 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2172 * only the first, leaving yylex to find the next.
2176 S_postderef(pTHX_ int const funny, char const next)
2179 assert(funny == DOLSHARP || strchr("$@%&*", funny));
2180 assert(strchr("*[{", next));
2182 PL_expect = XOPERATOR;
2183 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2184 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2185 PL_lex_state = LEX_INTERPEND;
2186 start_force(PL_curforce);
2187 force_next(POSTJOIN);
2189 start_force(PL_curforce);
2194 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2195 && !PL_lex_brackets)
2197 PL_expect = XOPERATOR;
2206 int yyc = PL_parser->yychar;
2207 if (yyc != YYEMPTY) {
2210 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2212 PL_lex_allbrackets--;
2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215 } else if (yyc == '('/*)*/) {
2216 PL_lex_allbrackets--;
2221 PL_parser->yychar = YYEMPTY;
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2229 SV * const sv = newSVpvn_utf8(start, len,
2232 && !is_ascii_string((const U8*)start, len)
2233 && is_utf8_string((const U8*)start, len));
2239 * When the lexer knows the next thing is a word (for instance, it has
2240 * just seen -> and it knows that the next char is a word char, then
2241 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2245 * char *start : buffer position (must be within PL_linestr)
2246 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2247 * int check_keyword : if true, Perl checks to make sure the word isn't
2248 * a keyword (do this if the word is a label, e.g. goto FOO)
2249 * int allow_pack : if true, : characters will also be allowed (require,
2250 * use, etc. do this)
2251 * int allow_initial_tick : used by the "sub" lexer only.
2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2261 PERL_ARGS_ASSERT_FORCE_WORD;
2263 start = SKIPSPACE1(start);
2265 if (isIDFIRST_lazy_if(s,UTF) ||
2266 (allow_pack && *s == ':') )
2268 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2269 if (check_keyword) {
2270 char *s2 = PL_tokenbuf;
2271 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2273 if (keyword(s2, len, 0))
2276 start_force(PL_curforce);
2278 curmad('X', newSVpvn(start,s-start));
2279 if (token == METHOD) {
2284 PL_expect = XOPERATOR;
2288 curmad('g', newSVpvs( "forced" ));
2289 NEXTVAL_NEXTTOKE.opval
2290 = (OP*)newSVOP(OP_CONST,0,
2291 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2292 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2300 * Called when the lexer wants $foo *foo &foo etc, but the program
2301 * text only contains the "foo" portion. The first argument is a pointer
2302 * to the "foo", and the second argument is the type symbol to prefix.
2303 * Forces the next token to be a "WORD".
2304 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2308 S_force_ident(pTHX_ const char *s, int kind)
2312 PERL_ARGS_ASSERT_FORCE_IDENT;
2315 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2316 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2317 UTF ? SVf_UTF8 : 0));
2318 start_force(PL_curforce);
2319 NEXTVAL_NEXTTOKE.opval = o;
2322 o->op_private = OPpCONST_ENTERED;
2323 /* XXX see note in pp_entereval() for why we forgo typo
2324 warnings if the symbol must be introduced in an eval.
2326 gv_fetchpvn_flags(s, len,
2327 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2328 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2329 kind == '$' ? SVt_PV :
2330 kind == '@' ? SVt_PVAV :
2331 kind == '%' ? SVt_PVHV :
2339 S_force_ident_maybe_lex(pTHX_ char pit)
2341 start_force(PL_curforce);
2342 NEXTVAL_NEXTTOKE.ival = pit;
2347 Perl_str_to_version(pTHX_ SV *sv)
2352 const char *start = SvPV_const(sv,len);
2353 const char * const end = start + len;
2354 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2356 PERL_ARGS_ASSERT_STR_TO_VERSION;
2358 while (start < end) {
2362 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2367 retval += ((NV)n)/nshift;
2376 * Forces the next token to be a version number.
2377 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2378 * and if "guessing" is TRUE, then no new token is created (and the caller
2379 * must use an alternative parsing method).
2383 S_force_version(pTHX_ char *s, int guessing)
2389 I32 startoff = s - SvPVX(PL_linestr);
2392 PERL_ARGS_ASSERT_FORCE_VERSION;
2400 while (isDIGIT(*d) || *d == '_' || *d == '.')
2404 start_force(PL_curforce);
2405 curmad('X', newSVpvn(s,d-s));
2408 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2410 s = scan_num(s, &pl_yylval);
2411 version = pl_yylval.opval;
2412 ver = cSVOPx(version)->op_sv;
2413 if (SvPOK(ver) && !SvNIOK(ver)) {
2414 SvUPGRADE(ver, SVt_PVNV);
2415 SvNV_set(ver, str_to_version(ver));
2416 SvNOK_on(ver); /* hint that it is a version */
2419 else if (guessing) {
2422 sv_free(PL_nextwhite); /* let next token collect whitespace */
2424 s = SvPVX(PL_linestr) + startoff;
2432 if (PL_madskills && !version) {
2433 sv_free(PL_nextwhite); /* let next token collect whitespace */
2435 s = SvPVX(PL_linestr) + startoff;
2438 /* NOTE: The parser sees the package name and the VERSION swapped */
2439 start_force(PL_curforce);
2440 NEXTVAL_NEXTTOKE.opval = version;
2447 * S_force_strict_version
2448 * Forces the next token to be a version number using strict syntax rules.
2452 S_force_strict_version(pTHX_ char *s)
2457 I32 startoff = s - SvPVX(PL_linestr);
2459 const char *errstr = NULL;
2461 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2463 while (isSPACE(*s)) /* leading whitespace */
2466 if (is_STRICT_VERSION(s,&errstr)) {
2468 s = (char *)scan_version(s, ver, 0);
2469 version = newSVOP(OP_CONST, 0, ver);
2471 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2472 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2476 yyerror(errstr); /* version required */
2481 if (PL_madskills && !version) {
2482 sv_free(PL_nextwhite); /* let next token collect whitespace */
2484 s = SvPVX(PL_linestr) + startoff;
2487 /* NOTE: The parser sees the package name and the VERSION swapped */
2488 start_force(PL_curforce);
2489 NEXTVAL_NEXTTOKE.opval = version;
2497 * Tokenize a quoted string passed in as an SV. It finds the next
2498 * chunk, up to end of string or a backslash. It may make a new
2499 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2504 S_tokeq(pTHX_ SV *sv)
2512 PERL_ARGS_ASSERT_TOKEQ;
2516 assert (!SvIsCOW(sv));
2517 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2521 /* This is relying on the SV being "well formed" with a trailing '\0' */
2522 while (s < send && !(*s == '\\' && s[1] == '\\'))
2527 if ( PL_hints & HINT_NEW_STRING ) {
2528 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2529 SVs_TEMP | SvUTF8(sv));
2533 if (s + 1 < send && (s[1] == '\\'))
2534 s++; /* all that, just for this */
2539 SvCUR_set(sv, d - SvPVX_const(sv));
2541 if ( PL_hints & HINT_NEW_STRING )
2542 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2547 * Now come three functions related to double-quote context,
2548 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2549 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2550 * interact with PL_lex_state, and create fake ( ... ) argument lists
2551 * to handle functions and concatenation.
2555 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2560 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2562 * Pattern matching will set PL_lex_op to the pattern-matching op to
2563 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2565 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2567 * Everything else becomes a FUNC.
2569 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2570 * had an OP_CONST or OP_READLINE). This just sets us up for a
2571 * call to S_sublex_push().
2575 S_sublex_start(pTHX)
2578 const I32 op_type = pl_yylval.ival;
2580 if (op_type == OP_NULL) {
2581 pl_yylval.opval = PL_lex_op;
2585 if (op_type == OP_CONST) {
2586 SV *sv = tokeq(PL_lex_stuff);
2588 if (SvTYPE(sv) == SVt_PVIV) {
2589 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2591 const char * const p = SvPV_const(sv, len);
2592 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2596 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2597 PL_lex_stuff = NULL;
2601 PL_sublex_info.super_state = PL_lex_state;
2602 PL_sublex_info.sub_inwhat = (U16)op_type;
2603 PL_sublex_info.sub_op = PL_lex_op;
2604 PL_lex_state = LEX_INTERPPUSH;
2608 pl_yylval.opval = PL_lex_op;
2618 * Create a new scope to save the lexing state. The scope will be
2619 * ended in S_sublex_done. Returns a '(', starting the function arguments
2620 * to the uc, lc, etc. found before.
2621 * Sets PL_lex_state to LEX_INTERPCONCAT.
2629 const bool is_heredoc = PL_multi_close == '<';
2632 PL_lex_state = PL_sublex_info.super_state;
2633 SAVEI8(PL_lex_dojoin);
2634 SAVEI32(PL_lex_brackets);
2635 SAVEI32(PL_lex_allbrackets);
2636 SAVEI32(PL_lex_formbrack);
2637 SAVEI8(PL_lex_fakeeof);
2638 SAVEI32(PL_lex_casemods);
2639 SAVEI32(PL_lex_starts);
2640 SAVEI8(PL_lex_state);
2641 SAVESPTR(PL_lex_repl);
2642 SAVEVPTR(PL_lex_inpat);
2643 SAVEI16(PL_lex_inwhat);
2646 SAVECOPLINE(PL_curcop);
2647 SAVEI32(PL_multi_end);
2648 SAVEI32(PL_parser->herelines);
2649 PL_parser->herelines = 0;
2651 SAVEI8(PL_multi_close);
2652 SAVEPPTR(PL_bufptr);
2653 SAVEPPTR(PL_bufend);
2654 SAVEPPTR(PL_oldbufptr);
2655 SAVEPPTR(PL_oldoldbufptr);
2656 SAVEPPTR(PL_last_lop);
2657 SAVEPPTR(PL_last_uni);
2658 SAVEPPTR(PL_linestart);
2659 SAVESPTR(PL_linestr);
2660 SAVEGENERICPV(PL_lex_brackstack);
2661 SAVEGENERICPV(PL_lex_casestack);
2662 SAVEGENERICPV(PL_parser->lex_shared);
2663 SAVEBOOL(PL_parser->lex_re_reparsing);
2664 SAVEI32(PL_copline);
2666 /* The here-doc parser needs to be able to peek into outer lexing
2667 scopes to find the body of the here-doc. So we put PL_linestr and
2668 PL_bufptr into lex_shared, to ‘share’ those values.
2670 PL_parser->lex_shared->ls_linestr = PL_linestr;
2671 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2673 PL_linestr = PL_lex_stuff;
2674 PL_lex_repl = PL_sublex_info.repl;
2675 PL_lex_stuff = NULL;
2676 PL_sublex_info.repl = NULL;
2678 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2679 = SvPVX(PL_linestr);
2680 PL_bufend += SvCUR(PL_linestr);
2681 PL_last_lop = PL_last_uni = NULL;
2682 SAVEFREESV(PL_linestr);
2683 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2685 PL_lex_dojoin = FALSE;
2686 PL_lex_brackets = PL_lex_formbrack = 0;
2687 PL_lex_allbrackets = 0;
2688 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2689 Newx(PL_lex_brackstack, 120, char);
2690 Newx(PL_lex_casestack, 12, char);
2691 PL_lex_casemods = 0;
2692 *PL_lex_casestack = '\0';
2694 PL_lex_state = LEX_INTERPCONCAT;
2696 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2697 PL_copline = NOLINE;
2699 Newxz(shared, 1, LEXSHARED);
2700 shared->ls_prev = PL_parser->lex_shared;
2701 PL_parser->lex_shared = shared;
2703 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2704 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2705 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2706 PL_lex_inpat = PL_sublex_info.sub_op;
2708 PL_lex_inpat = NULL;
2710 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2711 PL_in_eval &= ~EVAL_RE_REPARSING;
2718 * Restores lexer state after a S_sublex_push.
2725 if (!PL_lex_starts++) {
2726 SV * const sv = newSVpvs("");
2727 if (SvUTF8(PL_linestr))
2729 PL_expect = XOPERATOR;
2730 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2734 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2735 PL_lex_state = LEX_INTERPCASEMOD;
2739 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2740 assert(PL_lex_inwhat != OP_TRANSR);
2742 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2743 PL_linestr = PL_lex_repl;
2745 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2746 PL_bufend += SvCUR(PL_linestr);
2747 PL_last_lop = PL_last_uni = NULL;
2748 PL_lex_dojoin = FALSE;
2749 PL_lex_brackets = 0;
2750 PL_lex_allbrackets = 0;
2751 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2752 PL_lex_casemods = 0;
2753 *PL_lex_casestack = '\0';
2755 if (SvEVALED(PL_lex_repl)) {
2756 PL_lex_state = LEX_INTERPNORMAL;
2758 /* we don't clear PL_lex_repl here, so that we can check later
2759 whether this is an evalled subst; that means we rely on the
2760 logic to ensure sublex_done() is called again only via the
2761 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2764 PL_lex_state = LEX_INTERPCONCAT;
2767 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2768 CopLINE(PL_curcop) +=
2769 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2770 + PL_parser->herelines;
2771 PL_parser->herelines = 0;
2776 const line_t l = CopLINE(PL_curcop);
2781 PL_endwhite = newSVpvs("");
2782 sv_catsv(PL_endwhite, PL_thiswhite);
2786 sv_setpvs(PL_thistoken,"");
2788 PL_realtokenstart = -1;
2792 if (PL_multi_close == '<')
2793 PL_parser->herelines += l - PL_multi_end;
2794 PL_bufend = SvPVX(PL_linestr);
2795 PL_bufend += SvCUR(PL_linestr);
2796 PL_expect = XOPERATOR;
2797 PL_sublex_info.sub_inwhat = 0;
2802 PERL_STATIC_INLINE SV*
2803 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2805 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2806 * interior, hence to the "}". Finds what the name resolves to, returning
2807 * an SV* containing it; NULL if no valid one found */
2809 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2816 const U8* first_bad_char_loc;
2817 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2819 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2821 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2823 &first_bad_char_loc))
2825 /* If warnings are on, this will print a more detailed analysis of what
2826 * is wrong than the error message below */
2827 utf8n_to_uvchr(first_bad_char_loc,
2828 e - ((char *) first_bad_char_loc),
2831 /* We deliberately don't try to print the malformed character, which
2832 * might not print very well; it also may be just the first of many
2833 * malformations, so don't print what comes after it */
2834 yyerror(Perl_form(aTHX_
2835 "Malformed UTF-8 character immediately after '%.*s'",
2836 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2840 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2841 /* include the <}> */
2842 e - backslash_ptr + 1);
2844 SvREFCNT_dec_NN(res);
2848 /* See if the charnames handler is the Perl core's, and if so, we can skip
2849 * the validation needed for a user-supplied one, as Perl's does its own
2851 table = GvHV(PL_hintgv); /* ^H */
2852 cvp = hv_fetchs(table, "charnames", FALSE);
2853 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2854 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2856 const char * const name = HvNAME(stash);
2857 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2858 && strEQ(name, "_charnames")) {
2863 /* Here, it isn't Perl's charname handler. We can't rely on a
2864 * user-supplied handler to validate the input name. For non-ut8 input,
2865 * look to see that the first character is legal. Then loop through the
2866 * rest checking that each is a continuation */
2868 /* This code makes the reasonable assumption that the only Latin1-range
2869 * characters that begin a character name alias are alphabetic, otherwise
2870 * would have to create a isCHARNAME_BEGIN macro */
2873 if (! isALPHAU(*s)) {
2878 if (! isCHARNAME_CONT(*s)) {
2881 if (*s == ' ' && *(s-1) == ' ') {
2884 if ((U8) *s == NBSP_NATIVE && ckWARN_d(WARN_DEPRECATED)) {
2885 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2886 "NO-BREAK SPACE in a charnames "
2887 "alias definition is deprecated");
2893 /* Similarly for utf8. For invariants can check directly; for other
2894 * Latin1, can calculate their code point and check; otherwise use a
2896 if (UTF8_IS_INVARIANT(*s)) {
2897 if (! isALPHAU(*s)) {
2901 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2902 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2908 if (! PL_utf8_charname_begin) {
2909 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2910 PL_utf8_charname_begin = _core_swash_init("utf8",
2911 "_Perl_Charname_Begin",
2913 1, 0, NULL, &flags);
2915 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2922 if (UTF8_IS_INVARIANT(*s)) {
2923 if (! isCHARNAME_CONT(*s)) {
2926 if (*s == ' ' && *(s-1) == ' ') {
2931 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2932 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2936 if (*s == *NBSP_UTF8
2937 && *(s+1) == *(NBSP_UTF8+1)
2938 && ckWARN_d(WARN_DEPRECATED))
2940 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2941 "NO-BREAK SPACE in a charnames "
2942 "alias definition is deprecated");
2947 if (! PL_utf8_charname_continue) {
2948 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2949 PL_utf8_charname_continue = _core_swash_init("utf8",
2950 "_Perl_Charname_Continue",
2952 1, 0, NULL, &flags);
2954 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2961 if (*(s-1) == ' ') {
2964 "charnames alias definitions may not contain trailing "
2965 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2966 (int)(s - backslash_ptr + 1), backslash_ptr,
2967 (int)(e - s + 1), s + 1
2969 UTF ? SVf_UTF8 : 0);
2973 if (SvUTF8(res)) { /* Don't accept malformed input */
2974 const U8* first_bad_char_loc;
2976 const char* const str = SvPV_const(res, len);
2977 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2978 /* If warnings are on, this will print a more detailed analysis of
2979 * what is wrong than the error message below */
2980 utf8n_to_uvchr(first_bad_char_loc,
2981 (char *) first_bad_char_loc - str,
2984 /* We deliberately don't try to print the malformed character,
2985 * which might not print very well; it also may be just the first
2986 * of many malformations, so don't print what comes after it */
2989 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2990 (int) (e - backslash_ptr + 1), backslash_ptr,
2991 (int) ((char *) first_bad_char_loc - str), str
3002 /* The final %.*s makes sure that should the trailing NUL be missing
3003 * that this print won't run off the end of the string */
3006 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3007 (int)(s - backslash_ptr + 1), backslash_ptr,
3008 (int)(e - s + 1), s + 1
3010 UTF ? SVf_UTF8 : 0);
3017 "charnames alias definitions may not contain a sequence of "
3018 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3019 (int)(s - backslash_ptr + 1), backslash_ptr,
3020 (int)(e - s + 1), s + 1
3022 UTF ? SVf_UTF8 : 0);
3029 Extracts the next constant part of a pattern, double-quoted string,
3030 or transliteration. This is terrifying code.
3032 For example, in parsing the double-quoted string "ab\x63$d", it would
3033 stop at the '$' and return an OP_CONST containing 'abc'.
3035 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3036 processing a pattern (PL_lex_inpat is true), a transliteration
3037 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3039 Returns a pointer to the character scanned up to. If this is
3040 advanced from the start pointer supplied (i.e. if anything was
3041 successfully parsed), will leave an OP_CONST for the substring scanned
3042 in pl_yylval. Caller must intuit reason for not parsing further
3043 by looking at the next characters herself.
3047 \N{FOO} => \N{U+hex_for_character_FOO}
3048 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3051 all other \-char, including \N and \N{ apart from \N{ABC}
3054 @ and $ where it appears to be a var, but not for $ as tail anchor
3059 In transliterations:
3060 characters are VERY literal, except for - not at the start or end
3061 of the string, which indicates a range. If the range is in bytes,
3062 scan_const expands the range to the full set of intermediate
3063 characters. If the range is in utf8, the hyphen is replaced with
3064 a certain range mark which will be handled by pmtrans() in op.c.
3066 In double-quoted strings:
3068 double-quoted style: \r and \n
3069 constants: \x31, etc.
3070 deprecated backrefs: \1 (in substitution replacements)
3071 case and quoting: \U \Q \E
3074 scan_const does *not* construct ops to handle interpolated strings.
3075 It stops processing as soon as it finds an embedded $ or @ variable
3076 and leaves it to the caller to work out what's going on.
3078 embedded arrays (whether in pattern or not) could be:
3079 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3081 $ in double-quoted strings must be the symbol of an embedded scalar.
3083 $ in pattern could be $foo or could be tail anchor. Assumption:
3084 it's a tail anchor if $ is the last thing in the string, or if it's
3085 followed by one of "()| \r\n\t"
3087 \1 (backreferences) are turned into $1 in substitutions
3089 The structure of the code is
3090 while (there's a character to process) {
3091 handle transliteration ranges
3092 skip regexp comments /(?#comment)/ and codes /(?{code})/
3093 skip #-initiated comments in //x patterns
3094 check for embedded arrays
3095 check for embedded scalars
3097 deprecate \1 in substitution replacements
3098 handle string-changing backslashes \l \U \Q \E, etc.
3099 switch (what was escaped) {
3100 handle \- in a transliteration (becomes a literal -)
3101 if a pattern and not \N{, go treat as regular character
3102 handle \132 (octal characters)
3103 handle \x15 and \x{1234} (hex characters)
3104 handle \N{name} (named characters, also \N{3,5} in a pattern)
3105 handle \cV (control characters)
3106 handle printf-style backslashes (\f, \r, \n, etc)
3109 } (end if backslash)
3110 handle regular character
3111 } (end while character to read)
3116 S_scan_const(pTHX_ char *start)
3119 char *send = PL_bufend; /* end of the constant */
3120 SV *sv = newSV(send - start); /* sv for the constant. See
3121 note below on sizing. */
3122 char *s = start; /* start of the constant */
3123 char *d = SvPVX(sv); /* destination for copies */
3124 bool dorange = FALSE; /* are we in a translit range? */
3125 bool didrange = FALSE; /* did we just finish a range? */
3126 bool in_charclass = FALSE; /* within /[...]/ */
3127 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3128 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3129 to be UTF8? But, this can
3130 show as true when the source
3131 isn't utf8, as for example
3132 when it is entirely composed
3134 SV *res; /* result from charnames */
3136 /* Note on sizing: The scanned constant is placed into sv, which is
3137 * initialized by newSV() assuming one byte of output for every byte of
3138 * input. This routine expects newSV() to allocate an extra byte for a
3139 * trailing NUL, which this routine will append if it gets to the end of
3140 * the input. There may be more bytes of input than output (eg., \N{LATIN
3141 * CAPITAL LETTER A}), or more output than input if the constant ends up
3142 * recoded to utf8, but each time a construct is found that might increase
3143 * the needed size, SvGROW() is called. Its size parameter each time is
3144 * based on the best guess estimate at the time, namely the length used so
3145 * far, plus the length the current construct will occupy, plus room for
3146 * the trailing NUL, plus one byte for every input byte still unscanned */
3148 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3151 UV literal_endpoint = 0;
3152 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3155 PERL_ARGS_ASSERT_SCAN_CONST;
3157 assert(PL_lex_inwhat != OP_TRANSR);
3158 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3159 /* If we are doing a trans and we know we want UTF8 set expectation */
3160 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3161 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3164 /* Protect sv from errors and fatal warnings. */
3165 ENTER_with_name("scan_const");
3168 while (s < send || dorange) {
3170 /* get transliterations out of the way (they're most literal) */
3171 if (PL_lex_inwhat == OP_TRANS) {
3172 /* expand a range A-Z to the full set of characters. AIE! */
3174 I32 i; /* current expanded character */
3175 I32 min; /* first character in range */
3176 I32 max; /* last character in range */
3187 char * const c = (char*)utf8_hop((U8*)d, -1);
3191 *c = (char) ILLEGAL_UTF8_BYTE;
3192 /* mark the range as done, and continue */
3198 i = d - SvPVX_const(sv); /* remember current offset */
3201 SvLEN(sv) + (has_utf8 ?
3202 (512 - UTF_CONTINUATION_MARK +
3205 /* How many two-byte within 0..255: 128 in UTF-8,
3206 * 96 in UTF-8-mod. */
3208 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3210 d = SvPVX(sv) + i; /* refresh d after realloc */
3214 for (j = 0; j <= 1; j++) {
3215 char * const c = (char*)utf8_hop((U8*)d, -1);
3216 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3222 max = (U8)0xff; /* only to \xff */
3223 uvmax = uv; /* \x{100} to uvmax */
3225 d = c; /* eat endpoint chars */
3230 d -= 2; /* eat the first char and the - */
3231 min = (U8)*d; /* first char in range */
3232 max = (U8)d[1]; /* last char in range */
3239 "Invalid range \"%c-%c\" in transliteration operator",
3240 (char)min, (char)max);
3244 if (literal_endpoint == 2 &&
3245 ((isLOWER_A(min) && isLOWER_A(max)) ||
3246 (isUPPER_A(min) && isUPPER_A(max))))
3248 for (i = min; i <= max; i++) {
3255 for (i = min; i <= max; i++)
3258 append_utf8_from_native_byte(i, &d);
3266 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3268 *d++ = (char) ILLEGAL_UTF8_BYTE;
3270 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3274 /* mark the range as done, and continue */
3278 literal_endpoint = 0;
3283 /* range begins (ignore - as first or last char) */
3284 else if (*s == '-' && s+1 < send && s != start) {
3286 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3293 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3303 literal_endpoint = 0;
3304 native_range = TRUE;
3309 /* if we get here, we're not doing a transliteration */
3311 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3314 while (s1 >= start && *s1-- == '\\')
3317 in_charclass = TRUE;
3320 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3323 while (s1 >= start && *s1-- == '\\')
3326 in_charclass = FALSE;
3329 /* skip for regexp comments /(?#comment)/, except for the last
3330 * char, which will be done separately.
3331 * Stop on (?{..}) and friends */
3333 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3335 while (s+1 < send && *s != ')')
3338 else if (!PL_lex_casemods &&
3339 ( s[2] == '{' /* This should match regcomp.c */
3340 || (s[2] == '?' && s[3] == '{')))
3346 /* likewise skip #-initiated comments in //x patterns */
3347 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3348 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3349 while (s+1 < send && *s != '\n')
3353 /* no further processing of single-quoted regex */
3354 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3355 goto default_action;
3357 /* check for embedded arrays
3358 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3360 else if (*s == '@' && s[1]) {
3361 if (isWORDCHAR_lazy_if(s+1,UTF))
3363 if (strchr(":'{$", s[1]))
3365 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3366 break; /* in regexp, neither @+ nor @- are interpolated */
3369 /* check for embedded scalars. only stop if we're sure it's a
3372 else if (*s == '$') {
3373 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3375 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3377 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3378 "Possible unintended interpolation of $\\ in regex");
3380 break; /* in regexp, $ might be tail anchor */
3384 /* End of else if chain - OP_TRANS rejoin rest */
3387 if (*s == '\\' && s+1 < send) {
3388 char* e; /* Can be used for ending '}', etc. */
3392 /* warn on \1 - \9 in substitution replacements, but note that \11
3393 * is an octal; and \19 is \1 followed by '9' */
3394 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3395 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3397 /* diag_listed_as: \%d better written as $%d */
3398 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3403 /* string-change backslash escapes */
3404 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3408 /* In a pattern, process \N, but skip any other backslash escapes.
3409 * This is because we don't want to translate an escape sequence
3410 * into a meta symbol and have the regex compiler use the meta
3411 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3412 * in spite of this, we do have to process \N here while the proper
3413 * charnames handler is in scope. See bugs #56444 and #62056.
3414 * There is a complication because \N in a pattern may also stand
3415 * for 'match a non-nl', and not mean a charname, in which case its
3416 * processing should be deferred to the regex compiler. To be a
3417 * charname it must be followed immediately by a '{', and not look
3418 * like \N followed by a curly quantifier, i.e., not something like
3419 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3421 else if (PL_lex_inpat
3424 || regcurly(s + 1, FALSE)))
3427 goto default_action;
3432 /* quoted - in transliterations */
3434 if (PL_lex_inwhat == OP_TRANS) {
3441 if ((isALPHANUMERIC(*s)))
3442 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3443 "Unrecognized escape \\%c passed through",
3445 /* default action is to copy the quoted character */
3446 goto default_action;
3449 /* eg. \132 indicates the octal constant 0132 */
3450 case '0': case '1': case '2': case '3':
3451 case '4': case '5': case '6': case '7':
3453 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3455 uv = grok_oct(s, &len, &flags, NULL);
3457 if (len < 3 && s < send && isDIGIT(*s)
3458 && ckWARN(WARN_MISC))
3460 Perl_warner(aTHX_ packWARN(WARN_MISC),
3461 "%s", form_short_octal_warning(s, len));
3464 goto NUM_ESCAPE_INSERT;
3466 /* eg. \o{24} indicates the octal constant \024 */
3471 bool valid = grok_bslash_o(&s, &uv, &error,
3472 TRUE, /* Output warning */
3473 FALSE, /* Not strict */
3474 TRUE, /* Output warnings for
3481 goto NUM_ESCAPE_INSERT;
3484 /* eg. \x24 indicates the hex constant 0x24 */
3489 bool valid = grok_bslash_x(&s, &uv, &error,
3490 TRUE, /* Output warning */
3491 FALSE, /* Not strict */
3492 TRUE, /* Output warnings for
3502 /* Insert oct or hex escaped character. There will always be
3503 * enough room in sv since such escapes will be longer than any
3504 * UTF-8 sequence they can end up as, except if they force us
3505 * to recode the rest of the string into utf8 */
3507 /* Here uv is the ordinal of the next character being added */
3508 if (!UVCHR_IS_INVARIANT(uv)) {
3509 if (!has_utf8 && uv > 255) {
3510 /* Might need to recode whatever we have accumulated so
3511 * far if it contains any chars variant in utf8 or
3514 SvCUR_set(sv, d - SvPVX_const(sv));
3517 /* See Note on sizing above. */
3518 sv_utf8_upgrade_flags_grow(sv,
3519 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3520 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3521 d = SvPVX(sv) + SvCUR(sv);
3526 d = (char*)uvchr_to_utf8((U8*)d, uv);
3527 if (PL_lex_inwhat == OP_TRANS &&
3528 PL_sublex_info.sub_op) {
3529 PL_sublex_info.sub_op->op_private |=
3530 (PL_lex_repl ? OPpTRANS_FROM_UTF
3534 if (uv > 255 && !dorange)
3535 native_range = FALSE;
3548 /* In a non-pattern \N must be a named character, like \N{LATIN
3549 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3550 * mean to match a non-newline. For non-patterns, named
3551 * characters are converted to their string equivalents. In
3552 * patterns, named characters are not converted to their
3553 * ultimate forms for the same reasons that other escapes
3554 * aren't. Instead, they are converted to the \N{U+...} form
3555 * to get the value from the charnames that is in effect right
3556 * now, while preserving the fact that it was a named character
3557 * so that the regex compiler knows this */
3559 /* The structure of this section of code (besides checking for
3560 * errors and upgrading to utf8) is:
3561 * Further disambiguate between the two meanings of \N, and if
3562 * not a charname, go process it elsewhere
3563 * If of form \N{U+...}, pass it through if a pattern;
3564 * otherwise convert to utf8
3565 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3566 * pattern; otherwise convert to utf8 */
3568 /* Here, s points to the 'N'; the test below is guaranteed to
3569 * succeed if we are being called on a pattern as we already
3570 * know from a test above that the next character is a '{'.
3571 * On a non-pattern \N must mean 'named sequence, which
3572 * requires braces */
3575 yyerror("Missing braces on \\N{}");
3580 /* If there is no matching '}', it is an error. */
3581 if (! (e = strchr(s, '}'))) {
3582 if (! PL_lex_inpat) {
3583 yyerror("Missing right brace on \\N{}");
3585 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3590 /* Here it looks like a named character */
3592 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3593 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3594 | PERL_SCAN_DISALLOW_PREFIX;
3597 /* For \N{U+...}, the '...' is a unicode value even on
3598 * EBCDIC machines */
3599 s += 2; /* Skip to next char after the 'U+' */
3601 uv = grok_hex(s, &len, &flags, NULL);
3602 if (len == 0 || len != (STRLEN)(e - s)) {
3603 yyerror("Invalid hexadecimal number in \\N{U+...}");
3610 /* On non-EBCDIC platforms, pass through to the regex
3611 * compiler unchanged. The reason we evaluated the
3612 * number above is to make sure there wasn't a syntax
3613 * error. But on EBCDIC we convert to native so
3614 * downstream code can continue to assume it's native
3616 s -= 5; /* Include the '\N{U+' */
3618 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3621 (unsigned int) UNI_TO_NATIVE(uv));
3623 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3627 else { /* Not a pattern: convert the hex to string */
3629 /* If destination is not in utf8, unconditionally
3630 * recode it to be so. This is because \N{} implies
3631 * Unicode semantics, and scalars have to be in utf8
3632 * to guarantee those semantics */
3634 SvCUR_set(sv, d - SvPVX_const(sv));
3637 /* See Note on sizing above. */
3638 sv_utf8_upgrade_flags_grow(
3640 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3641 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3642 d = SvPVX(sv) + SvCUR(sv);
3646 /* Add the (Unicode) code point to the output. */
3647 if (UNI_IS_INVARIANT(uv)) {
3648 *d++ = (char) LATIN1_TO_NATIVE(uv);
3651 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3655 else /* Here is \N{NAME} but not \N{U+...}. */
3656 if ((res = get_and_check_backslash_N_name(s, e)))
3659 const char *str = SvPV_const(res, len);
3662 if (! len) { /* The name resolved to an empty string */
3663 Copy("\\N{}", d, 4, char);
3667 /* In order to not lose information for the regex
3668 * compiler, pass the result in the specially made
3669 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3670 * the code points in hex of each character
3671 * returned by charnames */
3673 const char *str_end = str + len;
3674 const STRLEN off = d - SvPVX_const(sv);
3676 if (! SvUTF8(res)) {
3677 /* For the non-UTF-8 case, we can determine the
3678 * exact length needed without having to parse
3679 * through the string. Each character takes up
3680 * 2 hex digits plus either a trailing dot or
3682 d = off + SvGROW(sv, off
3684 + 6 /* For the "\N{U+", and
3686 + (STRLEN)(send - e));
3687 Copy("\\N{U+", d, 5, char);
3689 while (str < str_end) {
3691 my_snprintf(hex_string, sizeof(hex_string),
3692 "%02X.", (U8) *str);
3693 Copy(hex_string, d, 3, char);
3697 d--; /* We will overwrite below the final
3698 dot with a right brace */
3701 STRLEN char_length; /* cur char's byte length */
3703 /* and the number of bytes after this is
3704 * translated into hex digits */
3705 STRLEN output_length;
3707 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3708 * for max('U+', '.'); and 1 for NUL */
3709 char hex_string[2 * UTF8_MAXBYTES + 5];
3711 /* Get the first character of the result. */
3712 U32 uv = utf8n_to_uvchr((U8 *) str,
3716 /* Convert first code point to hex, including
3717 * the boiler plate before it. */
3719 my_snprintf(hex_string, sizeof(hex_string),
3723 /* Make sure there is enough space to hold it */
3724 d = off + SvGROW(sv, off
3726 + (STRLEN)(send - e)
3727 + 2); /* '}' + NUL */
3729 Copy(hex_string, d, output_length, char);
3732 /* For each subsequent character, append dot and
3733 * its ordinal in hex */
3734 while ((str += char_length) < str_end) {
3735 const STRLEN off = d - SvPVX_const(sv);
3736 U32 uv = utf8n_to_uvchr((U8 *) str,
3741 my_snprintf(hex_string,
3746 d = off + SvGROW(sv, off
3748 + (STRLEN)(send - e)
3749 + 2); /* '}' + NUL */
3750 Copy(hex_string, d, output_length, char);
3755 *d++ = '}'; /* Done. Add the trailing brace */
3758 else { /* Here, not in a pattern. Convert the name to a
3761 /* If destination is not in utf8, unconditionally
3762 * recode it to be so. This is because \N{} implies
3763 * Unicode semantics, and scalars have to be in utf8
3764 * to guarantee those semantics */
3766 SvCUR_set(sv, d - SvPVX_const(sv));
3769 /* See Note on sizing above. */
3770 sv_utf8_upgrade_flags_grow(sv,
3771 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3772 len + (STRLEN)(send - s) + 1);
3773 d = SvPVX(sv) + SvCUR(sv);
3775 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3777 /* See Note on sizing above. (NOTE: SvCUR() is not
3778 * set correctly here). */
3779 const STRLEN off = d - SvPVX_const(sv);
3780 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3782 if (! SvUTF8(res)) { /* Make sure is \N{} return is UTF-8 */
3783 sv_utf8_upgrade(res);
3784 str = SvPV_const(res, len);
3786 Copy(str, d, len, char);
3792 } /* End \N{NAME} */
3795 native_range = FALSE; /* \N{} is defined to be Unicode */
3797 s = e + 1; /* Point to just after the '}' */
3800 /* \c is a control character */
3804 *d++ = grok_bslash_c(*s++, 1);
3807 yyerror("Missing control char name in \\c");
3811 /* printf-style backslashes, formfeeds, newlines, etc */
3828 *d++ = ASCII_TO_NATIVE('\033');
3837 } /* end if (backslash) */
3844 /* If we started with encoded form, or already know we want it,
3845 then encode the next character */
3846 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3850 /* One might think that it is wasted effort in the case of the
3851 * source being utf8 (this_utf8 == TRUE) to take the next character
3852 * in the source, convert it to an unsigned value, and then convert
3853 * it back again. But the source has not been validated here. The
3854 * routine that does the conversion checks for errors like
3857 const UV nextuv = (this_utf8)
3858 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3860 const STRLEN need = UNISKIP(nextuv);
3862 SvCUR_set(sv, d - SvPVX_const(sv));
3865 /* See Note on sizing above. */
3866 sv_utf8_upgrade_flags_grow(sv,
3867 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3868 need + (STRLEN)(send - s) + 1);
3869 d = SvPVX(sv) + SvCUR(sv);
3871 } else if (need > len) {
3872 /* encoded value larger than old, may need extra space (NOTE:
3873 * SvCUR() is not set correctly here). See Note on sizing
3875 const STRLEN off = d - SvPVX_const(sv);
3876 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3880 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3882 if (uv > 255 && !dorange)
3883 native_range = FALSE;
3889 } /* while loop to process each character */
3891 /* terminate the string and set up the sv */
3893 SvCUR_set(sv, d - SvPVX_const(sv));
3894 if (SvCUR(sv) >= SvLEN(sv))
3895 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3896 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3899 if (PL_encoding && !has_utf8) {
3900 sv_recode_to_utf8(sv, PL_encoding);
3906 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3907 PL_sublex_info.sub_op->op_private |=
3908 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3912 /* shrink the sv if we allocated more than we used */
3913 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3914 SvPV_shrink_to_cur(sv);
3917 /* return the substring (via pl_yylval) only if we parsed anything */
3920 for (; s2 < s; s2++) {
3922 COPLINE_INC_WITH_HERELINES;
3924 SvREFCNT_inc_simple_void_NN(sv);
3925 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3926 && ! PL_parser->lex_re_reparsing)
3928 const char *const key = PL_lex_inpat ? "qr" : "q";
3929 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3933 if (PL_lex_inwhat == OP_TRANS) {
3936 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3939 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3947 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3950 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3952 LEAVE_with_name("scan_const");
3957 * Returns TRUE if there's more to the expression (e.g., a subscript),
3960 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3962 * ->[ and ->{ return TRUE
3963 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3964 * { and [ outside a pattern are always subscripts, so return TRUE
3965 * if we're outside a pattern and it's not { or [, then return FALSE
3966 * if we're in a pattern and the first char is a {
3967 * {4,5} (any digits around the comma) returns FALSE
3968 * if we're in a pattern and the first char is a [
3970 * [SOMETHING] has a funky algorithm to decide whether it's a
3971 * character class or not. It has to deal with things like
3972 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3973 * anything else returns TRUE
3976 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3979 S_intuit_more(pTHX_ char *s)
3983 PERL_ARGS_ASSERT_INTUIT_MORE;
3985 if (PL_lex_brackets)
3987 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3989 if (*s == '-' && s[1] == '>'
3990 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3991 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3992 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3994 if (*s != '{' && *s != '[')
3999 /* In a pattern, so maybe we have {n,m}. */
4001 if (regcurly(s, FALSE)) {
4007 /* On the other hand, maybe we have a character class */
4010 if (*s == ']' || *s == '^')
4013 /* this is terrifying, and it works */
4016 const char * const send = strchr(s,']');
4017 unsigned char un_char, last_un_char;
4018 char tmpbuf[sizeof PL_tokenbuf * 4];
4020 if (!send) /* has to be an expression */
4022 weight = 2; /* let's weigh the evidence */
4026 else if (isDIGIT(*s)) {
4028 if (isDIGIT(s[1]) && s[2] == ']')
4034 Zero(seen,256,char);
4036 for (; s < send; s++) {
4037 last_un_char = un_char;
4038 un_char = (unsigned char)*s;
4043 weight -= seen[un_char] * 10;
4044 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4046 char *tmp = PL_bufend;
4047 PL_bufend = (char*)send;
4048 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4050 len = (int)strlen(tmpbuf);
4051 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4052 UTF ? SVf_UTF8 : 0, SVt_PV))
4057 else if (*s == '$' && s[1] &&
4058 strchr("[#!%*<>()-=",s[1])) {
4059 if (/*{*/ strchr("])} =",s[2]))
4068 if (strchr("wds]",s[1]))
4070 else if (seen[(U8)'\''] || seen[(U8)'"'])
4072 else if (strchr("rnftbxcav",s[1]))
4074 else if (isDIGIT(s[1])) {
4076 while (s[1] && isDIGIT(s[1]))
4086 if (strchr("aA01! ",last_un_char))
4088 if (strchr("zZ79~",s[1]))
4090 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4091 weight -= 5; /* cope with negative subscript */
4094 if (!isWORDCHAR(last_un_char)
4095 && !(last_un_char == '$' || last_un_char == '@'
4096 || last_un_char == '&')
4097 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4102 if (keyword(tmpbuf, d - tmpbuf, 0))
4105 if (un_char == last_un_char + 1)
4107 weight -= seen[un_char];
4112 if (weight >= 0) /* probably a character class */
4122 * Does all the checking to disambiguate
4124 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4125 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4127 * First argument is the stuff after the first token, e.g. "bar".
4129 * Not a method if foo is a filehandle.
4130 * Not a method if foo is a subroutine prototyped to take a filehandle.
4131 * Not a method if it's really "Foo $bar"
4132 * Method if it's "foo $bar"
4133 * Not a method if it's really "print foo $bar"
4134 * Method if it's really "foo package::" (interpreted as package->foo)
4135 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4136 * Not a method if bar is a filehandle or package, but is quoted with
4141 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4144 char *s = start + (*start == '$');
4145 char tmpbuf[sizeof PL_tokenbuf];
4152 PERL_ARGS_ASSERT_INTUIT_METHOD;
4154 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4156 if (cv && SvPOK(cv)) {
4157 const char *proto = CvPROTO(cv);
4159 while (*proto && (isSPACE(*proto) || *proto == ';'))
4166 if (*start == '$') {
4167 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4168 isUPPER(*PL_tokenbuf))
4171 len = start - SvPVX(PL_linestr);
4175 start = SvPVX(PL_linestr) + len;
4179 return *s == '(' ? FUNCMETH : METHOD;
4182 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4183 /* start is the beginning of the possible filehandle/object,
4184 * and s is the end of it
4185 * tmpbuf is a copy of it (but with single quotes as double colons)
4188 if (!keyword(tmpbuf, len, 0)) {
4189 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4193 soff = s - SvPVX(PL_linestr);
4197 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4198 if (indirgv && GvCVu(indirgv))
4200 /* filehandle or package name makes it a method */
4201 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4203 soff = s - SvPVX(PL_linestr);
4206 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4207 return 0; /* no assumptions -- "=>" quotes bareword */
4209 start_force(PL_curforce);
4210 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4211 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4212 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4214 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4215 ( UTF ? SVf_UTF8 : 0 )));
4220 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4222 return *s == '(' ? FUNCMETH : METHOD;
4228 /* Encoded script support. filter_add() effectively inserts a
4229 * 'pre-processing' function into the current source input stream.
4230 * Note that the filter function only applies to the current source file
4231 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4233 * The datasv parameter (which may be NULL) can be used to pass
4234 * private data to this instance of the filter. The filter function
4235 * can recover the SV using the FILTER_DATA macro and use it to
4236 * store private buffers and state information.
4238 * The supplied datasv parameter is upgraded to a PVIO type
4239 * and the IoDIRP/IoANY field is used to store the function pointer,
4240 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4241 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4242 * private use must be set using malloc'd pointers.
4246 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4255 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4256 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4258 if (!PL_rsfp_filters)
4259 PL_rsfp_filters = newAV();
4262 SvUPGRADE(datasv, SVt_PVIO);
4263 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4264 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4265 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4266 FPTR2DPTR(void *, IoANY(datasv)),
4267 SvPV_nolen(datasv)));
4268 av_unshift(PL_rsfp_filters, 1);
4269 av_store(PL_rsfp_filters, 0, datasv) ;
4271 !PL_parser->filtered
4272 && PL_parser->lex_flags & LEX_EVALBYTES
4273 && PL_bufptr < PL_bufend
4275 const char *s = PL_bufptr;
4276 while (s < PL_bufend) {
4278 SV *linestr = PL_parser->linestr;
4279 char *buf = SvPVX(linestr);
4280 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4281 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4282 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4283 STRLEN const linestart_pos = PL_parser->linestart - buf;
4284 STRLEN const last_uni_pos =
4285 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4286 STRLEN const last_lop_pos =
4287 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4288 av_push(PL_rsfp_filters, linestr);
4289 PL_parser->linestr =
4290 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4291 buf = SvPVX(PL_parser->linestr);
4292 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4293 PL_parser->bufptr = buf + bufptr_pos;
4294 PL_parser->oldbufptr = buf + oldbufptr_pos;
4295 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4296 PL_parser->linestart = buf + linestart_pos;
4297 if (PL_parser->last_uni)
4298 PL_parser->last_uni = buf + last_uni_pos;
4299 if (PL_parser->last_lop)
4300 PL_parser->last_lop = buf + last_lop_pos;
4301 SvLEN(linestr) = SvCUR(linestr);
4302 SvCUR(linestr) = s-SvPVX(linestr);
4303 PL_parser->filtered = 1;
4313 /* Delete most recently added instance of this filter function. */
4315 Perl_filter_del(pTHX_ filter_t funcp)
4320 PERL_ARGS_ASSERT_FILTER_DEL;
4323 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4324 FPTR2DPTR(void*, funcp)));
4326 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4328 /* if filter is on top of stack (usual case) just pop it off */
4329 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4330 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4331 sv_free(av_pop(PL_rsfp_filters));
4335 /* we need to search for the correct entry and clear it */
4336 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4340 /* Invoke the idxth filter function for the current rsfp. */
4341 /* maxlen 0 = read one text line */
4343 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4348 /* This API is bad. It should have been using unsigned int for maxlen.
4349 Not sure if we want to change the API, but if not we should sanity
4350 check the value here. */
4351 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4353 PERL_ARGS_ASSERT_FILTER_READ;
4355 if (!PL_parser || !PL_rsfp_filters)
4357 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4358 /* Provide a default input filter to make life easy. */
4359 /* Note that we append to the line. This is handy. */
4360 DEBUG_P(PerlIO_printf(Perl_debug_log,
4361 "filter_read %d: from rsfp\n", idx));
4362 if (correct_length) {
4365 const int old_len = SvCUR(buf_sv);
4367 /* ensure buf_sv is large enough */
4368 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4369 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4370 correct_length)) <= 0) {
4371 if (PerlIO_error(PL_rsfp))
4372 return -1; /* error */
4374 return 0 ; /* end of file */
4376 SvCUR_set(buf_sv, old_len + len) ;
4377 SvPVX(buf_sv)[old_len + len] = '\0';
4380 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4381 if (PerlIO_error(PL_rsfp))
4382 return -1; /* error */
4384 return 0 ; /* end of file */
4387 return SvCUR(buf_sv);
4389 /* Skip this filter slot if filter has been deleted */
4390 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4391 DEBUG_P(PerlIO_printf(Perl_debug_log,
4392 "filter_read %d: skipped (filter deleted)\n",
4394 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4396 if (SvTYPE(datasv) != SVt_PVIO) {
4397 if (correct_length) {
4399 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4400 if (!remainder) return 0; /* eof */
4401 if (correct_length > remainder) correct_length = remainder;
4402 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4403 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4406 const char *s = SvEND(datasv);
4407 const char *send = SvPVX(datasv) + SvLEN(datasv);
4415 if (s == send) return 0; /* eof */
4416 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4417 SvCUR_set(datasv, s-SvPVX(datasv));
4419 return SvCUR(buf_sv);
4421 /* Get function pointer hidden within datasv */
4422 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4423 DEBUG_P(PerlIO_printf(Perl_debug_log,
4424 "filter_read %d: via function %p (%s)\n",
4425 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4426 /* Call function. The function is expected to */
4427 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4428 /* Return: <0:error, =0:eof, >0:not eof */
4429 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4433 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4437 PERL_ARGS_ASSERT_FILTER_GETS;
4439 #ifdef PERL_CR_FILTER
4440 if (!PL_rsfp_filters) {
4441 filter_add(S_cr_textfilter,NULL);
4444 if (PL_rsfp_filters) {
4446 SvCUR_set(sv, 0); /* start with empty line */
4447 if (FILTER_READ(0, sv, 0) > 0)
4448 return ( SvPVX(sv) ) ;
4453 return (sv_gets(sv, PL_rsfp, append));
4457 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4462 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4464 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4468 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4469 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4471 return GvHV(gv); /* Foo:: */
4474 /* use constant CLASS => 'MyClass' */
4475 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4476 if (gv && GvCV(gv)) {
4477 SV * const sv = cv_const_sv(GvCV(gv));
4479 pkgname = SvPV_const(sv, len);
4482 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4488 * The intent of this yylex wrapper is to minimize the changes to the
4489 * tokener when we aren't interested in collecting madprops. It remains
4490 * to be seen how successful this strategy will be...
4497 char *s = PL_bufptr;
4499 /* make sure PL_thiswhite is initialized */
4503 /* previous token ate up our whitespace? */
4504 if (!PL_lasttoke && PL_nextwhite) {
4505 PL_thiswhite = PL_nextwhite;
4509 /* isolate the token, and figure out where it is without whitespace */
4510 PL_realtokenstart = -1;
4514 assert(PL_curforce < 0);
4516 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4517 if (!PL_thistoken) {
4518 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4519 PL_thistoken = newSVpvs("");
4521 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4522 PL_thistoken = newSVpvn(tstart, s - tstart);
4525 if (PL_thismad) /* install head */
4526 CURMAD('X', PL_thistoken);
4529 /* last whitespace of a sublex? */
4530 if (optype == ')' && PL_endwhite) {
4531 CURMAD('X', PL_endwhite);
4536 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4537 if (!PL_thiswhite && !PL_endwhite && !optype) {
4538 sv_free(PL_thistoken);
4543 /* put off final whitespace till peg */
4544 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4545 PL_nextwhite = PL_thiswhite;
4548 else if (PL_thisopen) {
4549 CURMAD('q', PL_thisopen);
4551 sv_free(PL_thistoken);
4555 /* Store actual token text as madprop X */
4556 CURMAD('X', PL_thistoken);
4560 /* add preceding whitespace as madprop _ */
4561 CURMAD('_', PL_thiswhite);
4565 /* add quoted material as madprop = */
4566 CURMAD('=', PL_thisstuff);
4570 /* add terminating quote as madprop Q */
4571 CURMAD('Q', PL_thisclose);
4575 /* special processing based on optype */
4579 /* opval doesn't need a TOKEN since it can already store mp */
4589 if (pl_yylval.opval)
4590 append_madprops(PL_thismad, pl_yylval.opval, 0);
4598 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4611 /* remember any fake bracket that lexer is about to discard */
4612 if (PL_lex_brackets == 1 &&
4613 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4616 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4619 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4620 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4623 break; /* don't bother looking for trailing comment */
4632 /* attach a trailing comment to its statement instead of next token */
4636 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4638 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4640 if (*s == '\n' || *s == '#') {
4641 while (s < PL_bufend && *s != '\n')
4645 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4646 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4659 /* Create new token struct. Note: opvals return early above. */
4660 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4667 S_tokenize_use(pTHX_ int is_use, char *s) {
4670 PERL_ARGS_ASSERT_TOKENIZE_USE;
4672 if (PL_expect != XSTATE)
4673 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4674 is_use ? "use" : "no"));
4677 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4678 s = force_version(s, TRUE);
4679 if (*s == ';' || *s == '}'
4680 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4681 start_force(PL_curforce);
4682 NEXTVAL_NEXTTOKE.opval = NULL;
4685 else if (*s == 'v') {
4686 s = force_word(s,WORD,FALSE,TRUE);
4687 s = force_version(s, FALSE);
4691 s = force_word(s,WORD,FALSE,TRUE);
4692 s = force_version(s, FALSE);
4694 pl_yylval.ival = is_use;
4698 static const char* const exp_name[] =
4699 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4700 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4704 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4706 S_word_takes_any_delimeter(char *p, STRLEN len)
4708 return (len == 1 && strchr("msyq", p[0])) ||
4710 (p[0] == 't' && p[1] == 'r') ||
4711 (p[0] == 'q' && strchr("qwxr", p[1]))));
4715 S_check_scalar_slice(pTHX_ char *s)
4718 while (*s == ' ' || *s == '\t') s++;
4719 if (*s == 'q' && s[1] == 'w'
4720 && !isWORDCHAR_lazy_if(s+2,UTF))
4722 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4723 s += UTF ? UTF8SKIP(s) : 1;
4724 if (*s == '}' || *s == ']')
4725 pl_yylval.ival = OPpSLICEWARNING;
4731 Works out what to call the token just pulled out of the input
4732 stream. The yacc parser takes care of taking the ops we return and
4733 stitching them into a tree.
4736 The type of the next token
4739 Switch based on the current state:
4740 - if we already built the token before, use it
4741 - if we have a case modifier in a string, deal with that
4742 - handle other cases of interpolation inside a string
4743 - scan the next line if we are inside a format
4744 In the normal state switch on the next character:
4746 if alphabetic, go to key lookup
4747 unrecoginized character - croak
4748 - 0/4/26: handle end-of-line or EOF
4749 - cases for whitespace
4750 - \n and #: handle comments and line numbers
4751 - various operators, brackets and sigils
4754 - 'v': vstrings (or go to key lookup)
4755 - 'x' repetition operator (or go to key lookup)
4756 - other ASCII alphanumerics (key lookup begins here):
4759 scan built-in keyword (but do nothing with it yet)
4760 check for statement label
4761 check for lexical subs
4762 goto just_a_word if there is one
4763 see whether built-in keyword is overridden
4764 switch on keyword number:
4765 - default: just_a_word:
4766 not a built-in keyword; handle bareword lookup
4767 disambiguate between method and sub call
4768 fall back to bareword
4769 - cases for built-in keywords
4777 char *s = PL_bufptr;
4781 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4785 /* orig_keyword, gvp, and gv are initialized here because
4786 * jump to the label just_a_word_zero can bypass their
4787 * initialization later. */
4788 I32 orig_keyword = 0;
4793 SV* tmp = newSVpvs("");
4794 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4795 (IV)CopLINE(PL_curcop),
4796 lex_state_names[PL_lex_state],
4797 exp_name[PL_expect],
4798 pv_display(tmp, s, strlen(s), 0, 60));
4802 switch (PL_lex_state) {
4804 case LEX_INTERPNORMAL:
4807 /* when we've already built the next token, just pull it out of the queue */
4811 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4813 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4814 PL_nexttoke[PL_lasttoke].next_mad = 0;
4815 if (PL_thismad && PL_thismad->mad_key == '_') {
4816 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4817 PL_thismad->mad_val = 0;
4818 mad_free(PL_thismad);
4823 PL_lex_state = PL_lex_defer;
4824 PL_expect = PL_lex_expect;
4825 PL_lex_defer = LEX_NORMAL;
4826 if (!PL_nexttoke[PL_lasttoke].next_type)
4831 pl_yylval = PL_nextval[PL_nexttoke];
4833 PL_lex_state = PL_lex_defer;
4834 PL_expect = PL_lex_expect;
4835 PL_lex_defer = LEX_NORMAL;
4841 next_type = PL_nexttoke[PL_lasttoke].next_type;
4843 next_type = PL_nexttype[PL_nexttoke];
4845 if (next_type & (7<<24)) {
4846 if (next_type & (1<<24)) {
4847 if (PL_lex_brackets > 100)
4848 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4849 PL_lex_brackstack[PL_lex_brackets++] =
4850 (char) ((next_type >> 16) & 0xff);
4852 if (next_type & (2<<24))
4853 PL_lex_allbrackets++;
4854 if (next_type & (4<<24))
4855 PL_lex_allbrackets--;
4856 next_type &= 0xffff;
4858 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4861 /* interpolated case modifiers like \L \U, including \Q and \E.
4862 when we get here, PL_bufptr is at the \
4864 case LEX_INTERPCASEMOD:
4866 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4868 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4869 PL_bufptr, PL_bufend, *PL_bufptr);
4871 /* handle \E or end of string */
4872 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4874 if (PL_lex_casemods) {
4875 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4876 PL_lex_casestack[PL_lex_casemods] = '\0';
4878 if (PL_bufptr != PL_bufend
4879 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4880 || oldmod == 'F')) {
4882 PL_lex_state = LEX_INTERPCONCAT;
4885 PL_thistoken = newSVpvs("\\E");
4888 PL_lex_allbrackets--;
4891 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4892 /* Got an unpaired \E */
4893 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4894 "Useless use of \\E");
4897 while (PL_bufptr != PL_bufend &&
4898 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4901 PL_thiswhite = newSVpvs("");
4902 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4907 if (PL_bufptr != PL_bufend)
4910 PL_lex_state = LEX_INTERPCONCAT;
4914 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4915 "### Saw case modifier\n"); });
4917 if (s[1] == '\\' && s[2] == 'E') {
4921 PL_thiswhite = newSVpvs("");
4922 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4926 PL_lex_state = LEX_INTERPCONCAT;
4931 if (!PL_madskills) /* when just compiling don't need correct */
4932 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4933 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4934 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4935 (strchr(PL_lex_casestack, 'L')
4936 || strchr(PL_lex_casestack, 'U')
4937 || strchr(PL_lex_casestack, 'F'))) {
4938 PL_lex_casestack[--PL_lex_casemods] = '\0';
4939 PL_lex_allbrackets--;
4942 if (PL_lex_casemods > 10)
4943 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4944 PL_lex_casestack[PL_lex_casemods++] = *s;
4945 PL_lex_casestack[PL_lex_casemods] = '\0';
4946 PL_lex_state = LEX_INTERPCONCAT;
4947 start_force(PL_curforce);
4948 NEXTVAL_NEXTTOKE.ival = 0;
4949 force_next((2<<24)|'(');
4950 start_force(PL_curforce);
4952 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4954 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4956 NEXTVAL_NEXTTOKE.ival = OP_LC;
4958 NEXTVAL_NEXTTOKE.ival = OP_UC;
4960 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4962 NEXTVAL_NEXTTOKE.ival = OP_FC;
4964 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4966 SV* const tmpsv = newSVpvs("\\ ");
4967 /* replace the space with the character we want to escape
4969 SvPVX(tmpsv)[1] = *s;
4975 if (PL_lex_starts) {
4981 sv_free(PL_thistoken);
4982 PL_thistoken = newSVpvs("");
4985 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4986 if (PL_lex_casemods == 1 && PL_lex_inpat)
4995 case LEX_INTERPPUSH:
4996 return REPORT(sublex_push());
4998 case LEX_INTERPSTART:
4999 if (PL_bufptr == PL_bufend)
5000 return REPORT(sublex_done());
5001 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
5002 "### Interpolated variable\n"); });
5004 /* for /@a/, we leave the joining for the regex engine to do
5005 * (unless we're within \Q etc) */
5006 PL_lex_dojoin = (*PL_bufptr == '@'
5007 && (!PL_lex_inpat || PL_lex_casemods));
5008 PL_lex_state = LEX_INTERPNORMAL;
5009 if (PL_lex_dojoin) {
5010 start_force(PL_curforce);
5011 NEXTVAL_NEXTTOKE.ival = 0;
5013 start_force(PL_curforce);
5014 force_ident("\"", '$');
5015 start_force(PL_curforce);
5016 NEXTVAL_NEXTTOKE.ival = 0;
5018 start_force(PL_curforce);
5019 NEXTVAL_NEXTTOKE.ival = 0;
5020 force_next((2<<24)|'(');
5021 start_force(PL_curforce);
5022 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5025 /* Convert (?{...}) and friends to 'do {...}' */
5026 if (PL_lex_inpat && *PL_bufptr == '(') {
5027 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5029 if (*PL_bufptr != '{')
5031 start_force(PL_curforce);
5032 /* XXX probably need a CURMAD(something) here */
5033 PL_expect = XTERMBLOCK;
5037 if (PL_lex_starts++) {
5042 sv_free(PL_thistoken);
5043 PL_thistoken = newSVpvs("");
5046 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5047 if (!PL_lex_casemods && PL_lex_inpat)
5054 case LEX_INTERPENDMAYBE:
5055 if (intuit_more(PL_bufptr)) {
5056 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5062 if (PL_lex_dojoin) {
5063 const U8 dojoin_was = PL_lex_dojoin;
5064 PL_lex_dojoin = FALSE;
5065 PL_lex_state = LEX_INTERPCONCAT;
5069 sv_free(PL_thistoken);
5070 PL_thistoken = newSVpvs("");
5073 PL_lex_allbrackets--;
5074 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5076 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5077 && SvEVALED(PL_lex_repl))
5079 if (PL_bufptr != PL_bufend)
5080 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5083 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5084 re_eval_str. If the here-doc body’s length equals the previous
5085 value of re_eval_start, re_eval_start will now be null. So
5086 check re_eval_str as well. */
5087 if (PL_parser->lex_shared->re_eval_start
5088 || PL_parser->lex_shared->re_eval_str) {
5090 if (*PL_bufptr != ')')
5091 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5093 /* having compiled a (?{..}) expression, return the original
5094 * text too, as a const */
5095 if (PL_parser->lex_shared->re_eval_str) {
5096 sv = PL_parser->lex_shared->re_eval_str;
5097 PL_parser->lex_shared->re_eval_str = NULL;
5099 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5100 SvPV_shrink_to_cur(sv);
5102 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5103 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5104 start_force(PL_curforce);
5105 /* XXX probably need a CURMAD(something) here */
5106 NEXTVAL_NEXTTOKE.opval =
5107 (OP*)newSVOP(OP_CONST, 0,
5110 PL_parser->lex_shared->re_eval_start = NULL;
5116 case LEX_INTERPCONCAT:
5118 if (PL_lex_brackets)
5119 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5120 (long) PL_lex_brackets);
5122 if (PL_bufptr == PL_bufend)
5123 return REPORT(sublex_done());
5125 /* m'foo' still needs to be parsed for possible (?{...}) */
5126 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5127 SV *sv = newSVsv(PL_linestr);
5129 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5133 s = scan_const(PL_bufptr);
5135 PL_lex_state = LEX_INTERPCASEMOD;
5137 PL_lex_state = LEX_INTERPSTART;
5140 if (s != PL_bufptr) {
5141 start_force(PL_curforce);
5143 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5145 NEXTVAL_NEXTTOKE = pl_yylval;
5148 if (PL_lex_starts++) {
5152 sv_free(PL_thistoken);
5153 PL_thistoken = newSVpvs("");
5156 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5157 if (!PL_lex_casemods && PL_lex_inpat)
5170 s = scan_formline(PL_bufptr);
5171 if (!PL_lex_formbrack)
5180 /* We really do *not* want PL_linestr ever becoming a COW. */
5181 assert (!SvIsCOW(PL_linestr));
5183 PL_oldoldbufptr = PL_oldbufptr;
5185 PL_parser->saw_infix_sigil = 0;
5190 sv_free(PL_thistoken);
5193 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5197 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5200 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5201 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5203 SVs_TEMP | SVf_UTF8),
5204 10, UNI_DISPLAY_ISPRINT)
5205 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5206 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5207 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5208 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5212 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5213 UTF8fARG(UTF, (s - d), d),
5218 goto fake_eof; /* emulate EOF on ^D or ^Z */
5224 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5227 if (PL_lex_brackets &&
5228 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5229 yyerror((const char *)
5231 ? "Format not terminated"
5232 : "Missing right curly or square bracket"));
5234 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5235 "### Tokener got EOF\n");
5239 if (s++ < PL_bufend)
5240 goto retry; /* ignore stray nulls */
5243 if (!PL_in_eval && !PL_preambled) {
5244 PL_preambled = TRUE;
5250 /* Generate a string of Perl code to load the debugger.
5251 * If PERL5DB is set, it will return the contents of that,
5252 * otherwise a compile-time require of perl5db.pl. */
5254 const char * const pdb = PerlEnv_getenv("PERL5DB");
5257 sv_setpv(PL_linestr, pdb);
5258 sv_catpvs(PL_linestr,";");
5260 SETERRNO(0,SS_NORMAL);
5261 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5263 PL_parser->preambling = CopLINE(PL_curcop);
5265 sv_setpvs(PL_linestr,"");
5266 if (PL_preambleav) {
5267 SV **svp = AvARRAY(PL_preambleav);
5268 SV **const end = svp + AvFILLp(PL_preambleav);
5270 sv_catsv(PL_linestr, *svp);
5272 sv_catpvs(PL_linestr, ";");
5274 sv_free(MUTABLE_SV(PL_preambleav));
5275 PL_preambleav = NULL;
5278 sv_catpvs(PL_linestr,
5279 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5280 if (PL_minus_n || PL_minus_p) {
5281 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5283 sv_catpvs(PL_linestr,"chomp;");
5286 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5287 || *PL_splitstr == '"')
5288 && strchr(PL_splitstr + 1, *PL_splitstr))
5289 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5291 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5292 bytes can be used as quoting characters. :-) */
5293 const char *splits = PL_splitstr;
5294 sv_catpvs(PL_linestr, "our @F=split(q\0");
5297 if (*splits == '\\')
5298 sv_catpvn(PL_linestr, splits, 1);
5299 sv_catpvn(PL_linestr, splits, 1);
5300 } while (*splits++);
5301 /* This loop will embed the trailing NUL of
5302 PL_linestr as the last thing it does before
5304 sv_catpvs(PL_linestr, ");");
5308 sv_catpvs(PL_linestr,"our @F=split(' ');");
5311 sv_catpvs(PL_linestr, "\n");
5312 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5313 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5314 PL_last_lop = PL_last_uni = NULL;
5315 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5316 update_debugger_info(PL_linestr, NULL, 0);
5321 bof = PL_rsfp ? TRUE : FALSE;
5324 fake_eof = LEX_FAKE_EOF;
5326 PL_bufptr = PL_bufend;
5327 COPLINE_INC_WITH_HERELINES;
5328 if (!lex_next_chunk(fake_eof)) {
5329 CopLINE_dec(PL_curcop);
5331 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5333 CopLINE_dec(PL_curcop);
5336 PL_realtokenstart = -1;
5339 /* If it looks like the start of a BOM or raw UTF-16,
5340 * check if it in fact is. */
5341 if (bof && PL_rsfp &&
5343 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5346 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5347 bof = (offset == (Off_t)SvCUR(PL_linestr));
5348 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5349 /* offset may include swallowed CR */
5351 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5354 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5355 s = swallow_bom((U8*)s);
5358 if (PL_parser->in_pod) {
5359 /* Incest with pod. */
5362 sv_catsv(PL_thiswhite, PL_linestr);
5364 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5365 sv_setpvs(PL_linestr, "");
5366 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5367 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5368 PL_last_lop = PL_last_uni = NULL;
5369 PL_parser->in_pod = 0;
5372 if (PL_rsfp || PL_parser->filtered)
5374 } while (PL_parser->in_pod);
5375 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5376 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5377 PL_last_lop = PL_last_uni = NULL;
5378 if (CopLINE(PL_curcop) == 1) {
5379 while (s < PL_bufend && isSPACE(*s))
5381 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5385 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5389 if (*s == '#' && *(s+1) == '!')
5391 #ifdef ALTERNATE_SHEBANG
5393 static char const as[] = ALTERNATE_SHEBANG;
5394 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5395 d = s + (sizeof(as) - 1);
5397 #endif /* ALTERNATE_SHEBANG */
5406 while (*d && !isSPACE(*d))
5410 #ifdef ARG_ZERO_IS_SCRIPT
5411 if (ipathend > ipath) {
5413 * HP-UX (at least) sets argv[0] to the script name,
5414 * which makes $^X incorrect. And Digital UNIX and Linux,
5415 * at least, set argv[0] to the basename of the Perl
5416 * interpreter. So, having found "#!", we'll set it right.
5418 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5420 assert(SvPOK(x) || SvGMAGICAL(x));
5421 if (sv_eq(x, CopFILESV(PL_curcop))) {
5422 sv_setpvn(x, ipath, ipathend - ipath);
5428 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5429 const char * const lstart = SvPV_const(x,llen);
5431 bstart += blen - llen;
5432 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5433 sv_setpvn(x, ipath, ipathend - ipath);
5438 TAINT_NOT; /* $^X is always tainted, but that's OK */
5440 #endif /* ARG_ZERO_IS_SCRIPT */
5445 d = instr(s,"perl -");
5447 d = instr(s,"perl");
5449 /* avoid getting into infinite loops when shebang
5450 * line contains "Perl" rather than "perl" */
5452 for (d = ipathend-4; d >= ipath; --d) {
5453 if ((*d == 'p' || *d == 'P')
5454 && !ibcmp(d, "perl", 4))
5464 #ifdef ALTERNATE_SHEBANG
5466 * If the ALTERNATE_SHEBANG on this system starts with a
5467 * character that can be part of a Perl expression, then if
5468 * we see it but not "perl", we're probably looking at the
5469 * start of Perl code, not a request to hand off to some
5470 * other interpreter. Similarly, if "perl" is there, but
5471 * not in the first 'word' of the line, we assume the line
5472 * contains the start of the Perl program.
5474 if (d && *s != '#') {
5475 const char *c = ipath;
5476 while (*c && !strchr("; \t\r\n\f\v#", *c))
5479 d = NULL; /* "perl" not in first word; ignore */
5481 *s = '#'; /* Don't try to parse shebang line */
5483 #endif /* ALTERNATE_SHEBANG */
5488 !instr(s,"indir") &&
5489 instr(PL_origargv[0],"perl"))
5496 while (s < PL_bufend && isSPACE(*s))
5498 if (s < PL_bufend) {
5499 Newx(newargv,PL_origargc+3,char*);
5501 while (s < PL_bufend && !isSPACE(*s))
5504 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5507 newargv = PL_origargv;
5510 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5512 Perl_croak(aTHX_ "Can't exec %s", ipath);
5515 while (*d && !isSPACE(*d))
5517 while (SPACE_OR_TAB(*d))
5521 const bool switches_done = PL_doswitches;
5522 const U32 oldpdb = PL_perldb;
5523 const bool oldn = PL_minus_n;
5524 const bool oldp = PL_minus_p;
5528 bool baduni = FALSE;
5530 const char *d2 = d1 + 1;
5531 if (parse_unicode_opts((const char **)&d2)
5535 if (baduni || *d1 == 'M' || *d1 == 'm') {
5536 const char * const m = d1;
5537 while (*d1 && !isSPACE(*d1))
5539 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5542 d1 = moreswitches(d1);
5544 if (PL_doswitches && !switches_done) {
5545 int argc = PL_origargc;
5546 char **argv = PL_origargv;
5549 } while (argc && argv[0][0] == '-' && argv[0][1]);
5550 init_argv_symbols(argc,argv);
5552 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5553 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5554 /* if we have already added "LINE: while (<>) {",
5555 we must not do it again */
5557 sv_setpvs(PL_linestr, "");
5558 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5559 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5560 PL_last_lop = PL_last_uni = NULL;
5561 PL_preambled = FALSE;
5562 if (PERLDB_LINE || PERLDB_SAVESRC)
5563 (void)gv_fetchfile(PL_origfilename);
5570 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5571 PL_lex_state = LEX_FORMLINE;
5572 start_force(PL_curforce);
5573 NEXTVAL_NEXTTOKE.ival = 0;
5574 force_next(FORMRBRACK);
5579 #ifdef PERL_STRICT_CR
5580 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5582 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5584 case ' ': case '\t': case '\f': case 013:
5586 PL_realtokenstart = -1;
5589 PL_thiswhite = newSVpvs("");
5590 sv_catpvn(PL_thiswhite, s, 1);
5598 PL_realtokenstart = -1;
5602 if (PL_lex_state != LEX_NORMAL ||
5603 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5604 if (*s == '#' && s == PL_linestart && PL_in_eval
5605 && !PL_rsfp && !PL_parser->filtered) {
5606 /* handle eval qq[#line 1 "foo"\n ...] */
5607 CopLINE_dec(PL_curcop);
5610 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5612 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5616 const bool in_comment = *s == '#';
5618 while (d < PL_bufend && *d != '\n')
5622 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5623 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5627 PL_thiswhite = newSVpvn(s, d - s);
5630 if (in_comment && d == PL_bufend
5631 && PL_lex_state == LEX_INTERPNORMAL
5632 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5633 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5636 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5637 PL_lex_state = LEX_FORMLINE;
5638 start_force(PL_curforce);
5639 NEXTVAL_NEXTTOKE.ival = 0;
5640 force_next(FORMRBRACK);
5646 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5647 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5650 TOKEN(PEG); /* make sure any #! line is accessible */
5656 if (PL_madskills) d = s;
5657 while (s < PL_bufend && *s != '\n')
5665 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5666 Perl_croak(aTHX_ "panic: input overflow");
5668 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5670 PL_thiswhite = newSVpvs("");
5671 if (CopLINE(PL_curcop) == 1) {
5672 sv_setpvs(PL_thiswhite, "");
5675 sv_catpvn(PL_thiswhite, d, s - d);
5682 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5690 while (s < PL_bufend && SPACE_OR_TAB(*s))
5693 if (strnEQ(s,"=>",2)) {
5694 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5695 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5696 OPERATOR('-'); /* unary minus */
5699 case 'r': ftst = OP_FTEREAD; break;
5700 case 'w': ftst = OP_FTEWRITE; break;
5701 case 'x': ftst = OP_FTEEXEC; break;
5702 case 'o': ftst = OP_FTEOWNED; break;
5703 case 'R': ftst = OP_FTRREAD; break;
5704 case 'W': ftst = OP_FTRWRITE; break;
5705 case 'X': ftst = OP_FTREXEC; break;
5706 case 'O': ftst = OP_FTROWNED; break;
5707 case 'e': ftst = OP_FTIS; break;
5708 case 'z': ftst = OP_FTZERO; break;
5709 case 's': ftst = OP_FTSIZE; break;
5710 case 'f': ftst = OP_FTFILE; break;
5711 case 'd': ftst = OP_FTDIR; break;
5712 case 'l': ftst = OP_FTLINK; break;
5713 case 'p': ftst = OP_FTPIPE; break;
5714 case 'S': ftst = OP_FTSOCK; break;
5715 case 'u': ftst = OP_FTSUID; break;
5716 case 'g': ftst = OP_FTSGID; break;
5717 case 'k': ftst = OP_FTSVTX; break;
5718 case 'b': ftst = OP_FTBLK; break;
5719 case 'c': ftst = OP_FTCHR; break;
5720 case 't': ftst = OP_FTTTY; break;
5721 case 'T': ftst = OP_FTTEXT; break;
5722 case 'B': ftst = OP_FTBINARY; break;
5723 case 'M': case 'A': case 'C':
5724 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5726 case 'M': ftst = OP_FTMTIME; break;
5727 case 'A': ftst = OP_FTATIME; break;
5728 case 'C': ftst = OP_FTCTIME; break;
5736 PL_last_uni = PL_oldbufptr;
5737 PL_last_lop_op = (OPCODE)ftst;
5738 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5739 "### Saw file test %c\n", (int)tmp);
5744 /* Assume it was a minus followed by a one-letter named
5745 * subroutine call (or a -bareword), then. */
5746 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5747 "### '-%c' looked like a file test but was not\n",
5754 const char tmp = *s++;
5757 if (PL_expect == XOPERATOR)
5762 else if (*s == '>') {
5765 if (FEATURE_POSTDEREF_IS_ENABLED && (
5766 ((*s == '$' || *s == '&') && s[1] == '*')
5767 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5768 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5769 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5772 Perl_ck_warner_d(aTHX_
5773 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5774 "Postfix dereference is experimental"
5776 PL_expect = XPOSTDEREF;
5779 if (isIDFIRST_lazy_if(s,UTF)) {
5780 s = force_word(s,METHOD,FALSE,TRUE);
5788 if (PL_expect == XOPERATOR) {
5789 if (*s == '=' && !PL_lex_allbrackets &&
5790 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5797 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5799 OPERATOR('-'); /* unary minus */
5805 const char tmp = *s++;
5808 if (PL_expect == XOPERATOR)
5813 if (PL_expect == XOPERATOR) {
5814 if (*s == '=' && !PL_lex_allbrackets &&
5815 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5822 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5829 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5830 if (PL_expect != XOPERATOR) {
5831 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5832 PL_expect = XOPERATOR;
5833 force_ident(PL_tokenbuf, '*');
5841 if (*s == '=' && !PL_lex_allbrackets &&
5842 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5848 if (*s == '=' && !PL_lex_allbrackets &&
5849 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5853 PL_parser->saw_infix_sigil = 1;
5858 if (PL_expect == XOPERATOR) {
5859 if (s[1] == '=' && !PL_lex_allbrackets &&
5860 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5863 PL_parser->saw_infix_sigil = 1;
5866 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5867 PL_tokenbuf[0] = '%';
5868 s = scan_ident(s, PL_tokenbuf + 1,
5869 sizeof PL_tokenbuf - 1, FALSE);
5871 if (!PL_tokenbuf[1]) {
5874 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5876 PL_tokenbuf[0] = '@';
5878 PL_expect = XOPERATOR;
5879 force_ident_maybe_lex('%');
5883 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5884 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5889 if (PL_lex_brackets > 100)
5890 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5891 PL_lex_brackstack[PL_lex_brackets++] = 0;
5892 PL_lex_allbrackets++;
5894 const char tmp = *s++;
5899 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5901 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5904 Perl_ck_warner_d(aTHX_
5905 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5906 "Smartmatch is experimental");
5912 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5919 goto just_a_word_zero_gv;
5922 switch (PL_expect) {
5928 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5930 PL_bufptr = s; /* update in case we back off */
5933 "Use of := for an empty attribute list is not allowed");
5940 PL_expect = XTERMBLOCK;
5943 stuffstart = s - SvPVX(PL_linestr) - 1;
5947 while (isIDFIRST_lazy_if(s,UTF)) {
5950 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5951 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5952 if (tmp < 0) tmp = -tmp;
5967 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5969 d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5970 COPLINE_SET_FROM_MULTI_END;
5972 /* MUST advance bufptr here to avoid bogus
5973 "at end of line" context messages from yyerror().
5975 PL_bufptr = s + len;
5976 yyerror("Unterminated attribute parameter in attribute list");
5980 return REPORT(0); /* EOF indicator */
5984 sv_catsv(sv, PL_lex_stuff);
5985 attrs = op_append_elem(OP_LIST, attrs,
5986 newSVOP(OP_CONST, 0, sv));
5987 SvREFCNT_dec(PL_lex_stuff);
5988 PL_lex_stuff = NULL;
5991 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5993 if (PL_in_my == KEY_our) {
5994 deprecate(":unique");
5997 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
6000 /* NOTE: any CV attrs applied here need to be part of
6001 the CVf_BUILTIN_ATTRS define in cv.h! */
6002 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
6004 CvLVALUE_on(PL_compcv);
6006 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
6008 deprecate(":locked");
6010 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
6012 CvMETHOD_on(PL_compcv);
6014 /* After we've set the flags, it could be argued that
6015 we don't need to do the attributes.pm-based setting
6016 process, and shouldn't bother appending recognized
6017 flags. To experiment with that, uncomment the
6018 following "else". (Note that's already been
6019 uncommented. That keeps the above-applied built-in
6020 attributes from being intercepted (and possibly
6021 rejected) by a package's attribute routines, but is
6022 justified by the performance win for the common case
6023 of applying only built-in attributes.) */
6025 attrs = op_append_elem(OP_LIST, attrs,
6026 newSVOP(OP_CONST, 0,
6030 if (*s == ':' && s[1] != ':')
6033 break; /* require real whitespace or :'s */
6034 /* XXX losing whitespace on sequential attributes here */
6037 if (*s != ';' && *s != '}' &&
6038 !(PL_expect == XOPERATOR
6039 ? (*s == '=' || *s == ')')
6040 : (*s == '{' || *s == '('))) {
6041 const char q = ((*s == '\'') ? '"' : '\'');
6042 /* If here for an expression, and parsed no attrs, back
6044 if (PL_expect == XOPERATOR && !attrs) {
6048 /* MUST advance bufptr here to avoid bogus "at end of line"
6049 context messages from yyerror().
6052 yyerror( (const char *)
6054 ? Perl_form(aTHX_ "Invalid separator character "
6055 "%c%c%c in attribute list", q, *s, q)
6056 : "Unterminated attribute list" ) );
6064 start_force(PL_curforce);
6065 NEXTVAL_NEXTTOKE.opval = attrs;
6066 CURMAD('_', PL_nextwhite);
6071 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6072 (s - SvPVX(PL_linestr)) - stuffstart);
6077 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6081 PL_lex_allbrackets--;
6085 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6086 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6090 PL_lex_allbrackets++;
6093 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6099 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6102 PL_lex_allbrackets--;
6108 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6111 if (PL_lex_brackets <= 0)
6112 /* diag_listed_as: Unmatched right %s bracket */
6113 yyerror("Unmatched right square bracket");
6116 PL_lex_allbrackets--;
6117 if (PL_lex_state == LEX_INTERPNORMAL) {
6118 if (PL_lex_brackets == 0) {
6119 if (*s == '-' && s[1] == '>')
6120 PL_lex_state = LEX_INTERPENDMAYBE;
6121 else if (*s != '[' && *s != '{')
6122 PL_lex_state = LEX_INTERPEND;
6129 if (PL_lex_brackets > 100) {
6130 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6132 switch (PL_expect) {
6134 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6135 PL_lex_allbrackets++;
6136 OPERATOR(HASHBRACK);
6138 while (s < PL_bufend && SPACE_OR_TAB(*s))
6141 PL_tokenbuf[0] = '\0';
6142 if (d < PL_bufend && *d == '-') {
6143 PL_tokenbuf[0] = '-';
6145 while (d < PL_bufend && SPACE_OR_TAB(*d))
6148 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6149 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6151 while (d < PL_bufend && SPACE_OR_TAB(*d))
6154 const char minus = (PL_tokenbuf[0] == '-');
6155 s = force_word(s + minus, WORD, FALSE, TRUE);
6163 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6164 PL_lex_allbrackets++;
6169 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6170 PL_lex_allbrackets++;
6175 if (PL_oldoldbufptr == PL_last_lop)
6176 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6178 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6179 PL_lex_allbrackets++;
6182 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6184 /* This hack is to get the ${} in the message. */
6186 yyerror("syntax error");
6189 OPERATOR(HASHBRACK);
6191 /* This hack serves to disambiguate a pair of curlies
6192 * as being a block or an anon hash. Normally, expectation
6193 * determines that, but in cases where we're not in a
6194 * position to expect anything in particular (like inside
6195 * eval"") we have to resolve the ambiguity. This code
6196 * covers the case where the first term in the curlies is a
6197 * quoted string. Most other cases need to be explicitly
6198 * disambiguated by prepending a "+" before the opening
6199 * curly in order to force resolution as an anon hash.
6201 * XXX should probably propagate the outer expectation
6202 * into eval"" to rely less on this hack, but that could
6203 * potentially break current behavior of eval"".
6207 if (*s == '\'' || *s == '"' || *s == '`') {
6208 /* common case: get past first string, handling escapes */
6209 for (t++; t < PL_bufend && *t != *s;)
6210 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6214 else if (*s == 'q') {
6217 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6218 && !isWORDCHAR(*t))))
6220 /* skip q//-like construct */
6222 char open, close, term;
6225 while (t < PL_bufend && isSPACE(*t))
6227 /* check for q => */
6228 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6229 OPERATOR(HASHBRACK);
6233 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6237 for (t++; t < PL_bufend; t++) {
6238 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6240 else if (*t == open)
6244 for (t++; t < PL_bufend; t++) {
6245 if (*t == '\\' && t+1 < PL_bufend)
6247 else if (*t == close && --brackets <= 0)
6249 else if (*t == open)
6256 /* skip plain q word */
6257 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6260 else if (isWORDCHAR_lazy_if(t,UTF)) {
6262 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6265 while (t < PL_bufend && isSPACE(*t))
6267 /* if comma follows first term, call it an anon hash */
6268 /* XXX it could be a comma expression with loop modifiers */
6269 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6270 || (*t == '=' && t[1] == '>')))
6271 OPERATOR(HASHBRACK);
6272 if (PL_expect == XREF)
6275 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6281 pl_yylval.ival = CopLINE(PL_curcop);
6282 if (isSPACE(*s) || *s == '#')
6283 PL_copline = NOLINE; /* invalidate current command line number */
6284 TOKEN(formbrack ? '=' : '{');
6286 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6290 if (PL_lex_brackets <= 0)
6291 /* diag_listed_as: Unmatched right %s bracket */
6292 yyerror("Unmatched right curly bracket");
6294 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6295 PL_lex_allbrackets--;
6296 if (PL_lex_state == LEX_INTERPNORMAL) {
6297 if (PL_lex_brackets == 0) {
6298 if (PL_expect & XFAKEBRACK) {
6299 PL_expect &= XENUMMASK;
6300 PL_lex_state = LEX_INTERPEND;
6305 PL_thiswhite = newSVpvs("");
6306 sv_catpvs(PL_thiswhite,"}");
6309 return yylex(); /* ignore fake brackets */
6311 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6312 && SvEVALED(PL_lex_repl))
6313 PL_lex_state = LEX_INTERPEND;
6314 else if (*s == '-' && s[1] == '>')
6315 PL_lex_state = LEX_INTERPENDMAYBE;
6316 else if (*s != '[' && *s != '{')
6317 PL_lex_state = LEX_INTERPEND;
6320 if (PL_expect & XFAKEBRACK) {
6321 PL_expect &= XENUMMASK;
6323 return yylex(); /* ignore fake brackets */
6325 start_force(PL_curforce);
6327 curmad('X', newSVpvn(s-1,1));
6328 CURMAD('_', PL_thiswhite);
6330 force_next(formbrack ? '.' : '}');
6331 if (formbrack) LEAVE;
6333 if (PL_madskills && !PL_thistoken)
6334 PL_thistoken = newSVpvs("");
6336 if (formbrack == 2) { /* means . where arguments were expected */
6337 start_force(PL_curforce);
6343 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6346 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6347 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6354 if (PL_expect == XOPERATOR) {
6355 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6356 && isIDFIRST_lazy_if(s,UTF))
6358 CopLINE_dec(PL_curcop);
6359 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6360 CopLINE_inc(PL_curcop);
6362 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6363 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6367 PL_parser->saw_infix_sigil = 1;
6371 PL_tokenbuf[0] = '&';
6372 s = scan_ident(s - 1, PL_tokenbuf + 1,
6373 sizeof PL_tokenbuf - 1, TRUE);
6374 if (PL_tokenbuf[1]) {
6375 PL_expect = XOPERATOR;
6376 force_ident_maybe_lex('&');
6380 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6386 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6387 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6394 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6395 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6403 const char tmp = *s++;
6405 if (!PL_lex_allbrackets &&
6406 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6413 if (!PL_lex_allbrackets &&
6414 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6422 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6423 && strchr("+-*/%.^&|<",tmp))
6424 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6425 "Reversed %c= operator",(int)tmp);
6427 if (PL_expect == XSTATE && isALPHA(tmp) &&
6428 (s == PL_linestart+1 || s[-2] == '\n') )
6430 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6431 || PL_lex_state != LEX_NORMAL) {
6436 if (strnEQ(s,"=cut",4)) {
6452 PL_thiswhite = newSVpvs("");
6453 sv_catpvn(PL_thiswhite, PL_linestart,
6454 PL_bufend - PL_linestart);
6458 PL_parser->in_pod = 1;
6462 if (PL_expect == XBLOCK) {
6464 #ifdef PERL_STRICT_CR
6465 while (SPACE_OR_TAB(*t))
6467 while (SPACE_OR_TAB(*t) || *t == '\r')
6470 if (*t == '\n' || *t == '#') {
6473 SAVEI8(PL_parser->form_lex_state);
6474 SAVEI32(PL_lex_formbrack);
6475 PL_parser->form_lex_state = PL_lex_state;
6476 PL_lex_formbrack = PL_lex_brackets + 1;
6480 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6489 const char tmp = *s++;
6491 /* was this !=~ where !~ was meant?
6492 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6494 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6495 const char *t = s+1;
6497 while (t < PL_bufend && isSPACE(*t))
6500 if (*t == '/' || *t == '?' ||
6501 ((*t == 'm' || *t == 's' || *t == 'y')
6502 && !isWORDCHAR(t[1])) ||
6503 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6504 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6505 "!=~ should be !~");
6507 if (!PL_lex_allbrackets &&
6508 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6520 if (PL_expect != XOPERATOR) {
6521 if (s[1] != '<' && !strchr(s,'>'))
6524 s = scan_heredoc(s);
6526 s = scan_inputsymbol(s);
6527 PL_expect = XOPERATOR;
6528 TOKEN(sublex_start());
6534 if (*s == '=' && !PL_lex_allbrackets &&
6535 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6539 SHop(OP_LEFT_SHIFT);
6544 if (!PL_lex_allbrackets &&
6545 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6552 if (!PL_lex_allbrackets &&
6553 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6561 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6569 const char tmp = *s++;
6571 if (*s == '=' && !PL_lex_allbrackets &&
6572 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6576 SHop(OP_RIGHT_SHIFT);
6578 else if (tmp == '=') {
6579 if (!PL_lex_allbrackets &&
6580 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6588 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6597 if (PL_expect == XOPERATOR) {
6598 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6599 return deprecate_commaless_var_list();
6602 else if (PL_expect == XPOSTDEREF) {
6605 POSTDEREF(DOLSHARP);
6610 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6611 PL_tokenbuf[0] = '@';
6612 s = scan_ident(s + 1, PL_tokenbuf + 1,
6613 sizeof PL_tokenbuf - 1, FALSE);
6614 if (PL_expect == XOPERATOR)
6615 no_op("Array length", s);
6616 if (!PL_tokenbuf[1])
6618 PL_expect = XOPERATOR;
6619 force_ident_maybe_lex('#');
6623 PL_tokenbuf[0] = '$';
6624 s = scan_ident(s, PL_tokenbuf + 1,
6625 sizeof PL_tokenbuf - 1, FALSE);
6626 if (PL_expect == XOPERATOR)
6628 if (!PL_tokenbuf[1]) {
6630 yyerror("Final $ should be \\$ or $name");
6636 const char tmp = *s;
6637 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6640 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6641 && intuit_more(s)) {
6643 PL_tokenbuf[0] = '@';
6644 if (ckWARN(WARN_SYNTAX)) {
6647 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6650 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6651 while (t < PL_bufend && *t != ']')
6653 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6654 "Multidimensional syntax %.*s not supported",
6655 (int)((t - PL_bufptr) + 1), PL_bufptr);
6659 else if (*s == '{') {
6661 PL_tokenbuf[0] = '%';
6662 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6663 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6665 char tmpbuf[sizeof PL_tokenbuf];
6668 } while (isSPACE(*t));
6669 if (isIDFIRST_lazy_if(t,UTF)) {
6671 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6676 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6677 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6678 "You need to quote \"%"UTF8f"\"",
6679 UTF8fARG(UTF, len, tmpbuf));
6685 PL_expect = XOPERATOR;
6686 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6687 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6688 if (!islop || PL_last_lop_op == OP_GREPSTART)
6689 PL_expect = XOPERATOR;
6690 else if (strchr("$@\"'`q", *s))
6691 PL_expect = XTERM; /* e.g. print $fh "foo" */
6692 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6693 PL_expect = XTERM; /* e.g. print $fh &sub */
6694 else if (isIDFIRST_lazy_if(s,UTF)) {
6695 char tmpbuf[sizeof PL_tokenbuf];
6697 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6698 if ((t2 = keyword(tmpbuf, len, 0))) {
6699 /* binary operators exclude handle interpretations */
6711 PL_expect = XTERM; /* e.g. print $fh length() */
6716 PL_expect = XTERM; /* e.g. print $fh subr() */
6719 else if (isDIGIT(*s))
6720 PL_expect = XTERM; /* e.g. print $fh 3 */
6721 else if (*s == '.' && isDIGIT(s[1]))
6722 PL_expect = XTERM; /* e.g. print $fh .3 */
6723 else if ((*s == '?' || *s == '-' || *s == '+')
6724 && !isSPACE(s[1]) && s[1] != '=')
6725 PL_expect = XTERM; /* e.g. print $fh -1 */
6726 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6728 PL_expect = XTERM; /* e.g. print $fh /.../
6729 XXX except DORDOR operator
6731 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6733 PL_expect = XTERM; /* print $fh <<"EOF" */
6736 force_ident_maybe_lex('$');
6740 if (PL_expect == XOPERATOR)
6742 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6743 PL_tokenbuf[0] = '@';
6744 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6746 if (!PL_tokenbuf[1]) {
6749 if (PL_lex_state == LEX_NORMAL)
6751 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6753 PL_tokenbuf[0] = '%';
6755 /* Warn about @ where they meant $. */
6756 if (*s == '[' || *s == '{') {
6757 if (ckWARN(WARN_SYNTAX)) {
6758 S_check_scalar_slice(aTHX_ s);
6762 PL_expect = XOPERATOR;
6763 force_ident_maybe_lex('@');
6766 case '/': /* may be division, defined-or, or pattern */
6767 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6768 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6769 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6775 case '?': /* may either be conditional or pattern */
6776 if (PL_expect == XOPERATOR) {
6779 if (!PL_lex_allbrackets &&
6780 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6784 PL_lex_allbrackets++;
6790 /* A // operator. */
6791 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6792 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6793 LEX_FAKEEOF_LOGIC)) {
6801 if (*s == '=' && !PL_lex_allbrackets &&
6802 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6811 /* Disable warning on "study /blah/" */
6812 if (PL_oldoldbufptr == PL_last_uni
6813 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6814 || memNE(PL_last_uni, "study", 5)
6815 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6819 deprecate("?PATTERN? without explicit operator");
6820 s = scan_pat(s,OP_MATCH);
6821 TERM(sublex_start());
6825 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6826 #ifdef PERL_STRICT_CR
6829 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6831 && (s == PL_linestart || s[-1] == '\n') )
6834 formbrack = 2; /* dot seen where arguments expected */
6837 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6841 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6844 if (!PL_lex_allbrackets &&
6845 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6852 pl_yylval.ival = OPf_SPECIAL;
6858 if (*s == '=' && !PL_lex_allbrackets &&
6859 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6866 case '0': case '1': case '2': case '3': case '4':
6867 case '5': case '6': case '7': case '8': case '9':
6868 s = scan_num(s, &pl_yylval);
6869 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6870 if (PL_expect == XOPERATOR)
6875 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6878 COPLINE_SET_FROM_MULTI_END;
6879 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6880 if (PL_expect == XOPERATOR) {
6881 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6882 return deprecate_commaless_var_list();
6887 pl_yylval.ival = OP_CONST;
6888 TERM(sublex_start());
6891 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6894 printbuf("### Saw string before %s\n", s);
6896 PerlIO_printf(Perl_debug_log,
6897 "### Saw unterminated string\n");
6899 if (PL_expect == XOPERATOR) {
6900 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6901 return deprecate_commaless_var_list();
6908 pl_yylval.ival = OP_CONST;
6909 /* FIXME. I think that this can be const if char *d is replaced by
6910 more localised variables. */
6911 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6912 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6913 pl_yylval.ival = OP_STRINGIFY;
6917 if (pl_yylval.ival == OP_CONST)
6918 COPLINE_SET_FROM_MULTI_END;
6919 TERM(sublex_start());
6922 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6923 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6924 if (PL_expect == XOPERATOR)
6925 no_op("Backticks",s);
6928 pl_yylval.ival = OP_BACKTICK;
6929 TERM(sublex_start());
6933 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6935 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6937 if (PL_expect == XOPERATOR)
6938 no_op("Backslash",s);
6942 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6943 char *start = s + 2;
6944 while (isDIGIT(*start) || *start == '_')
6946 if (*start == '.' && isDIGIT(start[1])) {
6947 s = scan_num(s, &pl_yylval);
6950 else if ((*start == ':' && start[1] == ':')
6951 || (PL_expect == XSTATE && *start == ':'))
6953 else if (PL_expect == XSTATE) {
6955 while (d < PL_bufend && isSPACE(*d)) d++;
6956 if (*d == ':') goto keylookup;
6958 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6959 if (!isALPHA(*start) && (PL_expect == XTERM
6960 || PL_expect == XREF || PL_expect == XSTATE
6961 || PL_expect == XTERMORDORDOR)) {
6962 GV *const gv = gv_fetchpvn_flags(s, start - s,
6963 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6965 s = scan_num(s, &pl_yylval);
6972 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7025 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7027 /* Some keywords can be followed by any delimiter, including ':' */
7028 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7030 /* x::* is just a word, unless x is "CORE" */
7031 if (!anydelim && *s == ':' && s[1] == ':') {
7032 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7037 while (d < PL_bufend && isSPACE(*d))
7038 d++; /* no comments skipped here, or s### is misparsed */
7040 /* Is this a word before a => operator? */
7041 if (*d == '=' && d[1] == '>') {
7045 = (OP*)newSVOP(OP_CONST, 0,
7046 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7047 pl_yylval.opval->op_private = OPpCONST_BARE;
7051 /* Check for plugged-in keyword */
7055 char *saved_bufptr = PL_bufptr;
7057 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7059 if (result == KEYWORD_PLUGIN_DECLINE) {
7060 /* not a plugged-in keyword */
7061 PL_bufptr = saved_bufptr;
7062 } else if (result == KEYWORD_PLUGIN_STMT) {
7063 pl_yylval.opval = o;
7066 return REPORT(PLUGSTMT);
7067 } else if (result == KEYWORD_PLUGIN_EXPR) {
7068 pl_yylval.opval = o;
7070 PL_expect = XOPERATOR;
7071 return REPORT(PLUGEXPR);
7073 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7078 /* Check for built-in keyword */
7079 tmp = keyword(PL_tokenbuf, len, 0);
7081 /* Is this a label? */
7082 if (!anydelim && PL_expect == XSTATE
7083 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7085 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7086 pl_yylval.pval[len] = '\0';
7087 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7092 /* Check for lexical sub */
7093 if (PL_expect != XOPERATOR) {
7094 char tmpbuf[sizeof PL_tokenbuf + 1];
7096 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7097 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7098 if (off != NOT_IN_PAD) {
7099 assert(off); /* we assume this is boolean-true below */
7100 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7101 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7102 HEK * const stashname = HvNAME_HEK(stash);
7103 sv = newSVhek(stashname);
7104 sv_catpvs(sv, "::");
7105 sv_catpvn_flags(sv, PL_tokenbuf, len,
7106 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7107 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7117 rv2cv_op = newOP(OP_PADANY, 0);
7118 rv2cv_op->op_targ = off;
7119 cv = find_lexical_cv(off);
7127 if (tmp < 0) { /* second-class keyword? */
7128 GV *ogv = NULL; /* override (winner) */
7129 GV *hgv = NULL; /* hidden (loser) */
7130 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7132 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7133 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7137 if (GvIMPORTED_CV(gv))
7139 else if (! CvMETHOD(cv))
7143 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7147 ? GvCVu(gv) && GvIMPORTED_CV(gv)
7148 : SvPCS_IMPORTED(gv)
7149 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7158 tmp = 0; /* overridden by import or by GLOBAL */
7161 && -tmp==KEY_lock /* XXX generalizable kludge */
7164 tmp = 0; /* any sub overrides "weak" keyword */
7166 else { /* no override */
7168 if (tmp == KEY_dump) {
7169 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7170 "dump() better written as CORE::dump()");
7174 if (hgv && tmp != KEY_x) /* never ambiguous */
7175 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7176 "Ambiguous call resolved as CORE::%s(), "
7177 "qualify as such or use &",
7182 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7183 && (!anydelim || *s != '#')) {
7184 /* no override, and not s### either; skipspace is safe here
7185 * check for => on following line */
7187 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7188 STRLEN soff = s - SvPVX(PL_linestr);
7189 s = skipspace_flags(s, LEX_NO_INCLINE);
7190 arrow = *s == '=' && s[1] == '>';
7191 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7192 s = SvPVX(PL_linestr) + soff;
7200 default: /* not a keyword */
7201 /* Trade off - by using this evil construction we can pull the
7202 variable gv into the block labelled keylookup. If not, then
7203 we have to give it function scope so that the goto from the
7204 earlier ':' case doesn't bypass the initialisation. */
7206 just_a_word_zero_gv:
7218 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7219 const char penultchar =
7220 lastchar && PL_bufptr - 2 >= PL_linestart
7224 SV *nextPL_nextwhite = 0;
7228 /* Get the rest if it looks like a package qualifier */
7230 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7232 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7235 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7236 UTF8fARG(UTF, len, PL_tokenbuf),
7237 *s == '\'' ? "'" : "::");
7242 if (PL_expect == XOPERATOR) {
7243 if (PL_bufptr == PL_linestart) {
7244 CopLINE_dec(PL_curcop);
7245 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7246 CopLINE_inc(PL_curcop);
7249 no_op("Bareword",s);
7252 /* Look for a subroutine with this name in current package,
7253 unless this is a lexical sub, or name is "Foo::",
7254 in which case Foo is a bareword
7255 (and a package name). */
7257 if (len > 2 && !PL_madskills &&
7258 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7260 if (ckWARN(WARN_BAREWORD)
7261 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7262 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7263 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7264 UTF8fARG(UTF, len, PL_tokenbuf));
7266 PL_tokenbuf[len] = '\0';
7272 /* Mustn't actually add anything to a symbol table.
7273 But also don't want to "initialise" any placeholder
7274 constants that might already be there into full
7275 blown PVGVs with attached PVCV. */
7276 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7277 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7283 /* if we saw a global override before, get the right name */
7286 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7287 len ? len : strlen(PL_tokenbuf));
7289 SV * const tmp_sv = sv;
7290 sv = newSVpvs("CORE::GLOBAL::");
7291 sv_catsv(sv, tmp_sv);
7292 SvREFCNT_dec(tmp_sv);
7296 if (PL_madskills && !PL_thistoken) {
7297 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7298 PL_thistoken = newSVpvn(start,s - start);
7299 PL_realtokenstart = s - SvPVX(PL_linestr);
7303 /* Presume this is going to be a bareword of some sort. */
7305 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7306 pl_yylval.opval->op_private = OPpCONST_BARE;
7308 /* And if "Foo::", then that's what it certainly is. */
7314 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7315 const_op->op_private = OPpCONST_BARE;
7316 rv2cv_op = newCVREF(0, const_op);
7317 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7320 /* See if it's the indirect object for a list operator. */
7322 if (PL_oldoldbufptr &&
7323 PL_oldoldbufptr < PL_bufptr &&
7324 (PL_oldoldbufptr == PL_last_lop
7325 || PL_oldoldbufptr == PL_last_uni) &&
7326 /* NO SKIPSPACE BEFORE HERE! */
7327 (PL_expect == XREF ||
7328 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7330 bool immediate_paren = *s == '(';
7332 /* (Now we can afford to cross potential line boundary.) */
7333 s = SKIPSPACE2(s,nextPL_nextwhite);
7335 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7338 /* Two barewords in a row may indicate method call. */
7340 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7341 (tmp = intuit_method(s, gv, cv))) {
7343 if (tmp == METHOD && !PL_lex_allbrackets &&
7344 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7345 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7349 /* If not a declared subroutine, it's an indirect object. */
7350 /* (But it's an indir obj regardless for sort.) */
7351 /* Also, if "_" follows a filetest operator, it's a bareword */
7354 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7356 (PL_last_lop_op != OP_MAPSTART &&
7357 PL_last_lop_op != OP_GREPSTART))))
7358 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7359 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7362 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7367 PL_expect = XOPERATOR;
7370 s = SKIPSPACE2(s,nextPL_nextwhite);
7371 PL_nextwhite = nextPL_nextwhite;
7376 /* Is this a word before a => operator? */
7377 if (*s == '=' && s[1] == '>' && !pkgname) {
7380 /* This is our own scalar, created a few lines above,
7382 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7383 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7384 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7385 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7386 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7390 /* If followed by a paren, it's certainly a subroutine. */
7395 while (SPACE_OR_TAB(*d))
7397 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7404 PL_nextwhite = PL_thiswhite;
7407 start_force(PL_curforce);
7409 NEXTVAL_NEXTTOKE.opval =
7410 off ? rv2cv_op : pl_yylval.opval;
7411 PL_expect = XOPERATOR;
7414 PL_nextwhite = nextPL_nextwhite;
7415 curmad('X', PL_thistoken);
7416 PL_thistoken = newSVpvs("");
7420 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7421 else op_free(rv2cv_op), force_next(WORD);
7426 /* If followed by var or block, call it a method (unless sub) */
7428 if ((*s == '$' || *s == '{') && !cv) {
7430 PL_last_lop = PL_oldbufptr;
7431 PL_last_lop_op = OP_METHOD;
7432 if (!PL_lex_allbrackets &&
7433 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7434 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7438 /* If followed by a bareword, see if it looks like indir obj. */
7441 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7442 && (tmp = intuit_method(s, gv, cv))) {
7444 if (tmp == METHOD && !PL_lex_allbrackets &&
7445 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7446 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7450 /* Not a method, so call it a subroutine (if defined) */
7453 if (lastchar == '-' && penultchar != '-') {
7454 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7455 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7456 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7457 UTF8fARG(UTF, l, PL_tokenbuf),
7458 UTF8fARG(UTF, l, PL_tokenbuf));
7460 /* Check for a constant sub */
7461 if ((sv = cv_const_sv_or_av(cv))) {
7464 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7465 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7466 if (SvTYPE(sv) == SVt_PVAV)
7467 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7470 pl_yylval.opval->op_private = 0;
7471 pl_yylval.opval->op_folded = 1;
7472 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7477 op_free(pl_yylval.opval);
7479 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7480 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7481 PL_last_lop = PL_oldbufptr;
7482 PL_last_lop_op = OP_ENTERSUB;
7483 /* Is there a prototype? */
7490 STRLEN protolen = CvPROTOLEN(cv);
7491 const char *proto = CvPROTO(cv);
7493 proto = S_strip_spaces(aTHX_ proto, &protolen);
7496 if ((optional = *proto == ';'))
7499 while (*proto == ';');
7503 *proto == '$' || *proto == '_'
7504 || *proto == '*' || *proto == '+'
7509 *proto == '\\' && proto[1] && proto[2] == '\0'
7512 UNIPROTO(UNIOPSUB,optional);
7513 if (*proto == '\\' && proto[1] == '[') {
7514 const char *p = proto + 2;
7515 while(*p && *p != ']')
7517 if(*p == ']' && !p[1])
7518 UNIPROTO(UNIOPSUB,optional);
7520 if (*proto == '&' && *s == '{') {
7522 sv_setpvs(PL_subname, "__ANON__");
7524 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7525 if (!PL_lex_allbrackets &&
7526 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7527 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7534 PL_nextwhite = PL_thiswhite;
7537 start_force(PL_curforce);
7538 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7541 PL_nextwhite = nextPL_nextwhite;
7542 curmad('X', PL_thistoken);
7543 PL_thistoken = newSVpvs("");
7545 force_next(off ? PRIVATEREF : WORD);
7546 if (!PL_lex_allbrackets &&
7547 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7548 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7553 /* Guess harder when madskills require "best effort". */
7554 if (PL_madskills && (!gv || !GvCVu(gv))) {
7555 int probable_sub = 0;
7556 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7558 else if (isALPHA(*s)) {
7562 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7563 if (!keyword(tmpbuf, tmplen, 0))
7566 while (d < PL_bufend && isSPACE(*d))
7568 if (*d == '=' && d[1] == '>')
7573 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7575 op_free(pl_yylval.opval);
7577 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7578 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7579 PL_last_lop = PL_oldbufptr;
7580 PL_last_lop_op = OP_ENTERSUB;
7581 PL_nextwhite = PL_thiswhite;
7583 start_force(PL_curforce);
7584 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7586 PL_nextwhite = nextPL_nextwhite;
7587 curmad('X', PL_thistoken);
7588 PL_thistoken = newSVpvs("");
7589 force_next(off ? PRIVATEREF : WORD);
7590 if (!PL_lex_allbrackets &&
7591 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7592 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7596 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7598 force_next(off ? PRIVATEREF : WORD);
7599 if (!PL_lex_allbrackets &&
7600 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7601 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7606 /* Call it a bare word */
7608 if (PL_hints & HINT_STRICT_SUBS)
7609 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7612 /* after "print" and similar functions (corresponding to
7613 * "F? L" in opcode.pl), whatever wasn't already parsed as
7614 * a filehandle should be subject to "strict subs".
7615 * Likewise for the optional indirect-object argument to system
7616 * or exec, which can't be a bareword */
7617 if ((PL_last_lop_op == OP_PRINT
7618 || PL_last_lop_op == OP_PRTF
7619 || PL_last_lop_op == OP_SAY
7620 || PL_last_lop_op == OP_SYSTEM
7621 || PL_last_lop_op == OP_EXEC)
7622 && (PL_hints & HINT_STRICT_SUBS))
7623 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7624 if (lastchar != '-') {
7625 if (ckWARN(WARN_RESERVED)) {
7629 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7631 /* PL_warn_reserved is constant */
7632 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7633 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7643 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7644 && saw_infix_sigil) {
7645 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7646 "Operator or semicolon missing before %c%"UTF8f,
7648 UTF8fARG(UTF, strlen(PL_tokenbuf),
7650 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7651 "Ambiguous use of %c resolved as operator %c",
7652 lastchar, lastchar);
7659 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7664 (OP*)newSVOP(OP_CONST, 0,
7665 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7668 case KEY___PACKAGE__:
7670 (OP*)newSVOP(OP_CONST, 0,
7672 ? newSVhek(HvNAME_HEK(PL_curstash))
7679 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7680 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7683 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7685 gv_init(gv,stash,"DATA",4,0);
7688 GvIOp(gv) = newIO();
7689 IoIFP(GvIOp(gv)) = PL_rsfp;
7690 #if defined(HAS_FCNTL) && defined(F_SETFD)
7692 const int fd = PerlIO_fileno(PL_rsfp);
7693 fcntl(fd,F_SETFD,fd >= 3);
7696 /* Mark this internal pseudo-handle as clean */
7697 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7698 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7699 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7701 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7702 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7703 /* if the script was opened in binmode, we need to revert
7704 * it to text mode for compatibility; but only iff it has CRs
7705 * XXX this is a questionable hack at best. */
7706 if (PL_bufend-PL_bufptr > 2
7707 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7710 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7711 loc = PerlIO_tell(PL_rsfp);
7712 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7715 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7717 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7718 #endif /* NETWARE */
7720 PerlIO_seek(PL_rsfp, loc, 0);
7724 #ifdef PERLIO_LAYERS
7727 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7728 else if (PL_encoding) {
7734 XPUSHs(PL_encoding);
7736 call_method("name", G_SCALAR);
7740 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7741 Perl_form(aTHX_ ":encoding(%"SVf")",
7750 if (PL_realtokenstart >= 0) {
7751 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7753 PL_endwhite = newSVpvs("");
7754 sv_catsv(PL_endwhite, PL_thiswhite);
7756 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7757 PL_realtokenstart = -1;
7759 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7769 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7778 if (PL_expect == XSTATE) {
7789 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7790 if ((*s == ':' && s[1] == ':')
7791 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7795 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7799 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7800 UTF8fARG(UTF, len, PL_tokenbuf));
7803 else if (tmp == KEY_require || tmp == KEY_do
7805 /* that's a way to remember we saw "CORE::" */
7817 LOP(OP_ACCEPT,XTERM);
7820 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7825 LOP(OP_ATAN2,XTERM);
7831 LOP(OP_BINMODE,XTERM);
7834 LOP(OP_BLESS,XTERM);
7843 /* We have to disambiguate the two senses of
7844 "continue". If the next token is a '{' then
7845 treat it as the start of a continue block;
7846 otherwise treat it as a control operator.
7856 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7866 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7875 if (!PL_cryptseen) {
7876 PL_cryptseen = TRUE;
7880 LOP(OP_CRYPT,XTERM);
7883 LOP(OP_CHMOD,XTERM);
7886 LOP(OP_CHOWN,XTERM);
7889 LOP(OP_CONNECT,XTERM);
7909 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7911 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7912 && !keyword(PL_tokenbuf + 1, len, 0)) {
7915 force_ident_maybe_lex('&');
7920 if (orig_keyword == KEY_do) {
7929 PL_hints |= HINT_BLOCK_SCOPE;
7939 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7940 STR_WITH_LEN("NDBM_File::"),
7941 STR_WITH_LEN("DB_File::"),
7942 STR_WITH_LEN("GDBM_File::"),
7943 STR_WITH_LEN("SDBM_File::"),
7944 STR_WITH_LEN("ODBM_File::"),
7946 LOP(OP_DBMOPEN,XTERM);
7952 PL_expect = XOPERATOR;
7953 s = force_word(s,WORD,TRUE,FALSE);
7960 pl_yylval.ival = CopLINE(PL_curcop);
7964 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7978 if (*s == '{') { /* block eval */
7979 PL_expect = XTERMBLOCK;
7980 UNIBRACK(OP_ENTERTRY);
7982 else { /* string eval */
7984 UNIBRACK(OP_ENTEREVAL);
7989 UNIBRACK(-OP_ENTEREVAL);
8003 case KEY_endhostent:
8009 case KEY_endservent:
8012 case KEY_endprotoent:
8023 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8025 pl_yylval.ival = CopLINE(PL_curcop);
8027 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8030 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8033 if ((PL_bufend - p) >= 3 &&
8034 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8036 else if ((PL_bufend - p) >= 4 &&
8037 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8040 /* skip optional package name, as in "for my abc $x (..)" */
8041 if (isIDFIRST_lazy_if(p,UTF)) {
8042 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8046 Perl_croak(aTHX_ "Missing $ on loop variable");
8048 s = SvPVX(PL_linestr) + soff;
8054 LOP(OP_FORMLINE,XTERM);
8063 LOP(OP_FCNTL,XTERM);
8069 LOP(OP_FLOCK,XTERM);
8072 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8077 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8082 LOP(OP_GREPSTART, XREF);
8085 PL_expect = XOPERATOR;
8086 s = force_word(s,WORD,TRUE,FALSE);
8101 case KEY_getpriority:
8102 LOP(OP_GETPRIORITY,XTERM);
8104 case KEY_getprotobyname:
8107 case KEY_getprotobynumber:
8108 LOP(OP_GPBYNUMBER,XTERM);
8110 case KEY_getprotoent:
8122 case KEY_getpeername:
8123 UNI(OP_GETPEERNAME);
8125 case KEY_gethostbyname:
8128 case KEY_gethostbyaddr:
8129 LOP(OP_GHBYADDR,XTERM);
8131 case KEY_gethostent:
8134 case KEY_getnetbyname:
8137 case KEY_getnetbyaddr:
8138 LOP(OP_GNBYADDR,XTERM);
8143 case KEY_getservbyname:
8144 LOP(OP_GSBYNAME,XTERM);
8146 case KEY_getservbyport:
8147 LOP(OP_GSBYPORT,XTERM);
8149 case KEY_getservent:
8152 case KEY_getsockname:
8153 UNI(OP_GETSOCKNAME);
8155 case KEY_getsockopt:
8156 LOP(OP_GSOCKOPT,XTERM);
8171 pl_yylval.ival = CopLINE(PL_curcop);
8172 Perl_ck_warner_d(aTHX_
8173 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8174 "given is experimental");
8179 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8187 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8189 pl_yylval.ival = CopLINE(PL_curcop);
8193 LOP(OP_INDEX,XTERM);
8199 LOP(OP_IOCTL,XTERM);
8211 PL_expect = XOPERATOR;
8212 s = force_word(s,WORD,TRUE,FALSE);
8229 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8234 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8248 LOP(OP_LISTEN,XTERM);
8257 s = scan_pat(s,OP_MATCH);
8258 TERM(sublex_start());
8261 LOP(OP_MAPSTART, XREF);
8264 LOP(OP_MKDIR,XTERM);
8267 LOP(OP_MSGCTL,XTERM);
8270 LOP(OP_MSGGET,XTERM);
8273 LOP(OP_MSGRCV,XTERM);
8276 LOP(OP_MSGSND,XTERM);
8281 PL_in_my = (U16)tmp;
8283 if (isIDFIRST_lazy_if(s,UTF)) {
8287 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8288 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8290 if (!FEATURE_LEXSUBS_IS_ENABLED)
8292 "Experimental \"%s\" subs not enabled",
8293 tmp == KEY_my ? "my" :
8294 tmp == KEY_state ? "state" : "our");
8295 Perl_ck_warner_d(aTHX_
8296 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8297 "The lexical_subs feature is experimental");
8300 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8301 if (!PL_in_my_stash) {
8304 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8305 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8308 if (PL_madskills) { /* just add type to declarator token */
8309 sv_catsv(PL_thistoken, PL_nextwhite);
8311 sv_catpvn(PL_thistoken, start, s - start);
8319 PL_expect = XOPERATOR;
8320 s = force_word(s,WORD,TRUE,FALSE);
8324 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8329 s = tokenize_use(0, s);
8333 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8336 if (!PL_lex_allbrackets &&
8337 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8338 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8344 if (isIDFIRST_lazy_if(s,UTF)) {
8346 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8348 for (t=d; isSPACE(*t);)
8350 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8352 && !(t[0] == '=' && t[1] == '>')
8353 && !(t[0] == ':' && t[1] == ':')
8354 && !keyword(s, d-s, 0)
8356 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8357 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8358 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8364 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8366 pl_yylval.ival = OP_OR;
8376 LOP(OP_OPEN_DIR,XTERM);
8379 checkcomma(s,PL_tokenbuf,"filehandle");
8383 checkcomma(s,PL_tokenbuf,"filehandle");
8402 s = force_word(s,WORD,FALSE,TRUE);
8404 s = force_strict_version(s);
8405 PL_lex_expect = XBLOCK;
8409 LOP(OP_PIPE_OP,XTERM);
8412 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8415 COPLINE_SET_FROM_MULTI_END;
8416 pl_yylval.ival = OP_CONST;
8417 TERM(sublex_start());
8424 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8427 COPLINE_SET_FROM_MULTI_END;
8428 PL_expect = XOPERATOR;
8429 if (SvCUR(PL_lex_stuff)) {
8430 int warned_comma = !ckWARN(WARN_QW);
8431 int warned_comment = warned_comma;
8432 d = SvPV_force(PL_lex_stuff, len);
8434 for (; isSPACE(*d) && len; --len, ++d)
8439 if (!warned_comma || !warned_comment) {
8440 for (; !isSPACE(*d) && len; --len, ++d) {
8441 if (!warned_comma && *d == ',') {
8442 Perl_warner(aTHX_ packWARN(WARN_QW),
8443 "Possible attempt to separate words with commas");
8446 else if (!warned_comment && *d == '#') {
8447 Perl_warner(aTHX_ packWARN(WARN_QW),
8448 "Possible attempt to put comments in qw() list");
8454 for (; !isSPACE(*d) && len; --len, ++d)
8457 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8458 words = op_append_elem(OP_LIST, words,
8459 newSVOP(OP_CONST, 0, tokeq(sv)));
8464 words = newNULLLIST();
8466 SvREFCNT_dec(PL_lex_stuff);
8467 PL_lex_stuff = NULL;
8469 PL_expect = XOPERATOR;
8470 pl_yylval.opval = sawparens(words);
8475 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8478 pl_yylval.ival = OP_STRINGIFY;
8479 if (SvIVX(PL_lex_stuff) == '\'')
8480 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8481 TERM(sublex_start());
8484 s = scan_pat(s,OP_QR);
8485 TERM(sublex_start());
8488 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8491 pl_yylval.ival = OP_BACKTICK;
8492 TERM(sublex_start());
8499 PL_expect = XOPERATOR;
8501 s = force_version(s, FALSE);
8503 else if (*s != 'v' || !isDIGIT(s[1])
8504 || (s = force_version(s, TRUE), *s == 'v'))
8506 *PL_tokenbuf = '\0';
8507 s = force_word(s,WORD,TRUE,TRUE);
8508 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8509 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8510 GV_ADD | (UTF ? SVf_UTF8 : 0));
8512 yyerror("<> at require-statement should be quotes");
8514 if (orig_keyword == KEY_require) {
8522 PL_last_uni = PL_oldbufptr;
8523 PL_last_lop_op = OP_REQUIRE;
8525 return REPORT( (int)REQUIRE );
8531 PL_expect = XOPERATOR;
8532 s = force_word(s,WORD,TRUE,FALSE);
8536 LOP(OP_RENAME,XTERM);
8545 LOP(OP_RINDEX,XTERM);
8554 UNIDOR(OP_READLINE);
8557 UNIDOR(OP_BACKTICK);
8566 LOP(OP_REVERSE,XTERM);
8569 UNIDOR(OP_READLINK);
8576 if (pl_yylval.opval)
8577 TERM(sublex_start());
8579 TOKEN(1); /* force error */
8582 checkcomma(s,PL_tokenbuf,"filehandle");
8592 LOP(OP_SELECT,XTERM);
8598 LOP(OP_SEMCTL,XTERM);
8601 LOP(OP_SEMGET,XTERM);
8604 LOP(OP_SEMOP,XTERM);
8610 LOP(OP_SETPGRP,XTERM);
8612 case KEY_setpriority:
8613 LOP(OP_SETPRIORITY,XTERM);
8615 case KEY_sethostent:
8621 case KEY_setservent:
8624 case KEY_setprotoent:
8634 LOP(OP_SEEKDIR,XTERM);
8636 case KEY_setsockopt:
8637 LOP(OP_SSOCKOPT,XTERM);
8643 LOP(OP_SHMCTL,XTERM);
8646 LOP(OP_SHMGET,XTERM);
8649 LOP(OP_SHMREAD,XTERM);
8652 LOP(OP_SHMWRITE,XTERM);
8655 LOP(OP_SHUTDOWN,XTERM);
8664 LOP(OP_SOCKET,XTERM);
8666 case KEY_socketpair:
8667 LOP(OP_SOCKPAIR,XTERM);
8670 checkcomma(s,PL_tokenbuf,"subroutine name");
8673 s = force_word(s,WORD,TRUE,TRUE);
8677 LOP(OP_SPLIT,XTERM);
8680 LOP(OP_SPRINTF,XTERM);
8683 LOP(OP_SPLICE,XTERM);
8698 LOP(OP_SUBSTR,XTERM);
8704 char * const tmpbuf = PL_tokenbuf + 1;
8705 expectation attrful;
8706 bool have_name, have_proto;
8707 const int key = tmp;
8709 SV *format_name = NULL;
8715 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8716 SV *subtoken = PL_madskills
8717 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8722 s = SKIPSPACE2(s,tmpwhite);
8728 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8729 (*s == ':' && s[1] == ':'))
8732 SV *nametoke = NULL;
8736 attrful = XATTRBLOCK;
8737 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8741 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8743 if (key == KEY_format)
8744 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8747 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8749 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8751 sv_setpvn(PL_subname, tmpbuf, len);
8753 sv_setsv(PL_subname,PL_curstname);
8754 sv_catpvs(PL_subname,"::");
8755 sv_catpvn(PL_subname,tmpbuf,len);
8757 if (SvUTF8(PL_linestr))
8758 SvUTF8_on(PL_subname);
8764 CURMAD('X', nametoke);
8765 CURMAD('_', tmpwhite);
8766 force_ident_maybe_lex('&');
8768 s = SKIPSPACE2(d,tmpwhite);
8774 if (key == KEY_my || key == KEY_our || key==KEY_state)
8777 /* diag_listed_as: Missing name in "%s sub" */
8779 "Missing name in \"%s\"", PL_bufptr);
8781 PL_expect = XTERMBLOCK;
8782 attrful = XATTRTERM;
8783 sv_setpvs(PL_subname,"?");
8787 if (key == KEY_format) {
8789 PL_thistoken = subtoken;
8793 start_force(PL_curforce);
8794 NEXTVAL_NEXTTOKE.opval
8795 = (OP*)newSVOP(OP_CONST,0, format_name);
8796 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8803 /* Look for a prototype */
8804 if (*s == '(' && !FEATURE_SIGNATURES_IS_ENABLED) {
8805 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8806 COPLINE_SET_FROM_MULTI_END;
8808 Perl_croak(aTHX_ "Prototype not terminated");
8809 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8814 CURMAD('q', PL_thisopen);
8815 CURMAD('_', tmpwhite);
8816 CURMAD('=', PL_thisstuff);
8817 CURMAD('Q', PL_thisclose);
8818 NEXTVAL_NEXTTOKE.opval =
8819 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8820 PL_lex_stuff = NULL;
8823 s = SKIPSPACE2(s,tmpwhite);
8831 if (*s == ':' && s[1] != ':')
8832 PL_expect = attrful;
8833 else if ((*s != '{' && *s != '(') && key == KEY_sub) {
8835 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8836 else if (*s != ';' && *s != '}')
8837 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8844 curmad('^', newSVpvs(""));
8845 CURMAD('_', tmpwhite);
8849 PL_thistoken = subtoken;
8850 PERL_UNUSED_VAR(have_proto);
8853 NEXTVAL_NEXTTOKE.opval =
8854 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8855 PL_lex_stuff = NULL;
8861 sv_setpvs(PL_subname, "__ANON__");
8863 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8867 force_ident_maybe_lex('&');
8873 LOP(OP_SYSTEM,XREF);
8876 LOP(OP_SYMLINK,XTERM);
8879 LOP(OP_SYSCALL,XTERM);
8882 LOP(OP_SYSOPEN,XTERM);
8885 LOP(OP_SYSSEEK,XTERM);
8888 LOP(OP_SYSREAD,XTERM);
8891 LOP(OP_SYSWRITE,XTERM);
8896 TERM(sublex_start());
8917 LOP(OP_TRUNCATE,XTERM);
8929 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8931 pl_yylval.ival = CopLINE(PL_curcop);
8935 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8937 pl_yylval.ival = CopLINE(PL_curcop);
8941 LOP(OP_UNLINK,XTERM);
8947 LOP(OP_UNPACK,XTERM);
8950 LOP(OP_UTIME,XTERM);
8956 LOP(OP_UNSHIFT,XTERM);
8959 s = tokenize_use(1, s);
8969 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8971 pl_yylval.ival = CopLINE(PL_curcop);
8972 Perl_ck_warner_d(aTHX_
8973 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8974 "when is experimental");
8978 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8980 pl_yylval.ival = CopLINE(PL_curcop);
8984 PL_hints |= HINT_BLOCK_SCOPE;
8991 LOP(OP_WAITPID,XTERM);
8997 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8998 * we use the same number on EBCDIC */
8999 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
9003 if (PL_expect == XOPERATOR) {
9004 if (*s == '=' && !PL_lex_allbrackets &&
9005 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9013 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
9015 pl_yylval.ival = OP_XOR;
9024 Looks up an identifier in the pad or in a package
9027 PRIVATEREF if this is a lexical name.
9028 WORD if this belongs to a package.
9031 if we're in a my declaration
9032 croak if they tried to say my($foo::bar)
9033 build the ops for a my() declaration
9034 if it's an access to a my() variable
9035 build ops for access to a my() variable
9036 if in a dq string, and they've said @foo and we can't find @foo
9038 build ops for a bareword
9042 S_pending_ident(pTHX)
9046 const char pit = (char)pl_yylval.ival;
9047 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9048 /* All routes through this function want to know if there is a colon. */
9049 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9051 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9052 "### Pending identifier '%s'\n", PL_tokenbuf); });
9054 /* if we're in a my(), we can't allow dynamics here.
9055 $foo'bar has already been turned into $foo::bar, so
9056 just check for colons.
9058 if it's a legal name, the OP is a PADANY.
9061 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9063 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9064 "variable %s in \"our\"",
9065 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9066 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9070 /* PL_no_myglob is constant */
9071 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9072 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9073 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9074 UTF ? SVf_UTF8 : 0);
9078 pl_yylval.opval = newOP(OP_PADANY, 0);
9079 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9080 UTF ? SVf_UTF8 : 0);
9086 build the ops for accesses to a my() variable.
9091 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9092 UTF ? SVf_UTF8 : 0);
9093 if (tmp != NOT_IN_PAD) {
9094 /* might be an "our" variable" */
9095 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9096 /* build ops for a bareword */
9097 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9098 HEK * const stashname = HvNAME_HEK(stash);
9099 SV * const sym = newSVhek(stashname);
9100 sv_catpvs(sym, "::");
9101 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9102 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9103 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9107 ? (GV_ADDMULTI | GV_ADDINEVAL)
9110 ((PL_tokenbuf[0] == '$') ? SVt_PV
9111 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9116 pl_yylval.opval = newOP(OP_PADANY, 0);
9117 pl_yylval.opval->op_targ = tmp;
9123 Whine if they've said @foo in a doublequoted string,
9124 and @foo isn't a variable we can find in the symbol
9127 if (ckWARN(WARN_AMBIGUOUS) &&
9128 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9129 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9130 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9131 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9132 /* DO NOT warn for @- and @+ */
9133 && !( PL_tokenbuf[2] == '\0' &&
9134 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9137 /* Downgraded from fatal to warning 20000522 mjd */
9138 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9139 "Possible unintended interpolation of %"UTF8f
9141 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9145 /* build ops for a bareword */
9146 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9147 newSVpvn_flags(PL_tokenbuf + 1,
9149 UTF ? SVf_UTF8 : 0 ));
9150 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9152 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9153 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9154 | ( UTF ? SVf_UTF8 : 0 ),
9155 ((PL_tokenbuf[0] == '$') ? SVt_PV
9156 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9162 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9166 PERL_ARGS_ASSERT_CHECKCOMMA;
9168 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9169 if (ckWARN(WARN_SYNTAX)) {
9172 for (w = s+2; *w && level; w++) {
9180 /* the list of chars below is for end of statements or
9181 * block / parens, boolean operators (&&, ||, //) and branch
9182 * constructs (or, and, if, until, unless, while, err, for).
9183 * Not a very solid hack... */
9184 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9185 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9186 "%s (...) interpreted as function",name);
9189 while (s < PL_bufend && isSPACE(*s))
9193 while (s < PL_bufend && isSPACE(*s))
9195 if (isIDFIRST_lazy_if(s,UTF)) {
9196 const char * const w = s;
9197 s += UTF ? UTF8SKIP(s) : 1;
9198 while (isWORDCHAR_lazy_if(s,UTF))
9199 s += UTF ? UTF8SKIP(s) : 1;
9200 while (s < PL_bufend && isSPACE(*s))
9204 if (keyword(w, s - w, 0))
9207 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9208 if (gv && GvCVu(gv))
9210 Perl_croak(aTHX_ "No comma allowed after %s", what);
9215 /* S_new_constant(): do any overload::constant lookup.
9217 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9218 Best used as sv=new_constant(..., sv, ...).
9219 If s, pv are NULL, calls subroutine with one argument,
9220 and <type> is used with error messages only.
9221 <type> is assumed to be well formed UTF-8 */
9224 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9225 SV *sv, SV *pv, const char *type, STRLEN typelen)
9228 HV * table = GvHV(PL_hintgv); /* ^H */
9233 const char *why1 = "", *why2 = "", *why3 = "";
9235 PERL_ARGS_ASSERT_NEW_CONSTANT;
9236 /* We assume that this is true: */
9237 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9240 /* charnames doesn't work well if there have been errors found */
9241 if (PL_error_count > 0 && *key == 'c')
9243 SvREFCNT_dec_NN(sv);
9244 return &PL_sv_undef;
9247 sv_2mortal(sv); /* Parent created it permanently */
9249 || ! (PL_hints & HINT_LOCALIZE_HH)
9250 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9255 /* Here haven't found what we're looking for. If it is charnames,
9256 * perhaps it needs to be loaded. Try doing that before giving up */
9258 Perl_load_module(aTHX_
9260 newSVpvs("_charnames"),
9261 /* version parameter; no need to specify it, as if
9262 * we get too early a version, will fail anyway,
9263 * not being able to find '_charnames' */
9268 assert(sp == PL_stack_sp);
9269 table = GvHV(PL_hintgv);
9271 && (PL_hints & HINT_LOCALIZE_HH)
9272 && (cvp = hv_fetch(table, key, keylen, FALSE))
9278 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9279 msg = Perl_form(aTHX_
9280 "Constant(%.*s) unknown",
9281 (int)(type ? typelen : len),
9287 why3 = "} is not defined";
9290 msg = Perl_form(aTHX_
9291 /* The +3 is for '\N{'; -4 for that, plus '}' */
9292 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9296 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9297 (int)(type ? typelen : len),
9298 (type ? type: s), why1, why2, why3);
9301 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9302 return SvREFCNT_inc_simple_NN(sv);
9307 pv = newSVpvn_flags(s, len, SVs_TEMP);
9309 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9311 typesv = &PL_sv_undef;
9313 PUSHSTACKi(PERLSI_OVERLOAD);
9325 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9329 /* Check the eval first */
9330 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9332 const char * errstr;
9333 sv_catpvs(errsv, "Propagated");
9334 errstr = SvPV_const(errsv, errlen);
9335 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9337 res = SvREFCNT_inc_simple_NN(sv);
9341 SvREFCNT_inc_simple_void_NN(res);
9350 why1 = "Call to &{$^H{";
9352 why3 = "}} did not return a defined value";
9354 (void)sv_2mortal(sv);
9361 PERL_STATIC_INLINE void
9362 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9364 PERL_ARGS_ASSERT_PARSE_IDENT;
9368 Perl_croak(aTHX_ "%s", ident_too_long);
9369 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9370 /* The UTF-8 case must come first, otherwise things
9371 * like c\N{COMBINING TILDE} would start failing, as the
9372 * isWORDCHAR_A case below would gobble the 'c' up.
9375 char *t = *s + UTF8SKIP(*s);
9376 while (isIDCONT_utf8((U8*)t))
9378 if (*d + (t - *s) > e)
9379 Perl_croak(aTHX_ "%s", ident_too_long);
9380 Copy(*s, *d, t - *s, char);
9384 else if ( isWORDCHAR_A(**s) ) {
9387 } while (isWORDCHAR_A(**s) && *d < e);
9389 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9394 else if (allow_package && **s == ':' && (*s)[1] == ':'
9395 /* Disallow things like Foo::$bar. For the curious, this is
9396 * the code path that triggers the "Bad name after" warning
9397 * when looking for barewords.
9399 && (*s)[2] != '$') {
9409 /* Returns a NUL terminated string, with the length of the string written to
9413 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9417 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9418 bool is_utf8 = cBOOL(UTF);
9420 PERL_ARGS_ASSERT_SCAN_WORD;
9422 parse_ident(&s, &d, e, allow_package, is_utf8);
9429 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9432 I32 herelines = PL_parser->herelines;
9433 SSize_t bracket = -1;
9436 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9437 bool is_utf8 = cBOOL(UTF);
9438 I32 orig_copline = 0, tmp_copline = 0;
9440 PERL_ARGS_ASSERT_SCAN_IDENT;
9445 while (isDIGIT(*s)) {
9447 Perl_croak(aTHX_ "%s", ident_too_long);
9452 parse_ident(&s, &d, e, 1, is_utf8);
9457 /* Either a digit variable, or parse_ident() found an identifier
9458 (anything valid as a bareword), so job done and return. */
9459 if (PL_lex_state != LEX_NORMAL)
9460 PL_lex_state = LEX_INTERPENDMAYBE;
9463 if (*s == '$' && s[1] &&
9464 (isIDFIRST_lazy_if(s+1,is_utf8)
9465 || isDIGIT_A((U8)s[1])
9468 || strnEQ(s+1,"::",2)) )
9470 /* Dereferencing a value in a scalar variable.
9471 The alternatives are different syntaxes for a scalar variable.
9472 Using ' as a leading package separator isn't allowed. :: is. */
9475 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9477 bracket = s - SvPVX(PL_linestr);
9479 orig_copline = CopLINE(PL_curcop);
9480 if (s < PL_bufend && isSPACE(*s)) {
9485 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9486 * iff Unicode semantics are to be used. The legal ones are any of:
9488 * b) ASCII punctuation
9489 * c) When not under Unicode rules, any upper Latin1 character
9490 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9491 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
9493 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9494 || isDIGIT_A((U8)(d)) \
9495 || (!(u) && !isASCII((U8)(d))) \
9496 || ((((U8)(d)) < 32) \
9497 && (((((U8)(d)) >= 14) \
9498 || (((U8)(d)) <= 8 && (d) != 0) \
9499 || (((U8)(d)) == 13)))) \
9500 || (((U8)(d)) == toCTRL('?')))
9502 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9504 if ( isCNTRL_A((U8)*s) ) {
9505 deprecate("literal control characters in variable names");
9509 const STRLEN skip = UTF8SKIP(s);
9512 for ( i = 0; i < skip; i++ )
9520 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9521 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9525 /* Warn about ambiguous code after unary operators if {...} notation isn't
9526 used. There's no difference in ambiguity; it's merely a heuristic
9527 about when not to warn. */
9528 else if (ck_uni && bracket == -1)
9530 if (bracket != -1) {
9531 /* If we were processing {...} notation then... */
9532 if (isIDFIRST_lazy_if(d,is_utf8)) {
9533 /* if it starts as a valid identifier, assume that it is one.
9534 (the later check for } being at the expected point will trap
9535 cases where this doesn't pan out.) */
9536 d += is_utf8 ? UTF8SKIP(d) : 1;
9537 parse_ident(&s, &d, e, 1, is_utf8);
9539 tmp_copline = CopLINE(PL_curcop);
9540 if (s < PL_bufend && isSPACE(*s)) {
9543 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9544 /* ${foo[0]} and ${foo{bar}} notation. */
9545 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9546 const char * const brack =
9548 ((*s == '[') ? "[...]" : "{...}");
9549 orig_copline = CopLINE(PL_curcop);
9550 CopLINE_set(PL_curcop, tmp_copline);
9551 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9552 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9553 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9554 funny, dest, brack, funny, dest, brack);
9555 CopLINE_set(PL_curcop, orig_copline);
9558 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9559 PL_lex_allbrackets++;
9563 /* Handle extended ${^Foo} variables
9564 * 1999-02-27 mjd-perl-patch@plover.com */
9565 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9569 while (isWORDCHAR(*s) && d < e) {
9573 Perl_croak(aTHX_ "%s", ident_too_long);
9578 tmp_copline = CopLINE(PL_curcop);
9579 if (s < PL_bufend && isSPACE(*s)) {
9583 /* Expect to find a closing } after consuming any trailing whitespace.
9587 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9588 PL_lex_state = LEX_INTERPEND;
9591 if (PL_lex_state == LEX_NORMAL) {
9592 if (ckWARN(WARN_AMBIGUOUS) &&
9593 (keyword(dest, d - dest, 0)
9594 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9596 SV *tmp = newSVpvn_flags( dest, d - dest,
9597 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9600 orig_copline = CopLINE(PL_curcop);
9601 CopLINE_set(PL_curcop, tmp_copline);
9602 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9603 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9604 funny, tmp, funny, tmp);
9605 CopLINE_set(PL_curcop, orig_copline);
9610 /* Didn't find the closing } at the point we expected, so restore
9611 state such that the next thing to process is the opening { and */
9612 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9613 CopLINE_set(PL_curcop, orig_copline);
9614 PL_parser->herelines = herelines;
9618 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9619 PL_lex_state = LEX_INTERPEND;
9624 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9626 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9627 * the parse starting at 's', based on the subset that are valid in this
9628 * context input to this routine in 'valid_flags'. Advances s. Returns
9629 * TRUE if the input should be treated as a valid flag, so the next char
9630 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9631 * first call on the current regex. This routine will set it to any
9632 * charset modifier found. The caller shouldn't change it. This way,
9633 * another charset modifier encountered in the parse can be detected as an
9634 * error, as we have decided to allow only one */
9637 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9639 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9640 if (isWORDCHAR_lazy_if(*s, UTF)) {
9641 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9642 UTF ? SVf_UTF8 : 0);
9644 /* Pretend that it worked, so will continue processing before
9653 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9654 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9655 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9656 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9657 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9658 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9659 case LOCALE_PAT_MOD:
9661 goto multiple_charsets;
9663 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9666 case UNICODE_PAT_MOD:
9668 goto multiple_charsets;
9670 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9673 case ASCII_RESTRICT_PAT_MOD:
9675 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9679 /* Error if previous modifier wasn't an 'a', but if it was, see
9680 * if, and accept, a second occurrence (only) */
9682 || get_regex_charset(*pmfl)
9683 != REGEX_ASCII_RESTRICTED_CHARSET)
9685 goto multiple_charsets;
9687 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9691 case DEPENDS_PAT_MOD:
9693 goto multiple_charsets;
9695 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9704 if (*charset != c) {
9705 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9707 else if (c == 'a') {
9708 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9709 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9712 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9715 /* Pretend that it worked, so will continue processing before dieing */
9721 S_scan_pat(pTHX_ char *start, I32 type)
9726 const char * const valid_flags =
9727 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9728 char charset = '\0'; /* character set modifier */
9733 PERL_ARGS_ASSERT_SCAN_PAT;
9735 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9736 TRUE /* look for escaped bracketed metas */, NULL);
9739 const char * const delimiter = skipspace(start);
9743 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9744 : "Search pattern not terminated" ));
9747 pm = (PMOP*)newPMOP(type, 0);
9748 if (PL_multi_open == '?') {
9749 /* This is the only point in the code that sets PMf_ONCE: */
9750 pm->op_pmflags |= PMf_ONCE;
9752 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9753 allows us to restrict the list needed by reset to just the ??
9755 assert(type != OP_TRANS);
9757 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9760 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9763 elements = mg->mg_len / sizeof(PMOP**);
9764 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9765 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9766 mg->mg_len = elements * sizeof(PMOP**);
9767 PmopSTASH_set(pm,PL_curstash);
9774 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9775 * anon CV. False positives like qr/[(?{]/ are harmless */
9777 if (type == OP_QR) {
9779 char *e, *p = SvPV(PL_lex_stuff, len);
9781 for (; p < e; p++) {
9782 if (p[0] == '(' && p[1] == '?'
9783 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9785 pm->op_pmflags |= PMf_HAS_CV;
9789 pm->op_pmflags |= PMf_IS_QR;
9792 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9794 if (PL_madskills && modstart != s) {
9795 SV* tmptoken = newSVpvn(modstart, s - modstart);
9796 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9799 /* issue a warning if /c is specified,but /g is not */
9800 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9802 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9803 "Use of /c modifier is meaningless without /g" );
9806 PL_lex_op = (OP*)pm;
9807 pl_yylval.ival = OP_MATCH;
9812 S_scan_subst(pTHX_ char *start)
9820 char charset = '\0'; /* character set modifier */
9826 PERL_ARGS_ASSERT_SCAN_SUBST;
9828 pl_yylval.ival = OP_NULL;
9830 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9831 TRUE /* look for escaped bracketed metas */, &t);
9834 Perl_croak(aTHX_ "Substitution pattern not terminated");
9839 CURMAD('q', PL_thisopen);
9840 CURMAD('_', PL_thiswhite);
9841 CURMAD('E', PL_thisstuff);
9842 CURMAD('Q', PL_thisclose);
9843 PL_realtokenstart = s - SvPVX(PL_linestr);
9847 first_start = PL_multi_start;
9848 first_line = CopLINE(PL_curcop);
9849 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9852 SvREFCNT_dec(PL_lex_stuff);
9853 PL_lex_stuff = NULL;
9855 Perl_croak(aTHX_ "Substitution replacement not terminated");
9857 PL_multi_start = first_start; /* so whole substitution is taken together */
9859 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9863 CURMAD('z', PL_thisopen);
9864 CURMAD('R', PL_thisstuff);
9865 CURMAD('Z', PL_thisclose);
9871 if (*s == EXEC_PAT_MOD) {
9875 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9884 curmad('m', newSVpvn(modstart, s - modstart));
9885 append_madprops(PL_thismad, (OP*)pm, 0);
9889 if ((pm->op_pmflags & PMf_CONTINUE)) {
9890 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9894 SV * const repl = newSVpvs("");
9897 pm->op_pmflags |= PMf_EVAL;
9900 sv_catpvs(repl, "eval ");
9902 sv_catpvs(repl, "do ");
9904 sv_catpvs(repl, "{");
9905 sv_catsv(repl, PL_sublex_info.repl);
9906 sv_catpvs(repl, "}");
9908 SvREFCNT_dec(PL_sublex_info.repl);
9909 PL_sublex_info.repl = repl;
9911 if (CopLINE(PL_curcop) != first_line) {
9912 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9913 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9914 CopLINE(PL_curcop) - first_line;
9915 CopLINE_set(PL_curcop, first_line);
9918 PL_lex_op = (OP*)pm;
9919 pl_yylval.ival = OP_SUBST;
9924 S_scan_trans(pTHX_ char *start)
9932 bool nondestruct = 0;
9938 PERL_ARGS_ASSERT_SCAN_TRANS;
9940 pl_yylval.ival = OP_NULL;
9942 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9944 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9949 CURMAD('q', PL_thisopen);
9950 CURMAD('_', PL_thiswhite);
9951 CURMAD('E', PL_thisstuff);
9952 CURMAD('Q', PL_thisclose);
9953 PL_realtokenstart = s - SvPVX(PL_linestr);
9957 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9960 SvREFCNT_dec(PL_lex_stuff);
9961 PL_lex_stuff = NULL;
9963 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9966 CURMAD('z', PL_thisopen);
9967 CURMAD('R', PL_thisstuff);
9968 CURMAD('Z', PL_thisclose);
9971 complement = del = squash = 0;
9978 complement = OPpTRANS_COMPLEMENT;
9981 del = OPpTRANS_DELETE;
9984 squash = OPpTRANS_SQUASH;
9996 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9997 o->op_private &= ~OPpTRANS_ALL;
9998 o->op_private |= del|squash|complement|
9999 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
10000 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
10003 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10006 if (PL_madskills) {
10008 curmad('m', newSVpvn(modstart, s - modstart));
10009 append_madprops(PL_thismad, o, 0);
10018 Takes a pointer to the first < in <<FOO.
10019 Returns a pointer to the byte following <<FOO.
10021 This function scans a heredoc, which involves different methods
10022 depending on whether we are in a string eval, quoted construct, etc.
10023 This is because PL_linestr could containing a single line of input, or
10024 a whole string being evalled, or the contents of the current quote-
10027 The two basic methods are:
10028 - Steal lines from the input stream
10029 - Scan the heredoc in PL_linestr and remove it therefrom
10031 In a file scope or filtered eval, the first method is used; in a
10032 string eval, the second.
10034 In a quote-like operator, we have to choose between the two,
10035 depending on where we can find a newline. We peek into outer lex-
10036 ing scopes until we find one with a newline in it. If we reach the
10037 outermost lexing scope and it is a file, we use the stream method.
10038 Otherwise it is treated as an eval.
10042 S_scan_heredoc(pTHX_ char *s)
10045 I32 op_type = OP_SCALAR;
10052 const bool infile = PL_rsfp || PL_parser->filtered;
10053 const line_t origline = CopLINE(PL_curcop);
10054 LEXSHARED *shared = PL_parser->lex_shared;
10056 I32 stuffstart = s - SvPVX(PL_linestr);
10059 PL_realtokenstart = -1;
10062 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10065 d = PL_tokenbuf + 1;
10066 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10067 *PL_tokenbuf = '\n';
10069 while (SPACE_OR_TAB(*peek))
10071 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10074 s = delimcpy(d, e, s, PL_bufend, term, &len);
10075 if (s == PL_bufend)
10076 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10082 /* <<\FOO is equivalent to <<'FOO' */
10086 if (!isWORDCHAR_lazy_if(s,UTF))
10087 deprecate("bare << to mean <<\"\"");
10088 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10093 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10094 Perl_croak(aTHX_ "Delimiter for here document is too long");
10097 len = d - PL_tokenbuf;
10100 if (PL_madskills) {
10101 tstart = PL_tokenbuf + 1;
10102 PL_thisclose = newSVpvn(tstart, len - 1);
10103 tstart = SvPVX(PL_linestr) + stuffstart;
10104 PL_thisopen = newSVpvn(tstart, s - tstart);
10105 stuffstart = s - SvPVX(PL_linestr);
10108 #ifndef PERL_STRICT_CR
10109 d = strchr(s, '\r');
10111 char * const olds = s;
10113 while (s < PL_bufend) {
10119 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10128 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10133 if (PL_madskills) {
10134 tstart = SvPVX(PL_linestr) + stuffstart;
10136 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10138 PL_thisstuff = newSVpvn(tstart, s - tstart);
10141 stuffstart = s - SvPVX(PL_linestr);
10144 tmpstr = newSV_type(SVt_PVIV);
10145 SvGROW(tmpstr, 80);
10146 if (term == '\'') {
10147 op_type = OP_CONST;
10148 SvIV_set(tmpstr, -1);
10150 else if (term == '`') {
10151 op_type = OP_BACKTICK;
10152 SvIV_set(tmpstr, '\\');
10155 PL_multi_start = origline + 1 + PL_parser->herelines;
10156 PL_multi_open = PL_multi_close = '<';
10157 /* inside a string eval or quote-like operator */
10158 if (!infile || PL_lex_inwhat) {
10161 char * const olds = s;
10162 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10163 /* These two fields are not set until an inner lexing scope is
10164 entered. But we need them set here. */
10165 shared->ls_bufptr = s;
10166 shared->ls_linestr = PL_linestr;
10168 /* Look for a newline. If the current buffer does not have one,
10169 peek into the line buffer of the parent lexing scope, going
10170 up as many levels as necessary to find one with a newline
10173 while (!(s = (char *)memchr(
10174 (void *)shared->ls_bufptr, '\n',
10175 SvEND(shared->ls_linestr)-shared->ls_bufptr
10177 shared = shared->ls_prev;
10178 /* shared is only null if we have gone beyond the outermost
10179 lexing scope. In a file, we will have broken out of the
10180 loop in the previous iteration. In an eval, the string buf-
10181 fer ends with "\n;", so the while condition above will have
10182 evaluated to false. So shared can never be null. */
10184 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10185 most lexing scope. In a file, shared->ls_linestr at that
10186 level is just one line, so there is no body to steal. */
10187 if (infile && !shared->ls_prev) {
10193 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10196 linestr = shared->ls_linestr;
10197 bufend = SvEND(linestr);
10199 while (s < bufend - len + 1 &&
10200 memNE(s,PL_tokenbuf,len) ) {
10202 ++PL_parser->herelines;
10204 if (s >= bufend - len + 1) {
10207 sv_setpvn(tmpstr,d+1,s-d);
10209 if (PL_madskills) {
10211 sv_catpvn(PL_thisstuff, d + 1, s - d);
10213 PL_thisstuff = newSVpvn(d + 1, s - d);
10214 stuffstart = s - SvPVX(PL_linestr);
10218 /* the preceding stmt passes a newline */
10219 PL_parser->herelines++;
10221 /* s now points to the newline after the heredoc terminator.
10222 d points to the newline before the body of the heredoc.
10225 /* We are going to modify linestr in place here, so set
10226 aside copies of the string if necessary for re-evals or
10228 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10229 check shared->re_eval_str. */
10230 if (shared->re_eval_start || shared->re_eval_str) {
10231 /* Set aside the rest of the regexp */
10232 if (!shared->re_eval_str)
10233 shared->re_eval_str =
10234 newSVpvn(shared->re_eval_start,
10235 bufend - shared->re_eval_start);
10236 shared->re_eval_start -= s-d;
10238 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10239 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10240 cx->blk_eval.cur_text == linestr)
10242 cx->blk_eval.cur_text = newSVsv(linestr);
10243 SvSCREAM_on(cx->blk_eval.cur_text);
10245 /* Copy everything from s onwards back to d. */
10246 Move(s,d,bufend-s + 1,char);
10247 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10248 /* Setting PL_bufend only applies when we have not dug deeper
10249 into other scopes, because sublex_done sets PL_bufend to
10250 SvEND(PL_linestr). */
10251 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10258 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10259 term = PL_tokenbuf[1];
10261 linestr_save = PL_linestr; /* must restore this afterwards */
10262 d = s; /* and this */
10263 PL_linestr = newSVpvs("");
10264 PL_bufend = SvPVX(PL_linestr);
10267 if (PL_madskills) {
10268 tstart = SvPVX(PL_linestr) + stuffstart;
10270 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10272 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10275 PL_bufptr = PL_bufend;
10276 CopLINE_set(PL_curcop,
10277 origline + 1 + PL_parser->herelines);
10278 if (!lex_next_chunk(LEX_NO_TERM)
10279 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10280 SvREFCNT_dec(linestr_save);
10283 CopLINE_set(PL_curcop, origline);
10284 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10285 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10286 /* ^That should be enough to avoid this needing to grow: */
10287 sv_catpvs(PL_linestr, "\n\0");
10288 assert(s == SvPVX(PL_linestr));
10289 PL_bufend = SvEND(PL_linestr);
10293 stuffstart = s - SvPVX(PL_linestr);
10295 PL_parser->herelines++;
10296 PL_last_lop = PL_last_uni = NULL;
10297 #ifndef PERL_STRICT_CR
10298 if (PL_bufend - PL_linestart >= 2) {
10299 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10300 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10302 PL_bufend[-2] = '\n';
10304 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10306 else if (PL_bufend[-1] == '\r')
10307 PL_bufend[-1] = '\n';
10309 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10310 PL_bufend[-1] = '\n';
10312 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10313 SvREFCNT_dec(PL_linestr);
10314 PL_linestr = linestr_save;
10315 PL_linestart = SvPVX(linestr_save);
10316 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10321 sv_catsv(tmpstr,PL_linestr);
10325 PL_multi_end = origline + PL_parser->herelines;
10326 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10327 SvPV_shrink_to_cur(tmpstr);
10330 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10332 else if (PL_encoding)
10333 sv_recode_to_utf8(tmpstr, PL_encoding);
10335 PL_lex_stuff = tmpstr;
10336 pl_yylval.ival = op_type;
10340 SvREFCNT_dec(tmpstr);
10341 CopLINE_set(PL_curcop, origline);
10342 missingterm(PL_tokenbuf + 1);
10345 /* scan_inputsymbol
10346 takes: current position in input buffer
10347 returns: new position in input buffer
10348 side-effects: pl_yylval and lex_op are set.
10353 <FH> read from filehandle
10354 <pkg::FH> read from package qualified filehandle
10355 <pkg'FH> read from package qualified filehandle
10356 <$fh> read from filehandle in $fh
10357 <*.h> filename glob
10362 S_scan_inputsymbol(pTHX_ char *start)
10365 char *s = start; /* current position in buffer */
10368 char *d = PL_tokenbuf; /* start of temp holding space */
10369 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10371 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10373 end = strchr(s, '\n');
10376 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10378 /* die if we didn't have space for the contents of the <>,
10379 or if it didn't end, or if we see a newline
10382 if (len >= (I32)sizeof PL_tokenbuf)
10383 Perl_croak(aTHX_ "Excessively long <> operator");
10385 Perl_croak(aTHX_ "Unterminated <> operator");
10390 Remember, only scalar variables are interpreted as filehandles by
10391 this code. Anything more complex (e.g., <$fh{$num}>) will be
10392 treated as a glob() call.
10393 This code makes use of the fact that except for the $ at the front,
10394 a scalar variable and a filehandle look the same.
10396 if (*d == '$' && d[1]) d++;
10398 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10399 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10400 d += UTF ? UTF8SKIP(d) : 1;
10402 /* If we've tried to read what we allow filehandles to look like, and
10403 there's still text left, then it must be a glob() and not a getline.
10404 Use scan_str to pull out the stuff between the <> and treat it
10405 as nothing more than a string.
10408 if (d - PL_tokenbuf != len) {
10409 pl_yylval.ival = OP_GLOB;
10410 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10412 Perl_croak(aTHX_ "Glob not terminated");
10416 bool readline_overriden = FALSE;
10418 /* we're in a filehandle read situation */
10421 /* turn <> into <ARGV> */
10423 Copy("ARGV",d,5,char);
10425 /* Check whether readline() is overriden */
10426 if ((gv_readline = gv_override("readline",8)))
10427 readline_overriden = TRUE;
10429 /* if <$fh>, create the ops to turn the variable into a
10433 /* try to find it in the pad for this block, otherwise find
10434 add symbol table ops
10436 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10437 if (tmp != NOT_IN_PAD) {
10438 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10439 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10440 HEK * const stashname = HvNAME_HEK(stash);
10441 SV * const sym = sv_2mortal(newSVhek(stashname));
10442 sv_catpvs(sym, "::");
10443 sv_catpv(sym, d+1);
10448 OP * const o = newOP(OP_PADSV, 0);
10450 PL_lex_op = readline_overriden
10451 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10452 op_append_elem(OP_LIST, o,
10453 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10454 : (OP*)newUNOP(OP_READLINE, 0, o);
10463 ? (GV_ADDMULTI | GV_ADDINEVAL)
10464 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10466 PL_lex_op = readline_overriden
10467 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10468 op_append_elem(OP_LIST,
10469 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10470 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10471 : (OP*)newUNOP(OP_READLINE, 0,
10472 newUNOP(OP_RV2SV, 0,
10473 newGVOP(OP_GV, 0, gv)));
10475 if (!readline_overriden)
10476 PL_lex_op->op_flags |= OPf_SPECIAL;
10477 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10478 pl_yylval.ival = OP_NULL;
10481 /* If it's none of the above, it must be a literal filehandle
10482 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10484 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10485 PL_lex_op = readline_overriden
10486 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10487 op_append_elem(OP_LIST,
10488 newGVOP(OP_GV, 0, gv),
10489 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10490 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10491 pl_yylval.ival = OP_NULL;
10501 start position in buffer
10502 keep_quoted preserve \ on the embedded delimiter(s)
10503 keep_delims preserve the delimiters around the string
10504 re_reparse compiling a run-time /(?{})/:
10505 collapse // to /, and skip encoding src
10506 deprecate_escaped_meta issue a deprecation warning for cer-
10507 tain paired metacharacters that appear
10509 delimp if non-null, this is set to the position of
10510 the closing delimiter, or just after it if
10511 the closing and opening delimiters differ
10512 (i.e., the opening delimiter of a substitu-
10514 returns: position to continue reading from buffer
10515 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10516 updates the read buffer.
10518 This subroutine pulls a string out of the input. It is called for:
10519 q single quotes q(literal text)
10520 ' single quotes 'literal text'
10521 qq double quotes qq(interpolate $here please)
10522 " double quotes "interpolate $here please"
10523 qx backticks qx(/bin/ls -l)
10524 ` backticks `/bin/ls -l`
10525 qw quote words @EXPORT_OK = qw( func() $spam )
10526 m// regexp match m/this/
10527 s/// regexp substitute s/this/that/
10528 tr/// string transliterate tr/this/that/
10529 y/// string transliterate y/this/that/
10530 ($*@) sub prototypes sub foo ($)
10531 (stuff) sub attr parameters sub foo : attr(stuff)
10532 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10534 In most of these cases (all but <>, patterns and transliterate)
10535 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10536 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10537 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10540 It skips whitespace before the string starts, and treats the first
10541 character as the delimiter. If the delimiter is one of ([{< then
10542 the corresponding "close" character )]}> is used as the closing
10543 delimiter. It allows quoting of delimiters, and if the string has
10544 balanced delimiters ([{<>}]) it allows nesting.
10546 On success, the SV with the resulting string is put into lex_stuff or,
10547 if that is already non-NULL, into lex_repl. The second case occurs only
10548 when parsing the RHS of the special constructs s/// and tr/// (y///).
10549 For convenience, the terminating delimiter character is stuffed into
10554 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10555 bool deprecate_escaped_meta, char **delimp
10559 SV *sv; /* scalar value: string */
10560 const char *tmps; /* temp string, used for delimiter matching */
10561 char *s = start; /* current position in the buffer */
10562 char term; /* terminating character */
10563 char *to; /* current position in the sv's data */
10564 I32 brackets = 1; /* bracket nesting level */
10565 bool has_utf8 = FALSE; /* is there any utf8 content? */
10566 I32 termcode; /* terminating char. code */
10567 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10568 STRLEN termlen; /* length of terminating string */
10569 int last_off = 0; /* last position for nesting bracket */
10570 char *escaped_open = NULL;
10577 PERL_ARGS_ASSERT_SCAN_STR;
10579 /* skip space before the delimiter */
10585 if (PL_realtokenstart >= 0) {
10586 stuffstart = PL_realtokenstart;
10587 PL_realtokenstart = -1;
10590 stuffstart = start - SvPVX(PL_linestr);
10592 /* mark where we are, in case we need to report errors */
10595 /* after skipping whitespace, the next character is the terminator */
10598 termcode = termstr[0] = term;
10602 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10603 Copy(s, termstr, termlen, U8);
10604 if (!UTF8_IS_INVARIANT(term))
10608 /* mark where we are */
10609 PL_multi_start = CopLINE(PL_curcop);
10610 PL_multi_open = term;
10611 herelines = PL_parser->herelines;
10613 /* find corresponding closing delimiter */
10614 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10615 termcode = termstr[0] = term = tmps[5];
10617 PL_multi_close = term;
10619 /* A warning is raised if the input parameter requires it for escaped (by a
10620 * backslash) paired metacharacters {} [] and () when the delimiters are
10621 * those same characters, and the backslash is ineffective. This doesn't
10622 * happen for <>, as they aren't metas. */
10623 if (deprecate_escaped_meta
10624 && (PL_multi_open == PL_multi_close
10625 || PL_multi_open == '<'
10626 || ! ckWARN_d(WARN_DEPRECATED)))
10628 deprecate_escaped_meta = FALSE;
10631 /* create a new SV to hold the contents. 79 is the SV's initial length.
10632 What a random number. */
10633 sv = newSV_type(SVt_PVIV);
10635 SvIV_set(sv, termcode);
10636 (void)SvPOK_only(sv); /* validate pointer */
10638 /* move past delimiter and try to read a complete string */
10640 sv_catpvn(sv, s, termlen);
10643 tstart = SvPVX(PL_linestr) + stuffstart;
10644 if (PL_madskills && !PL_thisopen && !keep_delims) {
10645 PL_thisopen = newSVpvn(tstart, s - tstart);
10646 stuffstart = s - SvPVX(PL_linestr);
10650 if (PL_encoding && !UTF && !re_reparse) {
10654 int offset = s - SvPVX_const(PL_linestr);
10655 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10656 &offset, (char*)termstr, termlen);
10660 if (SvIsCOW(PL_linestr)) {
10661 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10662 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10663 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10664 char *buf = SvPVX(PL_linestr);
10665 bufend_pos = PL_parser->bufend - buf;
10666 bufptr_pos = PL_parser->bufptr - buf;
10667 oldbufptr_pos = PL_parser->oldbufptr - buf;
10668 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10669 linestart_pos = PL_parser->linestart - buf;
10670 last_uni_pos = PL_parser->last_uni
10671 ? PL_parser->last_uni - buf
10673 last_lop_pos = PL_parser->last_lop
10674 ? PL_parser->last_lop - buf
10676 re_eval_start_pos =
10677 PL_parser->lex_shared->re_eval_start ?
10678 PL_parser->lex_shared->re_eval_start - buf : 0;
10681 sv_force_normal(PL_linestr);
10683 buf = SvPVX(PL_linestr);
10684 PL_parser->bufend = buf + bufend_pos;
10685 PL_parser->bufptr = buf + bufptr_pos;
10686 PL_parser->oldbufptr = buf + oldbufptr_pos;
10687 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10688 PL_parser->linestart = buf + linestart_pos;
10689 if (PL_parser->last_uni)
10690 PL_parser->last_uni = buf + last_uni_pos;
10691 if (PL_parser->last_lop)
10692 PL_parser->last_lop = buf + last_lop_pos;
10693 if (PL_parser->lex_shared->re_eval_start)
10694 PL_parser->lex_shared->re_eval_start =
10695 buf + re_eval_start_pos;
10698 ns = SvPVX_const(PL_linestr) + offset;
10699 svlast = SvEND(sv) - 1;
10701 for (; s < ns; s++) {
10702 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10703 COPLINE_INC_WITH_HERELINES;
10706 goto read_more_line;
10708 /* handle quoted delimiters */
10709 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10711 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10713 if ((svlast-1 - t) % 2) {
10714 if (!keep_quoted) {
10715 *(svlast-1) = term;
10717 SvCUR_set(sv, SvCUR(sv) - 1);
10722 if (PL_multi_open == PL_multi_close) {
10728 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10729 /* At here, all closes are "was quoted" one,
10730 so we don't check PL_multi_close. */
10732 if (!keep_quoted && *(t+1) == PL_multi_open)
10737 else if (*t == PL_multi_open)
10745 SvCUR_set(sv, w - SvPVX_const(sv));
10747 last_off = w - SvPVX(sv);
10748 if (--brackets <= 0)
10753 if (!keep_delims) {
10754 SvCUR_set(sv, SvCUR(sv) - 1);
10760 /* extend sv if need be */
10761 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10762 /* set 'to' to the next character in the sv's string */
10763 to = SvPVX(sv)+SvCUR(sv);
10765 /* if open delimiter is the close delimiter read unbridle */
10766 if (PL_multi_open == PL_multi_close) {
10767 for (; s < PL_bufend; s++,to++) {
10768 /* embedded newlines increment the current line number */
10769 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10770 COPLINE_INC_WITH_HERELINES;
10771 /* handle quoted delimiters */
10772 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10775 || (re_reparse && s[1] == '\\'))
10778 /* any other quotes are simply copied straight through */
10782 /* terminate when run out of buffer (the for() condition), or
10783 have found the terminator */
10784 else if (*s == term) {
10787 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10790 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10796 /* if the terminator isn't the same as the start character (e.g.,
10797 matched brackets), we have to allow more in the quoting, and
10798 be prepared for nested brackets.
10801 /* read until we run out of string, or we find the terminator */
10802 for (; s < PL_bufend; s++,to++) {
10803 /* embedded newlines increment the line count */
10804 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10805 COPLINE_INC_WITH_HERELINES;
10806 /* backslashes can escape the open or closing characters */
10807 if (*s == '\\' && s+1 < PL_bufend) {
10808 if (!keep_quoted &&
10809 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10813 /* Here, 'deprecate_escaped_meta' is true iff the
10814 * delimiters are paired metacharacters, and 's' points
10815 * to an occurrence of one of them within the string,
10816 * which was preceded by a backslash. If this is a
10817 * context where the delimiter is also a metacharacter,
10818 * the backslash is useless, and deprecated. () and []
10819 * are meta in any context. {} are meta only when
10820 * appearing in a quantifier or in things like '\p{'
10821 * (but '\\p{' isn't meta). They also aren't meta
10822 * unless there is a matching closed, escaped char
10823 * later on within the string. If 's' points to an
10824 * open, set a flag; if to a close, test that flag, and
10825 * raise a warning if it was set */
10827 if (deprecate_escaped_meta) {
10828 if (*s == PL_multi_open) {
10832 /* Look for a closing '\}' */
10833 else if (regcurly(s, TRUE)) {
10836 /* Look for e.g. '\x{' */
10837 else if (s - start > 2
10838 && _generic_isCC(*(s-2),
10839 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10840 { /* Exclude '\\x', '\\\\x', etc. */
10841 char *lookbehind = s - 4;
10842 bool is_meta = TRUE;
10843 while (lookbehind >= start
10844 && *lookbehind == '\\')
10846 is_meta = ! is_meta;
10854 else if (escaped_open) {
10855 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10856 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10857 escaped_open = NULL;
10864 /* allow nested opens and closes */
10865 else if (*s == PL_multi_close && --brackets <= 0)
10867 else if (*s == PL_multi_open)
10869 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10874 /* terminate the copied string and update the sv's end-of-string */
10876 SvCUR_set(sv, to - SvPVX_const(sv));
10879 * this next chunk reads more into the buffer if we're not done yet
10883 break; /* handle case where we are done yet :-) */
10885 #ifndef PERL_STRICT_CR
10886 if (to - SvPVX_const(sv) >= 2) {
10887 if ((to[-2] == '\r' && to[-1] == '\n') ||
10888 (to[-2] == '\n' && to[-1] == '\r'))
10892 SvCUR_set(sv, to - SvPVX_const(sv));
10894 else if (to[-1] == '\r')
10897 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10902 /* if we're out of file, or a read fails, bail and reset the current
10903 line marker so we can report where the unterminated string began
10906 if (PL_madskills) {
10907 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10909 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10911 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10914 COPLINE_INC_WITH_HERELINES;
10915 PL_bufptr = PL_bufend;
10916 if (!lex_next_chunk(0)) {
10918 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10927 /* at this point, we have successfully read the delimited string */
10929 if (!PL_encoding || UTF || re_reparse) {
10931 if (PL_madskills) {
10932 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10933 const int len = s - tstart;
10935 sv_catpvn(PL_thisstuff, tstart, len);
10937 PL_thisstuff = newSVpvn(tstart, len);
10938 if (!PL_thisclose && !keep_delims)
10939 PL_thisclose = newSVpvn(s,termlen);
10944 sv_catpvn(sv, s, termlen);
10949 if (PL_madskills) {
10950 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10951 const int len = s - tstart - termlen;
10953 sv_catpvn(PL_thisstuff, tstart, len);
10955 PL_thisstuff = newSVpvn(tstart, len);
10956 if (!PL_thisclose && !keep_delims)
10957 PL_thisclose = newSVpvn(s - termlen,termlen);
10961 if (has_utf8 || (PL_encoding && !re_reparse))
10964 PL_multi_end = CopLINE(PL_curcop);
10965 CopLINE_set(PL_curcop, PL_multi_start);
10966 PL_parser->herelines = herelines;
10968 /* if we allocated too much space, give some back */
10969 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10970 SvLEN_set(sv, SvCUR(sv) + 1);
10971 SvPV_renew(sv, SvLEN(sv));
10974 /* decide whether this is the first or second quoted string we've read
10979 PL_sublex_info.repl = sv;
10982 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10988 takes: pointer to position in buffer
10989 returns: pointer to new position in buffer
10990 side-effects: builds ops for the constant in pl_yylval.op
10992 Read a number in any of the formats that Perl accepts:
10994 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10995 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10998 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
11000 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11003 If it reads a number without a decimal point or an exponent, it will
11004 try converting the number to an integer and see if it can do so
11005 without loss of precision.
11009 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11012 const char *s = start; /* current position in buffer */
11013 char *d; /* destination in temp buffer */
11014 char *e; /* end of temp buffer */
11015 NV nv; /* number read, as a double */
11016 SV *sv = NULL; /* place to put the converted number */
11017 bool floatit; /* boolean: int or float? */
11018 const char *lastub = NULL; /* position of last underbar */
11019 static const char* const number_too_long = "Number too long";
11021 PERL_ARGS_ASSERT_SCAN_NUM;
11023 /* We use the first character to decide what type of number this is */
11027 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11029 /* if it starts with a 0, it could be an octal number, a decimal in
11030 0.13 disguise, or a hexadecimal number, or a binary number. */
11034 u holds the "number so far"
11035 shift the power of 2 of the base
11036 (hex == 4, octal == 3, binary == 1)
11037 overflowed was the number more than we can hold?
11039 Shift is used when we add a digit. It also serves as an "are
11040 we in octal/hex/binary?" indicator to disallow hex characters
11041 when in octal mode.
11046 bool overflowed = FALSE;
11047 bool just_zero = TRUE; /* just plain 0 or binary number? */
11048 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11049 static const char* const bases[5] =
11050 { "", "binary", "", "octal", "hexadecimal" };
11051 static const char* const Bases[5] =
11052 { "", "Binary", "", "Octal", "Hexadecimal" };
11053 static const char* const maxima[5] =
11055 "0b11111111111111111111111111111111",
11059 const char *base, *Base, *max;
11061 /* check for hex */
11062 if (s[1] == 'x' || s[1] == 'X') {
11066 } else if (s[1] == 'b' || s[1] == 'B') {
11071 /* check for a decimal in disguise */
11072 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11074 /* so it must be octal */
11081 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11082 "Misplaced _ in number");
11086 base = bases[shift];
11087 Base = Bases[shift];
11088 max = maxima[shift];
11090 /* read the rest of the number */
11092 /* x is used in the overflow test,
11093 b is the digit we're adding on. */
11098 /* if we don't mention it, we're done */
11102 /* _ are ignored -- but warned about if consecutive */
11104 if (lastub && s == lastub + 1)
11105 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11106 "Misplaced _ in number");
11110 /* 8 and 9 are not octal */
11111 case '8': case '9':
11113 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11117 case '2': case '3': case '4':
11118 case '5': case '6': case '7':
11120 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11123 case '0': case '1':
11124 b = *s++ & 15; /* ASCII digit -> value of digit */
11128 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11129 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11130 /* make sure they said 0x */
11133 b = (*s++ & 7) + 9;
11135 /* Prepare to put the digit we have onto the end
11136 of the number so far. We check for overflows.
11142 x = u << shift; /* make room for the digit */
11144 if ((x >> shift) != u
11145 && !(PL_hints & HINT_NEW_BINARY)) {
11148 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11149 "Integer overflow in %s number",
11152 u = x | b; /* add the digit to the end */
11155 n *= nvshift[shift];
11156 /* If an NV has not enough bits in its
11157 * mantissa to represent an UV this summing of
11158 * small low-order numbers is a waste of time
11159 * (because the NV cannot preserve the
11160 * low-order bits anyway): we could just
11161 * remember when did we overflow and in the
11162 * end just multiply n by the right
11170 /* if we get here, we had success: make a scalar value from
11175 /* final misplaced underbar check */
11176 if (s[-1] == '_') {
11177 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11181 if (n > 4294967295.0)
11182 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11183 "%s number > %s non-portable",
11189 if (u > 0xffffffff)
11190 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11191 "%s number > %s non-portable",
11196 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11197 sv = new_constant(start, s - start, "integer",
11198 sv, NULL, NULL, 0);
11199 else if (PL_hints & HINT_NEW_BINARY)
11200 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11205 handle decimal numbers.
11206 we're also sent here when we read a 0 as the first digit
11208 case '1': case '2': case '3': case '4': case '5':
11209 case '6': case '7': case '8': case '9': case '.':
11212 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11215 /* read next group of digits and _ and copy into d */
11216 while (isDIGIT(*s) || *s == '_') {
11217 /* skip underscores, checking for misplaced ones
11221 if (lastub && s == lastub + 1)
11222 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11223 "Misplaced _ in number");
11227 /* check for end of fixed-length buffer */
11229 Perl_croak(aTHX_ "%s", number_too_long);
11230 /* if we're ok, copy the character */
11235 /* final misplaced underbar check */
11236 if (lastub && s == lastub + 1) {
11237 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11240 /* read a decimal portion if there is one. avoid
11241 3..5 being interpreted as the number 3. followed
11244 if (*s == '.' && s[1] != '.') {
11249 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11250 "Misplaced _ in number");
11254 /* copy, ignoring underbars, until we run out of digits.
11256 for (; isDIGIT(*s) || *s == '_'; s++) {
11257 /* fixed length buffer check */
11259 Perl_croak(aTHX_ "%s", number_too_long);
11261 if (lastub && s == lastub + 1)
11262 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11263 "Misplaced _ in number");
11269 /* fractional part ending in underbar? */
11270 if (s[-1] == '_') {
11271 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11272 "Misplaced _ in number");
11274 if (*s == '.' && isDIGIT(s[1])) {
11275 /* oops, it's really a v-string, but without the "v" */
11281 /* read exponent part, if present */
11282 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11286 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11287 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11289 /* stray preinitial _ */
11291 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11292 "Misplaced _ in number");
11296 /* allow positive or negative exponent */
11297 if (*s == '+' || *s == '-')
11300 /* stray initial _ */
11302 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11303 "Misplaced _ in number");
11307 /* read digits of exponent */
11308 while (isDIGIT(*s) || *s == '_') {
11311 Perl_croak(aTHX_ "%s", number_too_long);
11315 if (((lastub && s == lastub + 1) ||
11316 (!isDIGIT(s[1]) && s[1] != '_')))
11317 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11318 "Misplaced _ in number");
11326 We try to do an integer conversion first if no characters
11327 indicating "float" have been found.
11332 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11334 if (flags == IS_NUMBER_IN_UV) {
11336 sv = newSViv(uv); /* Prefer IVs over UVs. */
11339 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11340 if (uv <= (UV) IV_MIN)
11341 sv = newSViv(-(IV)uv);
11348 STORE_NUMERIC_LOCAL_SET_STANDARD();
11349 /* terminate the string */
11351 nv = Atof(PL_tokenbuf);
11352 RESTORE_NUMERIC_LOCAL();
11357 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11358 const char *const key = floatit ? "float" : "integer";
11359 const STRLEN keylen = floatit ? 5 : 7;
11360 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11361 key, keylen, sv, NULL, NULL, 0);
11365 /* if it starts with a v, it could be a v-string */
11368 sv = newSV(5); /* preallocate storage space */
11369 ENTER_with_name("scan_vstring");
11371 s = scan_vstring(s, PL_bufend, sv);
11372 SvREFCNT_inc_simple_void_NN(sv);
11373 LEAVE_with_name("scan_vstring");
11377 /* make the op for the constant and return */
11380 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11382 lvalp->opval = NULL;
11388 S_scan_formline(pTHX_ char *s)
11393 SV * const stuff = newSVpvs("");
11394 bool needargs = FALSE;
11395 bool eofmt = FALSE;
11397 char *tokenstart = s;
11398 SV* savewhite = NULL;
11400 if (PL_madskills) {
11401 savewhite = PL_thiswhite;
11406 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11408 while (!needargs) {
11411 #ifdef PERL_STRICT_CR
11412 while (SPACE_OR_TAB(*t))
11415 while (SPACE_OR_TAB(*t) || *t == '\r')
11418 if (*t == '\n' || t == PL_bufend) {
11423 eol = (char *) memchr(s,'\n',PL_bufend-s);
11427 for (t = s; t < eol; t++) {
11428 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11430 goto enough; /* ~~ must be first line in formline */
11432 if (*t == '@' || *t == '^')
11436 sv_catpvn(stuff, s, eol-s);
11437 #ifndef PERL_STRICT_CR
11438 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11439 char *end = SvPVX(stuff) + SvCUR(stuff);
11442 SvCUR_set(stuff, SvCUR(stuff) - 1);
11450 if ((PL_rsfp || PL_parser->filtered)
11451 && PL_parser->form_lex_state == LEX_NORMAL) {
11454 if (PL_madskills) {
11456 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11458 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11461 PL_bufptr = PL_bufend;
11462 COPLINE_INC_WITH_HERELINES;
11463 got_some = lex_next_chunk(0);
11464 CopLINE_dec(PL_curcop);
11467 tokenstart = PL_bufptr;
11475 if (!SvCUR(stuff) || needargs)
11476 PL_lex_state = PL_parser->form_lex_state;
11477 if (SvCUR(stuff)) {
11478 PL_expect = XSTATE;
11480 const char *s2 = s;
11481 while (*s2 == '\r' || *s2 == ' ' || *s2 == '\t' || *s2 == '\f'
11485 start_force(PL_curforce);
11486 PL_expect = XTERMBLOCK;
11487 NEXTVAL_NEXTTOKE.ival = 0;
11490 start_force(PL_curforce);
11491 NEXTVAL_NEXTTOKE.ival = 0;
11492 force_next(FORMLBRACK);
11495 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11497 else if (PL_encoding)
11498 sv_recode_to_utf8(stuff, PL_encoding);
11500 start_force(PL_curforce);
11501 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11505 SvREFCNT_dec(stuff);
11507 PL_lex_formbrack = 0;
11510 if (PL_madskills) {
11512 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11514 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11515 PL_thiswhite = savewhite;
11522 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11525 const I32 oldsavestack_ix = PL_savestack_ix;
11526 CV* const outsidecv = PL_compcv;
11528 SAVEI32(PL_subline);
11529 save_item(PL_subname);
11530 SAVESPTR(PL_compcv);
11532 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11533 CvFLAGS(PL_compcv) |= flags;
11535 PL_subline = CopLINE(PL_curcop);
11536 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11537 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11538 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11539 if (outsidecv && CvPADLIST(outsidecv))
11540 CvPADLIST(PL_compcv)->xpadl_outid =
11541 PadlistNAMES(CvPADLIST(outsidecv));
11543 return oldsavestack_ix;
11547 S_yywarn(pTHX_ const char *const s, U32 flags)
11551 PERL_ARGS_ASSERT_YYWARN;
11553 PL_in_eval |= EVAL_WARNONLY;
11554 yyerror_pv(s, flags);
11555 PL_in_eval &= ~EVAL_WARNONLY;
11560 Perl_yyerror(pTHX_ const char *const s)
11562 PERL_ARGS_ASSERT_YYERROR;
11563 return yyerror_pvn(s, strlen(s), 0);
11567 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11569 PERL_ARGS_ASSERT_YYERROR_PV;
11570 return yyerror_pvn(s, strlen(s), flags);
11574 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11577 const char *context = NULL;
11580 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11581 int yychar = PL_parser->yychar;
11583 PERL_ARGS_ASSERT_YYERROR_PVN;
11585 if (!yychar || (yychar == ';' && !PL_rsfp))
11586 sv_catpvs(where_sv, "at EOF");
11587 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11588 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11589 PL_oldbufptr != PL_bufptr) {
11592 The code below is removed for NetWare because it abends/crashes on NetWare
11593 when the script has error such as not having the closing quotes like:
11594 if ($var eq "value)
11595 Checking of white spaces is anyway done in NetWare code.
11598 while (isSPACE(*PL_oldoldbufptr))
11601 context = PL_oldoldbufptr;
11602 contlen = PL_bufptr - PL_oldoldbufptr;
11604 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11605 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11608 The code below is removed for NetWare because it abends/crashes on NetWare
11609 when the script has error such as not having the closing quotes like:
11610 if ($var eq "value)
11611 Checking of white spaces is anyway done in NetWare code.
11614 while (isSPACE(*PL_oldbufptr))
11617 context = PL_oldbufptr;
11618 contlen = PL_bufptr - PL_oldbufptr;
11620 else if (yychar > 255)
11621 sv_catpvs(where_sv, "next token ???");
11622 else if (yychar == -2) { /* YYEMPTY */
11623 if (PL_lex_state == LEX_NORMAL ||
11624 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11625 sv_catpvs(where_sv, "at end of line");
11626 else if (PL_lex_inpat)
11627 sv_catpvs(where_sv, "within pattern");
11629 sv_catpvs(where_sv, "within string");
11632 sv_catpvs(where_sv, "next char ");
11634 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11635 else if (isPRINT_LC(yychar)) {
11636 const char string = yychar;
11637 sv_catpvn(where_sv, &string, 1);
11640 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11642 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11643 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11644 OutCopFILE(PL_curcop),
11645 (IV)(PL_parser->preambling == NOLINE
11646 ? CopLINE(PL_curcop)
11647 : PL_parser->preambling));
11649 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11650 UTF8fARG(UTF, contlen, context));
11652 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11653 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11654 Perl_sv_catpvf(aTHX_ msg,
11655 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11656 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11659 if (PL_in_eval & EVAL_WARNONLY) {
11660 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11664 if (PL_error_count >= 10) {
11666 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11667 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11668 SVfARG(errsv), OutCopFILE(PL_curcop));
11670 Perl_croak(aTHX_ "%s has too many errors.\n",
11671 OutCopFILE(PL_curcop));
11674 PL_in_my_stash = NULL;
11679 S_swallow_bom(pTHX_ U8 *s)
11682 const STRLEN slen = SvCUR(PL_linestr);
11684 PERL_ARGS_ASSERT_SWALLOW_BOM;
11688 if (s[1] == 0xFE) {
11689 /* UTF-16 little-endian? (or UTF-32LE?) */
11690 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11691 /* diag_listed_as: Unsupported script encoding %s */
11692 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11693 #ifndef PERL_NO_UTF16_FILTER
11694 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11696 if (PL_bufend > (char*)s) {
11697 s = add_utf16_textfilter(s, TRUE);
11700 /* diag_listed_as: Unsupported script encoding %s */
11701 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11706 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11707 #ifndef PERL_NO_UTF16_FILTER
11708 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11710 if (PL_bufend > (char *)s) {
11711 s = add_utf16_textfilter(s, FALSE);
11714 /* diag_listed_as: Unsupported script encoding %s */
11715 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11719 case BOM_UTF8_FIRST_BYTE: {
11720 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11721 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11722 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11723 s += len + 1; /* UTF-8 */
11730 if (s[2] == 0xFE && s[3] == 0xFF) {
11731 /* UTF-32 big-endian */
11732 /* diag_listed_as: Unsupported script encoding %s */
11733 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11736 else if (s[2] == 0 && s[3] != 0) {
11739 * are a good indicator of UTF-16BE. */
11740 #ifndef PERL_NO_UTF16_FILTER
11741 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11742 s = add_utf16_textfilter(s, FALSE);
11744 /* diag_listed_as: Unsupported script encoding %s */
11745 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11752 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11755 * are a good indicator of UTF-16LE. */
11756 #ifndef PERL_NO_UTF16_FILTER
11757 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11758 s = add_utf16_textfilter(s, TRUE);
11760 /* diag_listed_as: Unsupported script encoding %s */
11761 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11769 #ifndef PERL_NO_UTF16_FILTER
11771 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11774 SV *const filter = FILTER_DATA(idx);
11775 /* We re-use this each time round, throwing the contents away before we
11777 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11778 SV *const utf8_buffer = filter;
11779 IV status = IoPAGE(filter);
11780 const bool reverse = cBOOL(IoLINES(filter));
11783 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11785 /* As we're automatically added, at the lowest level, and hence only called
11786 from this file, we can be sure that we're not called in block mode. Hence
11787 don't bother writing code to deal with block mode. */
11789 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11792 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11794 DEBUG_P(PerlIO_printf(Perl_debug_log,
11795 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11796 FPTR2DPTR(void *, S_utf16_textfilter),
11797 reverse ? 'l' : 'b', idx, maxlen, status,
11798 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11805 /* First, look in our buffer of existing UTF-8 data: */
11806 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11810 } else if (status == 0) {
11812 IoPAGE(filter) = 0;
11813 nl = SvEND(utf8_buffer);
11816 STRLEN got = nl - SvPVX(utf8_buffer);
11817 /* Did we have anything to append? */
11819 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11820 /* Everything else in this code works just fine if SVp_POK isn't
11821 set. This, however, needs it, and we need it to work, else
11822 we loop infinitely because the buffer is never consumed. */
11823 sv_chop(utf8_buffer, nl);
11827 /* OK, not a complete line there, so need to read some more UTF-16.
11828 Read an extra octect if the buffer currently has an odd number. */
11832 if (SvCUR(utf16_buffer) >= 2) {
11833 /* Location of the high octet of the last complete code point.
11834 Gosh, UTF-16 is a pain. All the benefits of variable length,
11835 *coupled* with all the benefits of partial reads and
11837 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11838 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11840 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11844 /* We have the first half of a surrogate. Read more. */
11845 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11848 status = FILTER_READ(idx + 1, utf16_buffer,
11849 160 + (SvCUR(utf16_buffer) & 1));
11850 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11851 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11854 IoPAGE(filter) = status;
11859 chars = SvCUR(utf16_buffer) >> 1;
11860 have = SvCUR(utf8_buffer);
11861 SvGROW(utf8_buffer, have + chars * 3 + 1);
11864 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11865 (U8*)SvPVX_const(utf8_buffer) + have,
11866 chars * 2, &newlen);
11868 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11869 (U8*)SvPVX_const(utf8_buffer) + have,
11870 chars * 2, &newlen);
11872 SvCUR_set(utf8_buffer, have + newlen);
11875 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11876 it's private to us, and utf16_to_utf8{,reversed} take a
11877 (pointer,length) pair, rather than a NUL-terminated string. */
11878 if(SvCUR(utf16_buffer) & 1) {
11879 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11880 SvCUR_set(utf16_buffer, 1);
11882 SvCUR_set(utf16_buffer, 0);
11885 DEBUG_P(PerlIO_printf(Perl_debug_log,
11886 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11888 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11889 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11894 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11896 SV *filter = filter_add(S_utf16_textfilter, NULL);
11898 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11900 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11901 sv_setpvs(filter, "");
11902 IoLINES(filter) = reversed;
11903 IoPAGE(filter) = 1; /* Not EOF */
11905 /* Sadly, we have to return a valid pointer, come what may, so we have to
11906 ignore any error return from this. */
11907 SvCUR_set(PL_linestr, 0);
11908 if (FILTER_READ(0, PL_linestr, 0)) {
11909 SvUTF8_on(PL_linestr);
11911 SvUTF8_on(PL_linestr);
11913 PL_bufend = SvEND(PL_linestr);
11914 return (U8*)SvPVX(PL_linestr);
11919 Returns a pointer to the next character after the parsed
11920 vstring, as well as updating the passed in sv.
11922 Function must be called like
11924 sv = sv_2mortal(newSV(5));
11925 s = scan_vstring(s,e,sv);
11927 where s and e are the start and end of the string.
11928 The sv should already be large enough to store the vstring
11929 passed in, for performance reasons.
11931 This function may croak if fatal warnings are enabled in the
11932 calling scope, hence the sv_2mortal in the example (to prevent
11933 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11939 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11942 const char *pos = s;
11943 const char *start = s;
11945 PERL_ARGS_ASSERT_SCAN_VSTRING;
11947 if (*pos == 'v') pos++; /* get past 'v' */
11948 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11950 if ( *pos != '.') {
11951 /* this may not be a v-string if followed by => */
11952 const char *next = pos;
11953 while (next < e && isSPACE(*next))
11955 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11956 /* return string not v-string */
11957 sv_setpvn(sv,(char *)s,pos-s);
11958 return (char *)pos;
11962 if (!isALPHA(*pos)) {
11963 U8 tmpbuf[UTF8_MAXBYTES+1];
11966 s++; /* get past 'v' */
11971 /* this is atoi() that tolerates underscores */
11974 const char *end = pos;
11976 while (--end >= s) {
11978 const UV orev = rev;
11979 rev += (*end - '0') * mult;
11982 /* diag_listed_as: Integer overflow in %s number */
11983 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11984 "Integer overflow in decimal number");
11988 if (rev > 0x7FFFFFFF)
11989 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11991 /* Append native character for the rev point */
11992 tmpend = uvchr_to_utf8(tmpbuf, rev);
11993 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11994 if (!UVCHR_IS_INVARIANT(rev))
11996 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12002 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12006 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12013 Perl_keyword_plugin_standard(pTHX_
12014 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12016 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12017 PERL_UNUSED_CONTEXT;
12018 PERL_UNUSED_ARG(keyword_ptr);
12019 PERL_UNUSED_ARG(keyword_len);
12020 PERL_UNUSED_ARG(op_ptr);
12021 return KEYWORD_PLUGIN_DECLINE;
12024 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12026 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12028 SAVEI32(PL_lex_brackets);
12029 if (PL_lex_brackets > 100)
12030 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12031 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12032 SAVEI32(PL_lex_allbrackets);
12033 PL_lex_allbrackets = 0;
12034 SAVEI8(PL_lex_fakeeof);
12035 PL_lex_fakeeof = (U8)fakeeof;
12036 if(yyparse(gramtype) && !PL_parser->error_count)
12037 qerror(Perl_mess(aTHX_ "Parse error"));
12040 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12042 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12046 SAVEVPTR(PL_eval_root);
12047 PL_eval_root = NULL;
12048 parse_recdescent(gramtype, fakeeof);
12054 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12056 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12059 if (flags & ~PARSE_OPTIONAL)
12060 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12061 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12062 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12063 if (!PL_parser->error_count)
12064 qerror(Perl_mess(aTHX_ "Parse error"));
12065 exprop = newOP(OP_NULL, 0);
12071 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12073 Parse a Perl arithmetic expression. This may contain operators of precedence
12074 down to the bit shift operators. The expression must be followed (and thus
12075 terminated) either by a comparison or lower-precedence operator or by
12076 something that would normally terminate an expression such as semicolon.
12077 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12078 otherwise it is mandatory. It is up to the caller to ensure that the
12079 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12080 the source of the code to be parsed and the lexical context for the
12083 The op tree representing the expression is returned. If an optional
12084 expression is absent, a null pointer is returned, otherwise the pointer
12087 If an error occurs in parsing or compilation, in most cases a valid op
12088 tree is returned anyway. The error is reflected in the parser state,
12089 normally resulting in a single exception at the top level of parsing
12090 which covers all the compilation errors that occurred. Some compilation
12091 errors, however, will throw an exception immediately.
12097 Perl_parse_arithexpr(pTHX_ U32 flags)
12099 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12103 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12105 Parse a Perl term expression. This may contain operators of precedence
12106 down to the assignment operators. The expression must be followed (and thus
12107 terminated) either by a comma or lower-precedence operator or by
12108 something that would normally terminate an expression such as semicolon.
12109 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12110 otherwise it is mandatory. It is up to the caller to ensure that the
12111 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12112 the source of the code to be parsed and the lexical context for the
12115 The op tree representing the expression is returned. If an optional
12116 expression is absent, a null pointer is returned, otherwise the pointer
12119 If an error occurs in parsing or compilation, in most cases a valid op
12120 tree is returned anyway. The error is reflected in the parser state,
12121 normally resulting in a single exception at the top level of parsing
12122 which covers all the compilation errors that occurred. Some compilation
12123 errors, however, will throw an exception immediately.
12129 Perl_parse_termexpr(pTHX_ U32 flags)
12131 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12135 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12137 Parse a Perl list expression. This may contain operators of precedence
12138 down to the comma operator. The expression must be followed (and thus
12139 terminated) either by a low-precedence logic operator such as C<or> or by
12140 something that would normally terminate an expression such as semicolon.
12141 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12142 otherwise it is mandatory. It is up to the caller to ensure that the
12143 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12144 the source of the code to be parsed and the lexical context for the
12147 The op tree representing the expression is returned. If an optional
12148 expression is absent, a null pointer is returned, otherwise the pointer
12151 If an error occurs in parsing or compilation, in most cases a valid op
12152 tree is returned anyway. The error is reflected in the parser state,
12153 normally resulting in a single exception at the top level of parsing
12154 which covers all the compilation errors that occurred. Some compilation
12155 errors, however, will throw an exception immediately.
12161 Perl_parse_listexpr(pTHX_ U32 flags)
12163 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12167 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12169 Parse a single complete Perl expression. This allows the full
12170 expression grammar, including the lowest-precedence operators such
12171 as C<or>. The expression must be followed (and thus terminated) by a
12172 token that an expression would normally be terminated by: end-of-file,
12173 closing bracketing punctuation, semicolon, or one of the keywords that
12174 signals a postfix expression-statement modifier. If I<flags> includes
12175 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12176 mandatory. It is up to the caller to ensure that the dynamic parser
12177 state (L</PL_parser> et al) is correctly set to reflect the source of
12178 the code to be parsed and the lexical context for the expression.
12180 The op tree representing the expression is returned. If an optional
12181 expression is absent, a null pointer is returned, otherwise the pointer
12184 If an error occurs in parsing or compilation, in most cases a valid op
12185 tree is returned anyway. The error is reflected in the parser state,
12186 normally resulting in a single exception at the top level of parsing
12187 which covers all the compilation errors that occurred. Some compilation
12188 errors, however, will throw an exception immediately.
12194 Perl_parse_fullexpr(pTHX_ U32 flags)
12196 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12200 =for apidoc Amx|OP *|parse_block|U32 flags
12202 Parse a single complete Perl code block. This consists of an opening
12203 brace, a sequence of statements, and a closing brace. The block
12204 constitutes a lexical scope, so C<my> variables and various compile-time
12205 effects can be contained within it. It is up to the caller to ensure
12206 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12207 reflect the source of the code to be parsed and the lexical context for
12210 The op tree representing the code block is returned. This is always a
12211 real op, never a null pointer. It will normally be a C<lineseq> list,
12212 including C<nextstate> or equivalent ops. No ops to construct any kind
12213 of runtime scope are included by virtue of it being a block.
12215 If an error occurs in parsing or compilation, in most cases a valid op
12216 tree (most likely null) is returned anyway. The error is reflected in
12217 the parser state, normally resulting in a single exception at the top
12218 level of parsing which covers all the compilation errors that occurred.
12219 Some compilation errors, however, will throw an exception immediately.
12221 The I<flags> parameter is reserved for future use, and must always
12228 Perl_parse_block(pTHX_ U32 flags)
12231 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12232 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12236 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12238 Parse a single unadorned Perl statement. This may be a normal imperative
12239 statement or a declaration that has compile-time effect. It does not
12240 include any label or other affixture. It is up to the caller to ensure
12241 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12242 reflect the source of the code to be parsed and the lexical context for
12245 The op tree representing the statement is returned. This may be a
12246 null pointer if the statement is null, for example if it was actually
12247 a subroutine definition (which has compile-time side effects). If not
12248 null, it will be ops directly implementing the statement, suitable to
12249 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12250 equivalent op (except for those embedded in a scope contained entirely
12251 within the statement).
12253 If an error occurs in parsing or compilation, in most cases a valid op
12254 tree (most likely null) is returned anyway. The error is reflected in
12255 the parser state, normally resulting in a single exception at the top
12256 level of parsing which covers all the compilation errors that occurred.
12257 Some compilation errors, however, will throw an exception immediately.
12259 The I<flags> parameter is reserved for future use, and must always
12266 Perl_parse_barestmt(pTHX_ U32 flags)
12269 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12270 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12274 =for apidoc Amx|SV *|parse_label|U32 flags
12276 Parse a single label, possibly optional, of the type that may prefix a
12277 Perl statement. It is up to the caller to ensure that the dynamic parser
12278 state (L</PL_parser> et al) is correctly set to reflect the source of
12279 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12280 label is optional, otherwise it is mandatory.
12282 The name of the label is returned in the form of a fresh scalar. If an
12283 optional label is absent, a null pointer is returned.
12285 If an error occurs in parsing, which can only occur if the label is
12286 mandatory, a valid label is returned anyway. The error is reflected in
12287 the parser state, normally resulting in a single exception at the top
12288 level of parsing which covers all the compilation errors that occurred.
12294 Perl_parse_label(pTHX_ U32 flags)
12296 if (flags & ~PARSE_OPTIONAL)
12297 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12298 if (PL_lex_state == LEX_KNOWNEXT) {
12299 PL_parser->yychar = yylex();
12300 if (PL_parser->yychar == LABEL) {
12301 char * const lpv = pl_yylval.pval;
12302 STRLEN llen = strlen(lpv);
12303 PL_parser->yychar = YYEMPTY;
12304 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12311 STRLEN wlen, bufptr_pos;
12314 if (!isIDFIRST_lazy_if(s, UTF))
12316 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12317 if (word_takes_any_delimeter(s, wlen))
12319 bufptr_pos = s - SvPVX(PL_linestr);
12321 lex_read_space(LEX_KEEP_PREVIOUS);
12323 s = SvPVX(PL_linestr) + bufptr_pos;
12324 if (t[0] == ':' && t[1] != ':') {
12325 PL_oldoldbufptr = PL_oldbufptr;
12328 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12332 if (flags & PARSE_OPTIONAL) {
12335 qerror(Perl_mess(aTHX_ "Parse error"));
12336 return newSVpvs("x");
12343 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12345 Parse a single complete Perl statement. This may be a normal imperative
12346 statement or a declaration that has compile-time effect, and may include
12347 optional labels. It is up to the caller to ensure that the dynamic
12348 parser state (L</PL_parser> et al) is correctly set to reflect the source
12349 of the code to be parsed and the lexical context for the statement.
12351 The op tree representing the statement is returned. This may be a
12352 null pointer if the statement is null, for example if it was actually
12353 a subroutine definition (which has compile-time side effects). If not
12354 null, it will be the result of a L</newSTATEOP> call, normally including
12355 a C<nextstate> or equivalent op.
12357 If an error occurs in parsing or compilation, in most cases a valid op
12358 tree (most likely null) is returned anyway. The error is reflected in
12359 the parser state, normally resulting in a single exception at the top
12360 level of parsing which covers all the compilation errors that occurred.
12361 Some compilation errors, however, will throw an exception immediately.
12363 The I<flags> parameter is reserved for future use, and must always
12370 Perl_parse_fullstmt(pTHX_ U32 flags)
12373 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12374 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12378 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12380 Parse a sequence of zero or more Perl statements. These may be normal
12381 imperative statements, including optional labels, or declarations
12382 that have compile-time effect, or any mixture thereof. The statement
12383 sequence ends when a closing brace or end-of-file is encountered in a
12384 place where a new statement could have validly started. It is up to
12385 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12386 is correctly set to reflect the source of the code to be parsed and the
12387 lexical context for the statements.
12389 The op tree representing the statement sequence is returned. This may
12390 be a null pointer if the statements were all null, for example if there
12391 were no statements or if there were only subroutine definitions (which
12392 have compile-time side effects). If not null, it will be a C<lineseq>
12393 list, normally including C<nextstate> or equivalent ops.
12395 If an error occurs in parsing or compilation, in most cases a valid op
12396 tree is returned anyway. The error is reflected in the parser state,
12397 normally resulting in a single exception at the top level of parsing
12398 which covers all the compilation errors that occurred. Some compilation
12399 errors, however, will throw an exception immediately.
12401 The I<flags> parameter is reserved for future use, and must always
12408 Perl_parse_stmtseq(pTHX_ U32 flags)
12413 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12414 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12415 c = lex_peek_unichar(0);
12416 if (c != -1 && c != /*{*/'}')
12417 qerror(Perl_mess(aTHX_ "Parse error"));
12421 #define lex_token_boundary() S_lex_token_boundary(aTHX)
12423 S_lex_token_boundary(pTHX)
12425 PL_oldoldbufptr = PL_oldbufptr;
12426 PL_oldbufptr = PL_bufptr;
12429 #define parse_opt_lexvar() S_parse_opt_lexvar(aTHX)
12431 S_parse_opt_lexvar(pTHX)
12436 lex_token_boundary();
12437 sigil = lex_read_unichar(0);
12438 if (lex_peek_unichar(0) == '#') {
12439 qerror(Perl_mess(aTHX_ "Parse error"));
12443 c = lex_peek_unichar(0);
12444 if (c == -1 || !(UTF ? isIDFIRST_uni(c) : isIDFIRST_A(c)))
12447 d = PL_tokenbuf + 1;
12448 PL_tokenbuf[0] = (char)sigil;
12449 parse_ident(&s, &d, PL_tokenbuf + sizeof(PL_tokenbuf) - 1, 0, cBOOL(UTF));
12451 if (d == PL_tokenbuf+1)
12454 var = newOP(sigil == '$' ? OP_PADSV : sigil == '@' ? OP_PADAV : OP_PADHV,
12455 OPf_MOD | (OPpLVAL_INTRO<<8));
12456 var->op_targ = allocmy(PL_tokenbuf, d - PL_tokenbuf, UTF ? SVf_UTF8 : 0);
12461 Perl_parse_subsignature(pTHX)
12464 int prev_type = 0, pos = 0, min_arity = 0, max_arity = 0;
12465 OP *initops = NULL;
12467 c = lex_peek_unichar(0);
12468 while (c != /*(*/')') {
12472 if (prev_type == 2)
12473 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12474 var = parse_opt_lexvar();
12476 newBINOP(OP_AELEM, 0,
12477 ref(newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv)),
12479 newSVOP(OP_CONST, 0, newSViv(pos))) :
12482 c = lex_peek_unichar(0);
12484 lex_token_boundary();
12485 lex_read_unichar(0);
12487 c = lex_peek_unichar(0);
12488 if (c == ',' || c == /*(*/')') {
12490 qerror(Perl_mess(aTHX_ "Optional parameter "
12491 "lacks default expression"));
12493 OP *defexpr = parse_termexpr(0);
12494 if (defexpr->op_type == OP_UNDEF &&
12495 !(defexpr->op_flags & OPf_KIDS)) {
12500 scalar(newUNOP(OP_RV2AV, 0,
12501 newGVOP(OP_GV, 0, PL_defgv))),
12502 newSVOP(OP_CONST, 0, newSViv(pos+1)));
12504 newCONDOP(0, ifop, expr, defexpr) :
12505 newLOGOP(OP_OR, 0, ifop, defexpr);
12510 if (prev_type == 1)
12511 qerror(Perl_mess(aTHX_ "Mandatory parameter "
12512 "follows optional parameter"));
12514 min_arity = pos + 1;
12516 if (var) expr = newASSIGNOP(OPf_STACKED, var, 0, expr);
12518 initops = op_append_list(OP_LINESEQ, initops,
12519 newSTATEOP(0, NULL, expr));
12525 if (prev_type == 2)
12526 qerror(Perl_mess(aTHX_ "Slurpy parameter not last"));
12527 var = parse_opt_lexvar();
12529 OP *chkop = newLOGOP((pos & 1) ? OP_OR : OP_AND, 0,
12530 newBINOP(OP_BIT_AND, 0,
12531 scalar(newUNOP(OP_RV2AV, 0,
12532 newGVOP(OP_GV, 0, PL_defgv))),
12533 newSVOP(OP_CONST, 0, newSViv(1))),
12534 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12535 newSVOP(OP_CONST, 0,
12536 newSVpvs("Odd name/value argument "
12537 "for subroutine"))));
12538 if (pos != min_arity)
12539 chkop = newLOGOP(OP_AND, 0,
12541 scalar(newUNOP(OP_RV2AV, 0,
12542 newGVOP(OP_GV, 0, PL_defgv))),
12543 newSVOP(OP_CONST, 0, newSViv(pos))),
12545 initops = op_append_list(OP_LINESEQ,
12546 newSTATEOP(0, NULL, chkop),
12551 op_prepend_elem(OP_ASLICE,
12552 newOP(OP_PUSHMARK, 0),
12553 newLISTOP(OP_ASLICE, 0,
12555 newSVOP(OP_CONST, 0, newSViv(pos)),
12556 newUNOP(OP_AV2ARYLEN, 0,
12557 ref(newUNOP(OP_RV2AV, 0,
12558 newGVOP(OP_GV, 0, PL_defgv)),
12560 ref(newUNOP(OP_RV2AV, 0,
12561 newGVOP(OP_GV, 0, PL_defgv)),
12563 newUNOP(OP_RV2AV, 0, newGVOP(OP_GV, 0, PL_defgv));
12564 initops = op_append_list(OP_LINESEQ, initops,
12565 newSTATEOP(0, NULL,
12566 newASSIGNOP(OPf_STACKED, var, 0, slice)));
12573 qerror(Perl_mess(aTHX_ "Parse error"));
12577 c = lex_peek_unichar(0);
12579 case /*(*/')': break;
12582 lex_token_boundary();
12583 lex_read_unichar(0);
12585 c = lex_peek_unichar(0);
12586 } while (c == ',');
12592 if (min_arity != 0) {
12593 initops = op_append_list(OP_LINESEQ,
12594 newSTATEOP(0, NULL,
12597 scalar(newUNOP(OP_RV2AV, 0,
12598 newGVOP(OP_GV, 0, PL_defgv))),
12599 newSVOP(OP_CONST, 0, newSViv(min_arity))),
12600 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12601 newSVOP(OP_CONST, 0,
12602 newSVpvs("Too few arguments for subroutine"))))),
12605 if (max_arity != -1) {
12606 initops = op_append_list(OP_LINESEQ,
12607 newSTATEOP(0, NULL,
12610 scalar(newUNOP(OP_RV2AV, 0,
12611 newGVOP(OP_GV, 0, PL_defgv))),
12612 newSVOP(OP_CONST, 0, newSViv(max_arity))),
12613 newLISTOP(OP_DIE, 0, newOP(OP_PUSHMARK, 0),
12614 newSVOP(OP_CONST, 0,
12615 newSVpvs("Too many arguments for subroutine"))))),
12623 * c-indentation-style: bsd
12624 * c-basic-offset: 4
12625 * indent-tabs-mode: nil
12628 * ex: set ts=8 sts=4 sw=4 et: