3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
27 This is the lower layer of the Perl parser, managing characters and tokens.
29 =for apidoc AmU|yy_parser *|PL_parser
31 Pointer to a structure encapsulating the state of the parsing operation
32 currently in progress. The pointer can be locally changed to perform
33 a nested parse without interfering with the state of an outer parse.
34 Individual members of C<PL_parser> have their own documentation.
40 #define PERL_IN_TOKE_C
42 #include "dquote_static.c"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_defer (PL_parser->lex_defer)
57 #define PL_lex_dojoin (PL_parser->lex_dojoin)
58 #define PL_lex_expect (PL_parser->lex_expect)
59 #define PL_lex_formbrack (PL_parser->lex_formbrack)
60 #define PL_lex_inpat (PL_parser->lex_inpat)
61 #define PL_lex_inwhat (PL_parser->lex_inwhat)
62 #define PL_lex_op (PL_parser->lex_op)
63 #define PL_lex_repl (PL_parser->lex_repl)
64 #define PL_lex_starts (PL_parser->lex_starts)
65 #define PL_lex_stuff (PL_parser->lex_stuff)
66 #define PL_multi_start (PL_parser->multi_start)
67 #define PL_multi_open (PL_parser->multi_open)
68 #define PL_multi_close (PL_parser->multi_close)
69 #define PL_preambled (PL_parser->preambled)
70 #define PL_sublex_info (PL_parser->sublex_info)
71 #define PL_linestr (PL_parser->linestr)
72 #define PL_expect (PL_parser->expect)
73 #define PL_copline (PL_parser->copline)
74 #define PL_bufptr (PL_parser->bufptr)
75 #define PL_oldbufptr (PL_parser->oldbufptr)
76 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77 #define PL_linestart (PL_parser->linestart)
78 #define PL_bufend (PL_parser->bufend)
79 #define PL_last_uni (PL_parser->last_uni)
80 #define PL_last_lop (PL_parser->last_lop)
81 #define PL_last_lop_op (PL_parser->last_lop_op)
82 #define PL_lex_state (PL_parser->lex_state)
83 #define PL_rsfp (PL_parser->rsfp)
84 #define PL_rsfp_filters (PL_parser->rsfp_filters)
85 #define PL_in_my (PL_parser->in_my)
86 #define PL_in_my_stash (PL_parser->in_my_stash)
87 #define PL_tokenbuf (PL_parser->tokenbuf)
88 #define PL_multi_end (PL_parser->multi_end)
89 #define PL_error_count (PL_parser->error_count)
92 # define PL_endwhite (PL_parser->endwhite)
93 # define PL_faketokens (PL_parser->faketokens)
94 # define PL_lasttoke (PL_parser->lasttoke)
95 # define PL_nextwhite (PL_parser->nextwhite)
96 # define PL_realtokenstart (PL_parser->realtokenstart)
97 # define PL_skipwhite (PL_parser->skipwhite)
98 # define PL_thisclose (PL_parser->thisclose)
99 # define PL_thismad (PL_parser->thismad)
100 # define PL_thisopen (PL_parser->thisopen)
101 # define PL_thisstuff (PL_parser->thisstuff)
102 # define PL_thistoken (PL_parser->thistoken)
103 # define PL_thiswhite (PL_parser->thiswhite)
104 # define PL_thiswhite (PL_parser->thiswhite)
105 # define PL_nexttoke (PL_parser->nexttoke)
106 # define PL_curforce (PL_parser->curforce)
108 # define PL_nexttoke (PL_parser->nexttoke)
109 # define PL_nexttype (PL_parser->nexttype)
110 # define PL_nextval (PL_parser->nextval)
113 static const char* const ident_too_long = "Identifier too long";
116 # define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
117 # define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
119 # define CURMAD(slot,sv)
120 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
123 #define XENUMMASK 0x3f
124 #define XFAKEEOF 0x40
125 #define XFAKEBRACK 0x80
127 #ifdef USE_UTF8_SCRIPTS
128 # define UTF (!IN_BYTES)
130 # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
133 /* The maximum number of characters preceding the unrecognized one to display */
134 #define UNRECOGNIZED_PRECEDE_COUNT 10
136 /* In variables named $^X, these are the legal values for X.
137 * 1999-02-27 mjd-perl-patch@plover.com */
138 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
140 #define SPACE_OR_TAB(c) isBLANK_A(c)
142 /* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
144 * can get by with a single comparison (if the compiler is smart enough).
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
150 /* #define LEX_NOTPARSING 11 is done in perl.h. */
152 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
153 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
158 /* at end of code, eg "$x" followed by: */
159 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164 #define LEX_INTERPCONST 2 /* NOT USED */
165 #define LEX_FORMLINE 1 /* expecting a format line */
166 #define LEX_KNOWNEXT 0 /* next token known; just return it */
170 static const char* const lex_state_names[] = {
185 #include "keywords.h"
187 /* CLINE is a macro that ensures PL_copline has a sane value */
189 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
192 # define SKIPSPACE0(s) skipspace0(s)
193 # define SKIPSPACE1(s) skipspace1(s)
194 # define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
195 # define PEEKSPACE(s) skipspace2(s,0)
197 # define SKIPSPACE0(s) skipspace(s)
198 # define SKIPSPACE1(s) skipspace(s)
199 # define SKIPSPACE2(s,tsv) skipspace(s)
200 # define PEEKSPACE(s) skipspace(s)
204 * Convenience functions to return different tokens and prime the
205 * lexer for the next token. They all take an argument.
207 * TOKEN : generic token (used for '(', DOLSHARP, etc)
208 * OPERATOR : generic operator
209 * AOPERATOR : assignment operator
210 * PREBLOCK : beginning the block after an if, while, foreach, ...
211 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
212 * PREREF : *EXPR where EXPR is not a simple identifier
213 * TERM : expression term
214 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
215 * LOOPX : loop exiting command (goto, last, dump, etc)
216 * FTST : file test operator
217 * FUN0 : zero-argument function
218 * FUN0OP : zero-argument function, with its op created in this file
219 * FUN1 : not used, except for not, which isn't a UNIOP
220 * BOop : bitwise or or xor
222 * SHop : shift operator
223 * PWop : power operator
224 * PMop : pattern-matching operator
225 * Aop : addition-level operator
226 * Mop : multiplication-level operator
227 * Eop : equality-testing operator
228 * Rop : relational operator <= != gt
230 * Also see LOP and lop() below.
233 #ifdef DEBUGGING /* Serve -DT. */
234 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
236 # define REPORT(retval) (retval)
239 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
240 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
241 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
242 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
243 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
244 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
245 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
246 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
247 #define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
248 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
249 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
250 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
251 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
252 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
253 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
254 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
255 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
256 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
257 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
258 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
259 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
260 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
262 /* This bit of chicanery makes a unary function followed by
263 * a parenthesis into a function with one argument, highest precedence.
264 * The UNIDOR macro is for unary functions that can be followed by the //
265 * operator (such as C<shift // 0>).
267 #define UNI3(f,x,have_x) { \
268 pl_yylval.ival = f; \
269 if (have_x) PL_expect = x; \
271 PL_last_uni = PL_oldbufptr; \
272 PL_last_lop_op = f; \
274 return REPORT( (int)FUNC1 ); \
276 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
278 #define UNI(f) UNI3(f,XTERM,1)
279 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
280 #define UNIPROTO(f,optional) { \
281 if (optional) PL_last_uni = PL_oldbufptr; \
285 #define UNIBRACK(f) UNI3(f,0,0)
287 /* grandfather return to old style */
290 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
291 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
292 pl_yylval.ival = (f); \
298 #define COPLINE_INC_WITH_HERELINES \
300 CopLINE_inc(PL_curcop); \
301 if (PL_parser->herelines) \
302 CopLINE(PL_curcop) += PL_parser->herelines, \
303 PL_parser->herelines = 0; \
305 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
306 * is no sublex_push to follow. */
307 #define COPLINE_SET_FROM_MULTI_END \
309 CopLINE_set(PL_curcop, PL_multi_end); \
310 if (PL_multi_end != PL_multi_start) \
311 PL_parser->herelines = 0; \
317 /* how to interpret the pl_yylval associated with the token */
321 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
326 static struct debug_tokens {
328 enum token_type type;
330 } const debug_tokens[] =
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
354 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
355 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
356 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
357 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
358 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
359 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
360 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
361 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
362 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
363 { IF, TOKENTYPE_IVAL, "IF" },
364 { LABEL, TOKENTYPE_PVAL, "LABEL" },
365 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
366 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
367 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
368 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
369 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
370 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
371 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
372 { MY, TOKENTYPE_IVAL, "MY" },
373 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
374 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
375 { OROP, TOKENTYPE_IVAL, "OROP" },
376 { OROR, TOKENTYPE_NONE, "OROR" },
377 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
378 { PEG, TOKENTYPE_NONE, "PEG" },
379 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
380 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
381 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
382 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
383 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
384 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
385 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
386 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
387 { PREINC, TOKENTYPE_NONE, "PREINC" },
388 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
389 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
390 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
391 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
392 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
393 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
394 { SUB, TOKENTYPE_NONE, "SUB" },
395 { THING, TOKENTYPE_OPVAL, "THING" },
396 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
397 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
398 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
399 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
400 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
401 { USE, TOKENTYPE_IVAL, "USE" },
402 { WHEN, TOKENTYPE_IVAL, "WHEN" },
403 { WHILE, TOKENTYPE_IVAL, "WHILE" },
404 { WORD, TOKENTYPE_OPVAL, "WORD" },
405 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
406 { 0, TOKENTYPE_NONE, NULL }
409 /* dump the returned token in rv, plus any optional arg in pl_yylval */
412 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
416 PERL_ARGS_ASSERT_TOKEREPORT;
419 const char *name = NULL;
420 enum token_type type = TOKENTYPE_NONE;
421 const struct debug_tokens *p;
422 SV* const report = newSVpvs("<== ");
424 for (p = debug_tokens; p->token; p++) {
425 if (p->token == (int)rv) {
432 Perl_sv_catpv(aTHX_ report, name);
433 else if ((char)rv > ' ' && (char)rv <= '~')
435 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
437 sv_catpvs(report, " (pending identifier)");
440 sv_catpvs(report, "EOF");
442 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
447 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
449 case TOKENTYPE_OPNUM:
450 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
451 PL_op_name[lvalp->ival]);
454 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
456 case TOKENTYPE_OPVAL:
458 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
459 PL_op_name[lvalp->opval->op_type]);
460 if (lvalp->opval->op_type == OP_CONST) {
461 Perl_sv_catpvf(aTHX_ report, " %s",
462 SvPEEK(cSVOPx_sv(lvalp->opval)));
467 sv_catpvs(report, "(opval=null)");
470 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
476 /* print the buffer with suitable escapes */
479 S_printbuf(pTHX_ const char *const fmt, const char *const s)
481 SV* const tmp = newSVpvs("");
483 PERL_ARGS_ASSERT_PRINTBUF;
485 GCC_DIAG_IGNORE(-Wformat-nonliteral); /* fmt checked by caller */
486 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494 S_deprecate_commaless_var_list(pTHX) {
496 deprecate("comma-less variable list");
497 return REPORT(','); /* grandfather non-comma-format format */
503 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
504 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
508 S_ao(pTHX_ int toketype)
511 if (*PL_bufptr == '=') {
513 if (toketype == ANDAND)
514 pl_yylval.ival = OP_ANDASSIGN;
515 else if (toketype == OROR)
516 pl_yylval.ival = OP_ORASSIGN;
517 else if (toketype == DORDOR)
518 pl_yylval.ival = OP_DORASSIGN;
526 * When Perl expects an operator and finds something else, no_op
527 * prints the warning. It always prints "<something> found where
528 * operator expected. It prints "Missing semicolon on previous line?"
529 * if the surprise occurs at the start of the line. "do you need to
530 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
531 * where the compiler doesn't know if foo is a method call or a function.
532 * It prints "Missing operator before end of line" if there's nothing
533 * after the missing operator, or "... before <...>" if there is something
534 * after the missing operator.
538 S_no_op(pTHX_ const char *const what, char *s)
541 char * const oldbp = PL_bufptr;
542 const bool is_first = (PL_oldbufptr == PL_linestart);
544 PERL_ARGS_ASSERT_NO_OP;
550 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
551 if (ckWARN_d(WARN_SYNTAX)) {
553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
554 "\t(Missing semicolon on previous line?)\n");
555 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
557 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
558 t += UTF ? UTF8SKIP(t) : 1)
560 if (t < PL_bufptr && isSPACE(*t))
561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
562 "\t(Do you need to predeclare %"UTF8f"?)\n",
563 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
567 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
568 "\t(Missing operator before %"UTF8f"?)\n",
569 UTF8fARG(UTF, s - oldbp, oldbp));
577 * Complain about missing quote/regexp/heredoc terminator.
578 * If it's called with NULL then it cauterizes the line buffer.
579 * If we're in a delimited string and the delimiter is a control
580 * character, it's reformatted into a two-char sequence like ^C.
585 S_missingterm(pTHX_ char *s)
591 char * const nl = strrchr(s,'\n');
595 else if ((U8) PL_multi_close < 32) {
597 tmpbuf[1] = (char)toCTRL(PL_multi_close);
602 *tmpbuf = (char)PL_multi_close;
606 q = strchr(s,'"') ? '\'' : '"';
607 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
613 * Check whether the named feature is enabled.
616 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
619 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
621 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
623 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
625 if (namelen > MAX_FEATURE_LEN)
627 memcpy(&he_name[8], name, namelen);
629 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
630 REFCOUNTED_HE_EXISTS));
634 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
635 * utf16-to-utf8-reversed.
638 #ifdef PERL_CR_FILTER
642 const char *s = SvPVX_const(sv);
643 const char * const e = s + SvCUR(sv);
645 PERL_ARGS_ASSERT_STRIP_RETURN;
647 /* outer loop optimized to do nothing if there are no CR-LFs */
649 if (*s++ == '\r' && *s == '\n') {
650 /* hit a CR-LF, need to copy the rest */
654 if (*s == '\r' && s[1] == '\n')
665 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
667 const I32 count = FILTER_READ(idx+1, sv, maxlen);
668 if (count > 0 && !maxlen)
675 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
677 Creates and initialises a new lexer/parser state object, supplying
678 a context in which to lex and parse from a new source of Perl code.
679 A pointer to the new state object is placed in L</PL_parser>. An entry
680 is made on the save stack so that upon unwinding the new state object
681 will be destroyed and the former value of L</PL_parser> will be restored.
682 Nothing else need be done to clean up the parsing context.
684 The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
685 non-null, provides a string (in SV form) containing code to be parsed.
686 A copy of the string is made, so subsequent modification of I<line>
687 does not affect parsing. I<rsfp>, if non-null, provides an input stream
688 from which code will be read to be parsed. If both are non-null, the
689 code in I<line> comes first and must consist of complete lines of input,
690 and I<rsfp> supplies the remainder of the source.
692 The I<flags> parameter is reserved for future use. Currently it is only
693 used by perl internally, so extensions should always pass zero.
698 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
699 can share filters with the current parser.
700 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
701 caller, hence isn't owned by the parser, so shouldn't be closed on parser
702 destruction. This is used to handle the case of defaulting to reading the
703 script from the standard input because no filename was given on the command
704 line (without getting confused by situation where STDIN has been closed, so
705 the script handle is opened on fd 0) */
708 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
711 const char *s = NULL;
712 yy_parser *parser, *oparser;
713 if (flags && flags & ~LEX_START_FLAGS)
714 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
716 /* create and initialise a parser */
718 Newxz(parser, 1, yy_parser);
719 parser->old_parser = oparser = PL_parser;
722 parser->stack = NULL;
724 parser->stack_size = 0;
726 /* on scope exit, free this parser and restore any outer one */
728 parser->saved_curcop = PL_curcop;
730 /* initialise lexer state */
733 parser->curforce = -1;
735 parser->nexttoke = 0;
737 parser->error_count = oparser ? oparser->error_count : 0;
738 parser->copline = parser->preambling = NOLINE;
739 parser->lex_state = LEX_NORMAL;
740 parser->expect = XSTATE;
742 parser->rsfp_filters =
743 !(flags & LEX_START_SAME_FILTER) || !oparser
745 : MUTABLE_AV(SvREFCNT_inc(
746 oparser->rsfp_filters
747 ? oparser->rsfp_filters
748 : (oparser->rsfp_filters = newAV())
751 Newx(parser->lex_brackstack, 120, char);
752 Newx(parser->lex_casestack, 12, char);
753 *parser->lex_casestack = '\0';
754 Newxz(parser->lex_shared, 1, LEXSHARED);
758 s = SvPV_const(line, len);
759 parser->linestr = flags & LEX_START_COPIED
760 ? SvREFCNT_inc_simple_NN(line)
761 : newSVpvn_flags(s, len, SvUTF8(line));
762 sv_catpvn(parser->linestr, "\n;", rsfp ? 1 : 2);
764 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
766 parser->oldoldbufptr =
769 parser->linestart = SvPVX(parser->linestr);
770 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
771 parser->last_lop = parser->last_uni = NULL;
773 assert(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
774 |LEX_DONT_CLOSE_RSFP));
775 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
776 |LEX_DONT_CLOSE_RSFP));
778 parser->in_pod = parser->filtered = 0;
782 /* delete a parser object */
785 Perl_parser_free(pTHX_ const yy_parser *parser)
787 PERL_ARGS_ASSERT_PARSER_FREE;
789 PL_curcop = parser->saved_curcop;
790 SvREFCNT_dec(parser->linestr);
792 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
793 PerlIO_clearerr(parser->rsfp);
794 else if (parser->rsfp && (!parser->old_parser ||
795 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
796 PerlIO_close(parser->rsfp);
797 SvREFCNT_dec(parser->rsfp_filters);
798 SvREFCNT_dec(parser->lex_stuff);
799 SvREFCNT_dec(parser->sublex_info.repl);
801 Safefree(parser->lex_brackstack);
802 Safefree(parser->lex_casestack);
803 Safefree(parser->lex_shared);
804 PL_parser = parser->old_parser;
809 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
812 I32 nexttoke = parser->lasttoke;
814 I32 nexttoke = parser->nexttoke;
816 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
819 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
821 && parser->nexttoke[nexttoke].next_val.opval
822 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
823 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
824 op_free(parser->nexttoke[nexttoke].next_val.opval);
825 parser->nexttoke[nexttoke].next_val.opval = NULL;
828 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
829 && parser->nextval[nexttoke].opval
830 && parser->nextval[nexttoke].opval->op_slabbed
831 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
832 op_free(parser->nextval[nexttoke].opval);
833 parser->nextval[nexttoke].opval = NULL;
841 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
843 Buffer scalar containing the chunk currently under consideration of the
844 text currently being lexed. This is always a plain string scalar (for
845 which C<SvPOK> is true). It is not intended to be used as a scalar by
846 normal scalar means; instead refer to the buffer directly by the pointer
847 variables described below.
849 The lexer maintains various C<char*> pointers to things in the
850 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
851 reallocated, all of these pointers must be updated. Don't attempt to
852 do this manually, but rather use L</lex_grow_linestr> if you need to
853 reallocate the buffer.
855 The content of the text chunk in the buffer is commonly exactly one
856 complete line of input, up to and including a newline terminator,
857 but there are situations where it is otherwise. The octets of the
858 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
859 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
860 flag on this scalar, which may disagree with it.
862 For direct examination of the buffer, the variable
863 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
864 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
865 of these pointers is usually preferable to examination of the scalar
866 through normal scalar means.
868 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
870 Direct pointer to the end of the chunk of text currently being lexed, the
871 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
872 + SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
873 always located at the end of the buffer, and does not count as part of
874 the buffer's contents.
876 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
878 Points to the current position of lexing inside the lexer buffer.
879 Characters around this point may be freely examined, within
880 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
881 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
882 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
884 Lexing code (whether in the Perl core or not) moves this pointer past
885 the characters that it consumes. It is also expected to perform some
886 bookkeeping whenever a newline character is consumed. This movement
887 can be more conveniently performed by the function L</lex_read_to>,
888 which handles newlines appropriately.
890 Interpretation of the buffer's octets can be abstracted out by
891 using the slightly higher-level functions L</lex_peek_unichar> and
892 L</lex_read_unichar>.
894 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
896 Points to the start of the current line inside the lexer buffer.
897 This is useful for indicating at which column an error occurred, and
898 not much else. This must be updated by any lexing code that consumes
899 a newline; the function L</lex_read_to> handles this detail.
905 =for apidoc Amx|bool|lex_bufutf8
907 Indicates whether the octets in the lexer buffer
908 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
909 of Unicode characters. If not, they should be interpreted as Latin-1
910 characters. This is analogous to the C<SvUTF8> flag for scalars.
912 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
913 contains valid UTF-8. Lexing code must be robust in the face of invalid
916 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
917 is significant, but not the whole story regarding the input character
918 encoding. Normally, when a file is being read, the scalar contains octets
919 and its C<SvUTF8> flag is off, but the octets should be interpreted as
920 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
921 however, the scalar may have the C<SvUTF8> flag on, and in this case its
922 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
923 is in effect. This logic may change in the future; use this function
924 instead of implementing the logic yourself.
930 Perl_lex_bufutf8(pTHX)
936 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
938 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
939 at least I<len> octets (including terminating NUL). Returns a
940 pointer to the reallocated buffer. This is necessary before making
941 any direct modification of the buffer that would increase its length.
942 L</lex_stuff_pvn> provides a more convenient way to insert text into
945 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
946 this function updates all of the lexer's variables that point directly
953 Perl_lex_grow_linestr(pTHX_ STRLEN len)
957 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
958 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
959 linestr = PL_parser->linestr;
960 buf = SvPVX(linestr);
961 if (len <= SvLEN(linestr))
963 bufend_pos = PL_parser->bufend - buf;
964 bufptr_pos = PL_parser->bufptr - buf;
965 oldbufptr_pos = PL_parser->oldbufptr - buf;
966 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
967 linestart_pos = PL_parser->linestart - buf;
968 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
969 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
970 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
971 PL_parser->lex_shared->re_eval_start - buf : 0;
973 buf = sv_grow(linestr, len);
975 PL_parser->bufend = buf + bufend_pos;
976 PL_parser->bufptr = buf + bufptr_pos;
977 PL_parser->oldbufptr = buf + oldbufptr_pos;
978 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
979 PL_parser->linestart = buf + linestart_pos;
980 if (PL_parser->last_uni)
981 PL_parser->last_uni = buf + last_uni_pos;
982 if (PL_parser->last_lop)
983 PL_parser->last_lop = buf + last_lop_pos;
984 if (PL_parser->lex_shared->re_eval_start)
985 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
990 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
992 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
993 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
994 reallocating the buffer if necessary. This means that lexing code that
995 runs later will see the characters as if they had appeared in the input.
996 It is not recommended to do this as part of normal parsing, and most
997 uses of this facility run the risk of the inserted characters being
998 interpreted in an unintended manner.
1000 The string to be inserted is represented by I<len> octets starting
1001 at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1002 according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
1003 The characters are recoded for the lexer buffer, according to how the
1004 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1005 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1006 function is more convenient.
1012 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1016 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1017 if (flags & ~(LEX_STUFF_UTF8))
1018 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1020 if (flags & LEX_STUFF_UTF8) {
1023 STRLEN highhalf = 0; /* Count of variants */
1024 const char *p, *e = pv+len;
1025 for (p = pv; p != e; p++) {
1026 if (! UTF8_IS_INVARIANT(*p)) {
1032 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1033 bufptr = PL_parser->bufptr;
1034 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1035 SvCUR_set(PL_parser->linestr,
1036 SvCUR(PL_parser->linestr) + len+highhalf);
1037 PL_parser->bufend += len+highhalf;
1038 for (p = pv; p != e; p++) {
1040 if (! UTF8_IS_INVARIANT(c)) {
1041 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1042 *bufptr++ = UTF8_TWO_BYTE_LO(c);
1044 *bufptr++ = (char)c;
1049 if (flags & LEX_STUFF_UTF8) {
1050 STRLEN highhalf = 0;
1051 const char *p, *e = pv+len;
1052 for (p = pv; p != e; p++) {
1054 if (UTF8_IS_ABOVE_LATIN1(c)) {
1055 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1056 "non-Latin-1 character into Latin-1 input");
1057 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1060 } else if (! UTF8_IS_INVARIANT(c)) {
1061 /* malformed UTF-8 */
1063 SAVESPTR(PL_warnhook);
1064 PL_warnhook = PERL_WARNHOOK_FATAL;
1065 utf8n_to_uvchr((U8*)p, e-p, NULL, 0);
1071 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1072 bufptr = PL_parser->bufptr;
1073 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1074 SvCUR_set(PL_parser->linestr,
1075 SvCUR(PL_parser->linestr) + len-highhalf);
1076 PL_parser->bufend += len-highhalf;
1079 if (UTF8_IS_INVARIANT(*p)) {
1085 *bufptr++ = TWO_BYTE_UTF8_TO_NATIVE(*p, *(p+1));
1091 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1092 bufptr = PL_parser->bufptr;
1093 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1094 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1095 PL_parser->bufend += len;
1096 Copy(pv, bufptr, len, char);
1102 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1104 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1105 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1106 reallocating the buffer if necessary. This means that lexing code that
1107 runs later will see the characters as if they had appeared in the input.
1108 It is not recommended to do this as part of normal parsing, and most
1109 uses of this facility run the risk of the inserted characters being
1110 interpreted in an unintended manner.
1112 The string to be inserted is represented by octets starting at I<pv>
1113 and continuing to the first nul. These octets are interpreted as either
1114 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1115 in I<flags>. The characters are recoded for the lexer buffer, according
1116 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1117 If it is not convenient to nul-terminate a string to be inserted, the
1118 L</lex_stuff_pvn> function is more appropriate.
1124 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1126 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1127 lex_stuff_pvn(pv, strlen(pv), flags);
1131 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1133 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1134 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1135 reallocating the buffer if necessary. This means that lexing code that
1136 runs later will see the characters as if they had appeared in the input.
1137 It is not recommended to do this as part of normal parsing, and most
1138 uses of this facility run the risk of the inserted characters being
1139 interpreted in an unintended manner.
1141 The string to be inserted is the string value of I<sv>. The characters
1142 are recoded for the lexer buffer, according to how the buffer is currently
1143 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1144 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1145 need to construct a scalar.
1151 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1155 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1157 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1159 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1163 =for apidoc Amx|void|lex_unstuff|char *ptr
1165 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1166 I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1167 This hides the discarded text from any lexing code that runs later,
1168 as if the text had never appeared.
1170 This is not the normal way to consume lexed text. For that, use
1177 Perl_lex_unstuff(pTHX_ char *ptr)
1181 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1182 buf = PL_parser->bufptr;
1184 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187 bufend = PL_parser->bufend;
1189 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1190 unstuff_len = ptr - buf;
1191 Move(ptr, buf, bufend+1-ptr, char);
1192 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1193 PL_parser->bufend = bufend - unstuff_len;
1197 =for apidoc Amx|void|lex_read_to|char *ptr
1199 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1200 to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1201 performing the correct bookkeeping whenever a newline character is passed.
1202 This is the normal way to consume lexed text.
1204 Interpretation of the buffer's octets can be abstracted out by
1205 using the slightly higher-level functions L</lex_peek_unichar> and
1206 L</lex_read_unichar>.
1212 Perl_lex_read_to(pTHX_ char *ptr)
1215 PERL_ARGS_ASSERT_LEX_READ_TO;
1216 s = PL_parser->bufptr;
1217 if (ptr < s || ptr > PL_parser->bufend)
1218 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1219 for (; s != ptr; s++)
1221 COPLINE_INC_WITH_HERELINES;
1222 PL_parser->linestart = s+1;
1224 PL_parser->bufptr = ptr;
1228 =for apidoc Amx|void|lex_discard_to|char *ptr
1230 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1231 up to I<ptr>. The remaining content of the buffer will be moved, and
1232 all pointers into the buffer updated appropriately. I<ptr> must not
1233 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1234 it is not permitted to discard text that has yet to be lexed.
1236 Normally it is not necessarily to do this directly, because it suffices to
1237 use the implicit discarding behaviour of L</lex_next_chunk> and things
1238 based on it. However, if a token stretches across multiple lines,
1239 and the lexing code has kept multiple lines of text in the buffer for
1240 that purpose, then after completion of the token it would be wise to
1241 explicitly discard the now-unneeded earlier lines, to avoid future
1242 multi-line tokens growing the buffer without bound.
1248 Perl_lex_discard_to(pTHX_ char *ptr)
1252 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1253 buf = SvPVX(PL_parser->linestr);
1255 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1258 if (ptr > PL_parser->bufptr)
1259 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1260 discard_len = ptr - buf;
1261 if (PL_parser->oldbufptr < ptr)
1262 PL_parser->oldbufptr = ptr;
1263 if (PL_parser->oldoldbufptr < ptr)
1264 PL_parser->oldoldbufptr = ptr;
1265 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1266 PL_parser->last_uni = NULL;
1267 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1268 PL_parser->last_lop = NULL;
1269 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1270 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1271 PL_parser->bufend -= discard_len;
1272 PL_parser->bufptr -= discard_len;
1273 PL_parser->oldbufptr -= discard_len;
1274 PL_parser->oldoldbufptr -= discard_len;
1275 if (PL_parser->last_uni)
1276 PL_parser->last_uni -= discard_len;
1277 if (PL_parser->last_lop)
1278 PL_parser->last_lop -= discard_len;
1282 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1284 Reads in the next chunk of text to be lexed, appending it to
1285 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1286 looked to the end of the current chunk and wants to know more. It is
1287 usual, but not necessary, for lexing to have consumed the entirety of
1288 the current chunk at this time.
1290 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1291 chunk (i.e., the current chunk has been entirely consumed), normally the
1292 current chunk will be discarded at the same time that the new chunk is
1293 read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1294 will not be discarded. If the current chunk has not been entirely
1295 consumed, then it will not be discarded regardless of the flag.
1297 Returns true if some new text was added to the buffer, or false if the
1298 buffer has reached the end of the input text.
1303 #define LEX_FAKE_EOF 0x80000000
1304 #define LEX_NO_TERM 0x40000000
1307 Perl_lex_next_chunk(pTHX_ U32 flags)
1311 STRLEN old_bufend_pos, new_bufend_pos;
1312 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1313 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1314 bool got_some_for_debugger = 0;
1316 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318 linestr = PL_parser->linestr;
1319 buf = SvPVX(linestr);
1320 if (!(flags & LEX_KEEP_PREVIOUS) &&
1321 PL_parser->bufptr == PL_parser->bufend) {
1322 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1324 if (PL_parser->last_uni != PL_parser->bufend)
1325 PL_parser->last_uni = NULL;
1326 if (PL_parser->last_lop != PL_parser->bufend)
1327 PL_parser->last_lop = NULL;
1328 last_uni_pos = last_lop_pos = 0;
1332 old_bufend_pos = PL_parser->bufend - buf;
1333 bufptr_pos = PL_parser->bufptr - buf;
1334 oldbufptr_pos = PL_parser->oldbufptr - buf;
1335 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1336 linestart_pos = PL_parser->linestart - buf;
1337 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1338 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1340 if (flags & LEX_FAKE_EOF) {
1342 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1344 } else if (filter_gets(linestr, old_bufend_pos)) {
1346 got_some_for_debugger = 1;
1347 } else if (flags & LEX_NO_TERM) {
1350 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1351 sv_setpvs(linestr, "");
1353 /* End of real input. Close filehandle (unless it was STDIN),
1354 * then add implicit termination.
1356 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1357 PerlIO_clearerr(PL_parser->rsfp);
1358 else if (PL_parser->rsfp)
1359 (void)PerlIO_close(PL_parser->rsfp);
1360 PL_parser->rsfp = NULL;
1361 PL_parser->in_pod = PL_parser->filtered = 0;
1363 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1366 if (!PL_in_eval && PL_minus_p) {
1368 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1369 PL_minus_n = PL_minus_p = 0;
1370 } else if (!PL_in_eval && PL_minus_n) {
1371 sv_catpvs(linestr, /*{*/";}");
1374 sv_catpvs(linestr, ";");
1377 buf = SvPVX(linestr);
1378 new_bufend_pos = SvCUR(linestr);
1379 PL_parser->bufend = buf + new_bufend_pos;
1380 PL_parser->bufptr = buf + bufptr_pos;
1381 PL_parser->oldbufptr = buf + oldbufptr_pos;
1382 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1383 PL_parser->linestart = buf + linestart_pos;
1384 if (PL_parser->last_uni)
1385 PL_parser->last_uni = buf + last_uni_pos;
1386 if (PL_parser->last_lop)
1387 PL_parser->last_lop = buf + last_lop_pos;
1388 if (PL_parser->preambling != NOLINE) {
1389 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1390 PL_parser->preambling = NOLINE;
1392 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
1393 PL_curstash != PL_debstash) {
1394 /* debugger active and we're not compiling the debugger code,
1395 * so store the line into the debugger's array of lines
1397 update_debugger_info(NULL, buf+old_bufend_pos,
1398 new_bufend_pos-old_bufend_pos);
1404 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1406 Looks ahead one (Unicode) character in the text currently being lexed.
1407 Returns the codepoint (unsigned integer value) of the next character,
1408 or -1 if lexing has reached the end of the input text. To consume the
1409 peeked character, use L</lex_read_unichar>.
1411 If the next character is in (or extends into) the next chunk of input
1412 text, the next chunk will be read in. Normally the current chunk will be
1413 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1414 then the current chunk will not be discarded.
1416 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1417 is encountered, an exception is generated.
1423 Perl_lex_peek_unichar(pTHX_ U32 flags)
1427 if (flags & ~(LEX_KEEP_PREVIOUS))
1428 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1429 s = PL_parser->bufptr;
1430 bufend = PL_parser->bufend;
1436 if (!lex_next_chunk(flags))
1438 s = PL_parser->bufptr;
1439 bufend = PL_parser->bufend;
1442 if (UTF8_IS_INVARIANT(head))
1444 if (UTF8_IS_START(head)) {
1445 len = UTF8SKIP(&head);
1446 while ((STRLEN)(bufend-s) < len) {
1447 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1453 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1454 if (retlen == (STRLEN)-1) {
1455 /* malformed UTF-8 */
1457 SAVESPTR(PL_warnhook);
1458 PL_warnhook = PERL_WARNHOOK_FATAL;
1459 utf8n_to_uvchr((U8*)s, bufend-s, NULL, 0);
1465 if (!lex_next_chunk(flags))
1467 s = PL_parser->bufptr;
1474 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1476 Reads the next (Unicode) character in the text currently being lexed.
1477 Returns the codepoint (unsigned integer value) of the character read,
1478 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1479 if lexing has reached the end of the input text. To non-destructively
1480 examine the next character, use L</lex_peek_unichar> instead.
1482 If the next character is in (or extends into) the next chunk of input
1483 text, the next chunk will be read in. Normally the current chunk will be
1484 discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1485 then the current chunk will not be discarded.
1487 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1488 is encountered, an exception is generated.
1494 Perl_lex_read_unichar(pTHX_ U32 flags)
1497 if (flags & ~(LEX_KEEP_PREVIOUS))
1498 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1499 c = lex_peek_unichar(flags);
1502 COPLINE_INC_WITH_HERELINES;
1504 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1506 ++(PL_parser->bufptr);
1512 =for apidoc Amx|void|lex_read_space|U32 flags
1514 Reads optional spaces, in Perl style, in the text currently being
1515 lexed. The spaces may include ordinary whitespace characters and
1516 Perl-style comments. C<#line> directives are processed if encountered.
1517 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1518 at a non-space character (or the end of the input text).
1520 If spaces extend into the next chunk of input text, the next chunk will
1521 be read in. Normally the current chunk will be discarded at the same
1522 time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1523 chunk will not be discarded.
1528 #define LEX_NO_INCLINE 0x40000000
1529 #define LEX_NO_NEXT_CHUNK 0x80000000
1532 Perl_lex_read_space(pTHX_ U32 flags)
1535 const bool can_incline = !(flags & LEX_NO_INCLINE);
1536 bool need_incline = 0;
1537 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1538 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1541 sv_free(PL_skipwhite);
1542 PL_skipwhite = NULL;
1545 PL_skipwhite = newSVpvs("");
1546 #endif /* PERL_MAD */
1547 s = PL_parser->bufptr;
1548 bufend = PL_parser->bufend;
1554 } while (!(c == '\n' || (c == 0 && s == bufend)));
1555 } else if (c == '\n') {
1558 PL_parser->linestart = s;
1564 } else if (isSPACE(c)) {
1566 } else if (c == 0 && s == bufend) {
1571 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1572 #endif /* PERL_MAD */
1573 if (flags & LEX_NO_NEXT_CHUNK)
1575 PL_parser->bufptr = s;
1576 l = CopLINE(PL_curcop);
1577 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1578 got_more = lex_next_chunk(flags);
1579 CopLINE_set(PL_curcop, l);
1580 s = PL_parser->bufptr;
1581 bufend = PL_parser->bufend;
1584 if (can_incline && need_incline && PL_parser->rsfp) {
1594 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1595 #endif /* PERL_MAD */
1596 PL_parser->bufptr = s;
1601 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1603 This function performs syntax checking on a prototype, C<proto>.
1604 If C<warn> is true, any illegal characters or mismatched brackets
1605 will trigger illegalproto warnings, declaring that they were
1606 detected in the prototype for C<name>.
1608 The return value is C<true> if this is a valid prototype, and
1609 C<false> if it is not, regardless of whether C<warn> was C<true> or
1612 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1619 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1621 STRLEN len, origlen;
1622 char *p = proto ? SvPV(proto, len) : NULL;
1623 bool bad_proto = FALSE;
1624 bool in_brackets = FALSE;
1625 bool after_slash = FALSE;
1626 char greedy_proto = ' ';
1627 bool proto_after_greedy_proto = FALSE;
1628 bool must_be_last = FALSE;
1629 bool underscore = FALSE;
1630 bool bad_proto_after_underscore = FALSE;
1632 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1638 for (; len--; p++) {
1641 proto_after_greedy_proto = TRUE;
1643 if (!strchr(";@%", *p))
1644 bad_proto_after_underscore = TRUE;
1647 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1654 in_brackets = FALSE;
1655 else if ((*p == '@' || *p == '%') &&
1658 must_be_last = TRUE;
1667 after_slash = FALSE;
1672 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1675 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1676 origlen, UNI_DISPLAY_ISPRINT)
1677 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1679 if (proto_after_greedy_proto)
1680 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1681 "Prototype after '%c' for %"SVf" : %s",
1682 greedy_proto, SVfARG(name), p);
1684 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1685 "Missing ']' in prototype for %"SVf" : %s",
1688 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1689 "Illegal character in prototype for %"SVf" : %s",
1691 if (bad_proto_after_underscore)
1692 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1693 "Illegal character after '_' in prototype for %"SVf" : %s",
1697 return (! (proto_after_greedy_proto || bad_proto) );
1702 * This subroutine has nothing to do with tilting, whether at windmills
1703 * or pinball tables. Its name is short for "increment line". It
1704 * increments the current line number in CopLINE(PL_curcop) and checks
1705 * to see whether the line starts with a comment of the form
1706 * # line 500 "foo.pm"
1707 * If so, it sets the current line number and file to the values in the comment.
1711 S_incline(pTHX_ const char *s)
1719 PERL_ARGS_ASSERT_INCLINE;
1721 COPLINE_INC_WITH_HERELINES;
1722 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1723 && s+1 == PL_bufend && *s == ';') {
1724 /* fake newline in string eval */
1725 CopLINE_dec(PL_curcop);
1730 while (SPACE_OR_TAB(*s))
1732 if (strnEQ(s, "line", 4))
1736 if (SPACE_OR_TAB(*s))
1740 while (SPACE_OR_TAB(*s))
1748 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1750 while (SPACE_OR_TAB(*s))
1752 if (*s == '"' && (t = strchr(s+1, '"'))) {
1758 while (!isSPACE(*t))
1762 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1764 if (*e != '\n' && *e != '\0')
1765 return; /* false alarm */
1767 line_num = atoi(n)-1;
1770 const STRLEN len = t - s;
1772 if (!PL_rsfp && !PL_parser->filtered) {
1773 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1774 * to *{"::_<newfilename"} */
1775 /* However, the long form of evals is only turned on by the
1776 debugger - usually they're "(eval %lu)" */
1777 GV * const cfgv = CopFILEGV(PL_curcop);
1780 STRLEN tmplen2 = len;
1784 if (tmplen2 + 2 <= sizeof smallbuf)
1787 Newx(tmpbuf2, tmplen2 + 2, char);
1792 memcpy(tmpbuf2 + 2, s, tmplen2);
1795 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1797 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1798 /* adjust ${"::_<newfilename"} to store the new file name */
1799 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1800 /* The line number may differ. If that is the case,
1801 alias the saved lines that are in the array.
1802 Otherwise alias the whole array. */
1803 if (CopLINE(PL_curcop) == line_num) {
1804 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1805 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1807 else if (GvAV(cfgv)) {
1808 AV * const av = GvAV(cfgv);
1809 const I32 start = CopLINE(PL_curcop)+1;
1810 I32 items = AvFILLp(av) - start;
1812 AV * const av2 = GvAVn(gv2);
1813 SV **svp = AvARRAY(av) + start;
1814 I32 l = (I32)line_num+1;
1816 av_store(av2, l++, SvREFCNT_inc(*svp++));
1821 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1824 CopFILE_free(PL_curcop);
1825 CopFILE_setn(PL_curcop, s, len);
1827 CopLINE_set(PL_curcop, line_num);
1830 #define skipspace(s) skipspace_flags(s, 0)
1833 /* skip space before PL_thistoken */
1836 S_skipspace0(pTHX_ char *s)
1838 PERL_ARGS_ASSERT_SKIPSPACE0;
1845 PL_thiswhite = newSVpvs("");
1846 sv_catsv(PL_thiswhite, PL_skipwhite);
1847 sv_free(PL_skipwhite);
1850 PL_realtokenstart = s - SvPVX(PL_linestr);
1854 /* skip space after PL_thistoken */
1857 S_skipspace1(pTHX_ char *s)
1859 const char *start = s;
1860 I32 startoff = start - SvPVX(PL_linestr);
1862 PERL_ARGS_ASSERT_SKIPSPACE1;
1867 start = SvPVX(PL_linestr) + startoff;
1868 if (!PL_thistoken && PL_realtokenstart >= 0) {
1869 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1870 PL_thistoken = newSVpvn(tstart, start - tstart);
1872 PL_realtokenstart = -1;
1875 PL_nextwhite = newSVpvs("");
1876 sv_catsv(PL_nextwhite, PL_skipwhite);
1877 sv_free(PL_skipwhite);
1884 S_skipspace2(pTHX_ char *s, SV **svp)
1887 const I32 startoff = s - SvPVX(PL_linestr);
1889 PERL_ARGS_ASSERT_SKIPSPACE2;
1892 if (!PL_madskills || !svp)
1894 start = SvPVX(PL_linestr) + startoff;
1895 if (!PL_thistoken && PL_realtokenstart >= 0) {
1896 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1897 PL_thistoken = newSVpvn(tstart, start - tstart);
1898 PL_realtokenstart = -1;
1902 *svp = newSVpvs("");
1903 sv_setsv(*svp, PL_skipwhite);
1904 sv_free(PL_skipwhite);
1913 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1915 AV *av = CopFILEAVx(PL_curcop);
1918 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1920 sv = *av_fetch(av, 0, 1);
1921 SvUPGRADE(sv, SVt_PVMG);
1923 if (!SvPOK(sv)) sv_setpvs(sv,"");
1925 sv_catsv(sv, orig_sv);
1927 sv_catpvn(sv, buf, len);
1932 if (PL_parser->preambling == NOLINE)
1933 av_store(av, CopLINE(PL_curcop), sv);
1939 * Called to gobble the appropriate amount and type of whitespace.
1940 * Skips comments as well.
1944 S_skipspace_flags(pTHX_ char *s, U32 flags)
1948 #endif /* PERL_MAD */
1949 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1952 sv_free(PL_skipwhite);
1953 PL_skipwhite = NULL;
1955 #endif /* PERL_MAD */
1956 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1957 while (s < PL_bufend && SPACE_OR_TAB(*s))
1960 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1962 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1963 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1964 LEX_NO_NEXT_CHUNK : 0));
1966 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1967 if (PL_linestart > PL_bufptr)
1968 PL_bufptr = PL_linestart;
1973 PL_skipwhite = newSVpvn(start, s-start);
1974 #endif /* PERL_MAD */
1980 * Check the unary operators to ensure there's no ambiguity in how they're
1981 * used. An ambiguous piece of code would be:
1983 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1984 * the +5 is its argument.
1994 if (PL_oldoldbufptr != PL_last_uni)
1996 while (isSPACE(*PL_last_uni))
1999 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
2001 if ((t = strchr(s, '(')) && t < PL_bufptr)
2004 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2005 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
2006 (int)(s - PL_last_uni), PL_last_uni);
2010 * LOP : macro to build a list operator. Its behaviour has been replaced
2011 * with a subroutine, S_lop() for which LOP is just another name.
2014 #define LOP(f,x) return lop(f,x,s)
2018 * Build a list operator (or something that might be one). The rules:
2019 * - if we have a next token, then it's a list operator [why?]
2020 * - if the next thing is an opening paren, then it's a function
2021 * - else it's a list operator
2025 S_lop(pTHX_ I32 f, int x, char *s)
2029 PERL_ARGS_ASSERT_LOP;
2035 PL_last_lop = PL_oldbufptr;
2036 PL_last_lop_op = (OPCODE)f;
2045 return REPORT(FUNC);
2048 return REPORT(FUNC);
2051 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2052 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2053 return REPORT(LSTOP);
2060 * Sets up for an eventual force_next(). start_force(0) basically does
2061 * an unshift, while start_force(-1) does a push. yylex removes items
2066 S_start_force(pTHX_ int where)
2070 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
2071 where = PL_lasttoke;
2072 assert(PL_curforce < 0 || PL_curforce == where);
2073 if (PL_curforce != where) {
2074 for (i = PL_lasttoke; i > where; --i) {
2075 PL_nexttoke[i] = PL_nexttoke[i-1];
2079 if (PL_curforce < 0) /* in case of duplicate start_force() */
2080 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
2081 PL_curforce = where;
2084 curmad('^', newSVpvs(""));
2085 CURMAD('_', PL_nextwhite);
2090 S_curmad(pTHX_ char slot, SV *sv)
2096 if (PL_curforce < 0)
2097 where = &PL_thismad;
2099 where = &PL_nexttoke[PL_curforce].next_mad;
2105 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2107 else if (PL_encoding) {
2108 sv_recode_to_utf8(sv, PL_encoding);
2113 /* keep a slot open for the head of the list? */
2114 if (slot != '_' && *where && (*where)->mad_key == '^') {
2115 (*where)->mad_key = slot;
2116 sv_free(MUTABLE_SV(((*where)->mad_val)));
2117 (*where)->mad_val = (void*)sv;
2120 addmad(newMADsv(slot, sv), where, 0);
2123 # define start_force(where) NOOP
2124 # define curmad(slot, sv) NOOP
2129 * When the lexer realizes it knows the next token (for instance,
2130 * it is reordering tokens for the parser) then it can call S_force_next
2131 * to know what token to return the next time the lexer is called. Caller
2132 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2133 * and possibly PL_expect to ensure the lexer handles the token correctly.
2137 S_force_next(pTHX_ I32 type)
2142 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2143 tokereport(type, &NEXTVAL_NEXTTOKE);
2147 if (PL_curforce < 0)
2148 start_force(PL_lasttoke);
2149 PL_nexttoke[PL_curforce].next_type = type;
2150 if (PL_lex_state != LEX_KNOWNEXT)
2151 PL_lex_defer = PL_lex_state;
2152 PL_lex_state = LEX_KNOWNEXT;
2153 PL_lex_expect = PL_expect;
2156 PL_nexttype[PL_nexttoke] = type;
2158 if (PL_lex_state != LEX_KNOWNEXT) {
2159 PL_lex_defer = PL_lex_state;
2160 PL_lex_expect = PL_expect;
2161 PL_lex_state = LEX_KNOWNEXT;
2169 * This subroutine handles postfix deref syntax after the arrow has already
2170 * been emitted. @* $* etc. are emitted as two separate token right here.
2171 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2172 * only the first, leaving yylex to find the next.
2176 S_postderef(pTHX_ int const funny, char const next)
2179 assert(funny == DOLSHARP || strchr("$@%&*", funny));
2180 assert(strchr("*[{", next));
2182 PL_expect = XOPERATOR;
2183 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2184 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2185 PL_lex_state = LEX_INTERPEND;
2186 start_force(PL_curforce);
2187 force_next(POSTJOIN);
2189 start_force(PL_curforce);
2194 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2195 && !PL_lex_brackets)
2197 PL_expect = XOPERATOR;
2206 int yyc = PL_parser->yychar;
2207 if (yyc != YYEMPTY) {
2210 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2211 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2212 PL_lex_allbrackets--;
2214 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2215 } else if (yyc == '('/*)*/) {
2216 PL_lex_allbrackets--;
2221 PL_parser->yychar = YYEMPTY;
2226 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2229 SV * const sv = newSVpvn_utf8(start, len,
2232 && !is_ascii_string((const U8*)start, len)
2233 && is_utf8_string((const U8*)start, len));
2239 * When the lexer knows the next thing is a word (for instance, it has
2240 * just seen -> and it knows that the next char is a word char, then
2241 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2245 * char *start : buffer position (must be within PL_linestr)
2246 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
2247 * int check_keyword : if true, Perl checks to make sure the word isn't
2248 * a keyword (do this if the word is a label, e.g. goto FOO)
2249 * int allow_pack : if true, : characters will also be allowed (require,
2250 * use, etc. do this)
2251 * int allow_initial_tick : used by the "sub" lexer only.
2255 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2261 PERL_ARGS_ASSERT_FORCE_WORD;
2263 start = SKIPSPACE1(start);
2265 if (isIDFIRST_lazy_if(s,UTF) ||
2266 (allow_pack && *s == ':') )
2268 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2269 if (check_keyword) {
2270 char *s2 = PL_tokenbuf;
2271 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2273 if (keyword(s2, len, 0))
2276 start_force(PL_curforce);
2278 curmad('X', newSVpvn(start,s-start));
2279 if (token == METHOD) {
2284 PL_expect = XOPERATOR;
2288 curmad('g', newSVpvs( "forced" ));
2289 NEXTVAL_NEXTTOKE.opval
2290 = (OP*)newSVOP(OP_CONST,0,
2291 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2292 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2300 * Called when the lexer wants $foo *foo &foo etc, but the program
2301 * text only contains the "foo" portion. The first argument is a pointer
2302 * to the "foo", and the second argument is the type symbol to prefix.
2303 * Forces the next token to be a "WORD".
2304 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2308 S_force_ident(pTHX_ const char *s, int kind)
2312 PERL_ARGS_ASSERT_FORCE_IDENT;
2315 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2316 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2317 UTF ? SVf_UTF8 : 0));
2318 start_force(PL_curforce);
2319 NEXTVAL_NEXTTOKE.opval = o;
2322 o->op_private = OPpCONST_ENTERED;
2323 /* XXX see note in pp_entereval() for why we forgo typo
2324 warnings if the symbol must be introduced in an eval.
2326 gv_fetchpvn_flags(s, len,
2327 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2328 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2329 kind == '$' ? SVt_PV :
2330 kind == '@' ? SVt_PVAV :
2331 kind == '%' ? SVt_PVHV :
2339 S_force_ident_maybe_lex(pTHX_ char pit)
2341 start_force(PL_curforce);
2342 NEXTVAL_NEXTTOKE.ival = pit;
2347 Perl_str_to_version(pTHX_ SV *sv)
2352 const char *start = SvPV_const(sv,len);
2353 const char * const end = start + len;
2354 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
2356 PERL_ARGS_ASSERT_STR_TO_VERSION;
2358 while (start < end) {
2362 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2367 retval += ((NV)n)/nshift;
2376 * Forces the next token to be a version number.
2377 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2378 * and if "guessing" is TRUE, then no new token is created (and the caller
2379 * must use an alternative parsing method).
2383 S_force_version(pTHX_ char *s, int guessing)
2389 I32 startoff = s - SvPVX(PL_linestr);
2392 PERL_ARGS_ASSERT_FORCE_VERSION;
2400 while (isDIGIT(*d) || *d == '_' || *d == '.')
2404 start_force(PL_curforce);
2405 curmad('X', newSVpvn(s,d-s));
2408 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2410 #ifdef USE_LOCALE_NUMERIC
2411 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2412 setlocale(LC_NUMERIC, "C");
2414 s = scan_num(s, &pl_yylval);
2415 #ifdef USE_LOCALE_NUMERIC
2416 setlocale(LC_NUMERIC, loc);
2419 version = pl_yylval.opval;
2420 ver = cSVOPx(version)->op_sv;
2421 if (SvPOK(ver) && !SvNIOK(ver)) {
2422 SvUPGRADE(ver, SVt_PVNV);
2423 SvNV_set(ver, str_to_version(ver));
2424 SvNOK_on(ver); /* hint that it is a version */
2427 else if (guessing) {
2430 sv_free(PL_nextwhite); /* let next token collect whitespace */
2432 s = SvPVX(PL_linestr) + startoff;
2440 if (PL_madskills && !version) {
2441 sv_free(PL_nextwhite); /* let next token collect whitespace */
2443 s = SvPVX(PL_linestr) + startoff;
2446 /* NOTE: The parser sees the package name and the VERSION swapped */
2447 start_force(PL_curforce);
2448 NEXTVAL_NEXTTOKE.opval = version;
2455 * S_force_strict_version
2456 * Forces the next token to be a version number using strict syntax rules.
2460 S_force_strict_version(pTHX_ char *s)
2465 I32 startoff = s - SvPVX(PL_linestr);
2467 const char *errstr = NULL;
2469 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2471 while (isSPACE(*s)) /* leading whitespace */
2474 if (is_STRICT_VERSION(s,&errstr)) {
2476 s = (char *)scan_version(s, ver, 0);
2477 version = newSVOP(OP_CONST, 0, ver);
2479 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2480 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2484 yyerror(errstr); /* version required */
2489 if (PL_madskills && !version) {
2490 sv_free(PL_nextwhite); /* let next token collect whitespace */
2492 s = SvPVX(PL_linestr) + startoff;
2495 /* NOTE: The parser sees the package name and the VERSION swapped */
2496 start_force(PL_curforce);
2497 NEXTVAL_NEXTTOKE.opval = version;
2505 * Tokenize a quoted string passed in as an SV. It finds the next
2506 * chunk, up to end of string or a backslash. It may make a new
2507 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2512 S_tokeq(pTHX_ SV *sv)
2520 PERL_ARGS_ASSERT_TOKEQ;
2524 assert (!SvIsCOW(sv));
2525 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2529 /* This is relying on the SV being "well formed" with a trailing '\0' */
2530 while (s < send && !(*s == '\\' && s[1] == '\\'))
2535 if ( PL_hints & HINT_NEW_STRING ) {
2536 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2537 SVs_TEMP | SvUTF8(sv));
2541 if (s + 1 < send && (s[1] == '\\'))
2542 s++; /* all that, just for this */
2547 SvCUR_set(sv, d - SvPVX_const(sv));
2549 if ( PL_hints & HINT_NEW_STRING )
2550 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2555 * Now come three functions related to double-quote context,
2556 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2557 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2558 * interact with PL_lex_state, and create fake ( ... ) argument lists
2559 * to handle functions and concatenation.
2563 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2568 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2570 * Pattern matching will set PL_lex_op to the pattern-matching op to
2571 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2573 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2575 * Everything else becomes a FUNC.
2577 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2578 * had an OP_CONST or OP_READLINE). This just sets us up for a
2579 * call to S_sublex_push().
2583 S_sublex_start(pTHX)
2586 const I32 op_type = pl_yylval.ival;
2588 if (op_type == OP_NULL) {
2589 pl_yylval.opval = PL_lex_op;
2593 if (op_type == OP_CONST) {
2594 SV *sv = tokeq(PL_lex_stuff);
2596 if (SvTYPE(sv) == SVt_PVIV) {
2597 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2599 const char * const p = SvPV_const(sv, len);
2600 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2604 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
2605 PL_lex_stuff = NULL;
2609 PL_sublex_info.super_state = PL_lex_state;
2610 PL_sublex_info.sub_inwhat = (U16)op_type;
2611 PL_sublex_info.sub_op = PL_lex_op;
2612 PL_lex_state = LEX_INTERPPUSH;
2616 pl_yylval.opval = PL_lex_op;
2626 * Create a new scope to save the lexing state. The scope will be
2627 * ended in S_sublex_done. Returns a '(', starting the function arguments
2628 * to the uc, lc, etc. found before.
2629 * Sets PL_lex_state to LEX_INTERPCONCAT.
2637 const bool is_heredoc = PL_multi_close == '<';
2640 PL_lex_state = PL_sublex_info.super_state;
2641 SAVEI8(PL_lex_dojoin);
2642 SAVEI32(PL_lex_brackets);
2643 SAVEI32(PL_lex_allbrackets);
2644 SAVEI32(PL_lex_formbrack);
2645 SAVEI8(PL_lex_fakeeof);
2646 SAVEI32(PL_lex_casemods);
2647 SAVEI32(PL_lex_starts);
2648 SAVEI8(PL_lex_state);
2649 SAVESPTR(PL_lex_repl);
2650 SAVEVPTR(PL_lex_inpat);
2651 SAVEI16(PL_lex_inwhat);
2654 SAVECOPLINE(PL_curcop);
2655 SAVEI32(PL_multi_end);
2656 SAVEI32(PL_parser->herelines);
2657 PL_parser->herelines = 0;
2659 SAVEI8(PL_multi_close);
2660 SAVEPPTR(PL_bufptr);
2661 SAVEPPTR(PL_bufend);
2662 SAVEPPTR(PL_oldbufptr);
2663 SAVEPPTR(PL_oldoldbufptr);
2664 SAVEPPTR(PL_last_lop);
2665 SAVEPPTR(PL_last_uni);
2666 SAVEPPTR(PL_linestart);
2667 SAVESPTR(PL_linestr);
2668 SAVEGENERICPV(PL_lex_brackstack);
2669 SAVEGENERICPV(PL_lex_casestack);
2670 SAVEGENERICPV(PL_parser->lex_shared);
2671 SAVEBOOL(PL_parser->lex_re_reparsing);
2672 SAVEI32(PL_copline);
2674 /* The here-doc parser needs to be able to peek into outer lexing
2675 scopes to find the body of the here-doc. So we put PL_linestr and
2676 PL_bufptr into lex_shared, to ‘share’ those values.
2678 PL_parser->lex_shared->ls_linestr = PL_linestr;
2679 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2681 PL_linestr = PL_lex_stuff;
2682 PL_lex_repl = PL_sublex_info.repl;
2683 PL_lex_stuff = NULL;
2684 PL_sublex_info.repl = NULL;
2686 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2687 = SvPVX(PL_linestr);
2688 PL_bufend += SvCUR(PL_linestr);
2689 PL_last_lop = PL_last_uni = NULL;
2690 SAVEFREESV(PL_linestr);
2691 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2693 PL_lex_dojoin = FALSE;
2694 PL_lex_brackets = PL_lex_formbrack = 0;
2695 PL_lex_allbrackets = 0;
2696 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2697 Newx(PL_lex_brackstack, 120, char);
2698 Newx(PL_lex_casestack, 12, char);
2699 PL_lex_casemods = 0;
2700 *PL_lex_casestack = '\0';
2702 PL_lex_state = LEX_INTERPCONCAT;
2704 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2705 PL_copline = NOLINE;
2707 Newxz(shared, 1, LEXSHARED);
2708 shared->ls_prev = PL_parser->lex_shared;
2709 PL_parser->lex_shared = shared;
2711 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2712 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2713 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2714 PL_lex_inpat = PL_sublex_info.sub_op;
2716 PL_lex_inpat = NULL;
2718 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2719 PL_in_eval &= ~EVAL_RE_REPARSING;
2726 * Restores lexer state after a S_sublex_push.
2733 if (!PL_lex_starts++) {
2734 SV * const sv = newSVpvs("");
2735 if (SvUTF8(PL_linestr))
2737 PL_expect = XOPERATOR;
2738 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
2742 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2743 PL_lex_state = LEX_INTERPCASEMOD;
2747 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2748 assert(PL_lex_inwhat != OP_TRANSR);
2749 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2750 PL_linestr = PL_lex_repl;
2752 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2753 PL_bufend += SvCUR(PL_linestr);
2754 PL_last_lop = PL_last_uni = NULL;
2755 PL_lex_dojoin = FALSE;
2756 PL_lex_brackets = 0;
2757 PL_lex_allbrackets = 0;
2758 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2759 PL_lex_casemods = 0;
2760 *PL_lex_casestack = '\0';
2762 if (SvEVALED(PL_lex_repl)) {
2763 PL_lex_state = LEX_INTERPNORMAL;
2765 /* we don't clear PL_lex_repl here, so that we can check later
2766 whether this is an evalled subst; that means we rely on the
2767 logic to ensure sublex_done() is called again only via the
2768 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2771 PL_lex_state = LEX_INTERPCONCAT;
2774 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2775 CopLINE(PL_curcop) +=
2776 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xpad_cop_seq.xlow
2777 + PL_parser->herelines;
2778 PL_parser->herelines = 0;
2783 const line_t l = CopLINE(PL_curcop);
2788 PL_endwhite = newSVpvs("");
2789 sv_catsv(PL_endwhite, PL_thiswhite);
2793 sv_setpvs(PL_thistoken,"");
2795 PL_realtokenstart = -1;
2799 if (PL_multi_close == '<')
2800 PL_parser->herelines += l - PL_multi_end;
2801 PL_bufend = SvPVX(PL_linestr);
2802 PL_bufend += SvCUR(PL_linestr);
2803 PL_expect = XOPERATOR;
2804 PL_sublex_info.sub_inwhat = 0;
2809 PERL_STATIC_INLINE SV*
2810 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2812 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2813 * interior, hence to the "}". Finds what the name resolves to, returning
2814 * an SV* containing it; NULL if no valid one found */
2816 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2823 const U8* first_bad_char_loc;
2824 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2826 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2828 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2830 &first_bad_char_loc))
2832 /* If warnings are on, this will print a more detailed analysis of what
2833 * is wrong than the error message below */
2834 utf8n_to_uvchr(first_bad_char_loc,
2835 e - ((char *) first_bad_char_loc),
2838 /* We deliberately don't try to print the malformed character, which
2839 * might not print very well; it also may be just the first of many
2840 * malformations, so don't print what comes after it */
2841 yyerror(Perl_form(aTHX_
2842 "Malformed UTF-8 character immediately after '%.*s'",
2843 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2847 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2848 /* include the <}> */
2849 e - backslash_ptr + 1);
2851 SvREFCNT_dec_NN(res);
2855 /* See if the charnames handler is the Perl core's, and if so, we can skip
2856 * the validation needed for a user-supplied one, as Perl's does its own
2858 table = GvHV(PL_hintgv); /* ^H */
2859 cvp = hv_fetchs(table, "charnames", FALSE);
2860 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2861 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2863 const char * const name = HvNAME(stash);
2864 if (HvNAMELEN(stash) == sizeof("_charnames")-1
2865 && strEQ(name, "_charnames")) {
2870 /* Here, it isn't Perl's charname handler. We can't rely on a
2871 * user-supplied handler to validate the input name. For non-ut8 input,
2872 * look to see that the first character is legal. Then loop through the
2873 * rest checking that each is a continuation */
2875 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2879 if (! isALPHAU(*s)) {
2884 if (! isCHARNAME_CONT(*s)) {
2887 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2888 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2889 "A sequence of multiple spaces in a charnames "
2890 "alias definition is deprecated");
2894 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2895 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2896 "Trailing white-space in a charnames alias "
2897 "definition is deprecated");
2901 /* Similarly for utf8. For invariants can check directly; for other
2902 * Latin1, can calculate their code point and check; otherwise use a
2904 if (UTF8_IS_INVARIANT(*s)) {
2905 if (! isALPHAU(*s)) {
2909 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2910 if (! isALPHAU(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1)))) {
2916 if (! PL_utf8_charname_begin) {
2917 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2918 PL_utf8_charname_begin = _core_swash_init("utf8",
2919 "_Perl_Charname_Begin",
2921 1, 0, NULL, &flags);
2923 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2930 if (UTF8_IS_INVARIANT(*s)) {
2931 if (! isCHARNAME_CONT(*s)) {
2934 if (*s == ' ' && *(s-1) == ' '
2935 && ckWARN_d(WARN_DEPRECATED)) {
2936 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2937 "A sequence of multiple spaces in a charnam"
2938 "es alias definition is deprecated");
2942 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2943 if (! isCHARNAME_CONT(TWO_BYTE_UTF8_TO_NATIVE(*s, *(s+1))))
2950 if (! PL_utf8_charname_continue) {
2951 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2952 PL_utf8_charname_continue = _core_swash_init("utf8",
2953 "_Perl_Charname_Continue",
2955 1, 0, NULL, &flags);
2957 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2963 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
2964 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2965 "Trailing white-space in a charnames alias "
2966 "definition is deprecated");
2970 if (SvUTF8(res)) { /* Don't accept malformed input */
2971 const U8* first_bad_char_loc;
2973 const char* const str = SvPV_const(res, len);
2974 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2975 /* If warnings are on, this will print a more detailed analysis of
2976 * what is wrong than the error message below */
2977 utf8n_to_uvchr(first_bad_char_loc,
2978 (char *) first_bad_char_loc - str,
2981 /* We deliberately don't try to print the malformed character,
2982 * which might not print very well; it also may be just the first
2983 * of many malformations, so don't print what comes after it */
2986 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2987 (int) (e - backslash_ptr + 1), backslash_ptr,
2988 (int) ((char *) first_bad_char_loc - str), str
2998 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
3000 /* The final %.*s makes sure that should the trailing NUL be missing
3001 * that this print won't run off the end of the string */
3004 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
3005 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
3006 (int)(e - s + bad_char_size), s + bad_char_size
3008 UTF ? SVf_UTF8 : 0);
3016 Extracts the next constant part of a pattern, double-quoted string,
3017 or transliteration. This is terrifying code.
3019 For example, in parsing the double-quoted string "ab\x63$d", it would
3020 stop at the '$' and return an OP_CONST containing 'abc'.
3022 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3023 processing a pattern (PL_lex_inpat is true), a transliteration
3024 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3026 Returns a pointer to the character scanned up to. If this is
3027 advanced from the start pointer supplied (i.e. if anything was
3028 successfully parsed), will leave an OP_CONST for the substring scanned
3029 in pl_yylval. Caller must intuit reason for not parsing further
3030 by looking at the next characters herself.
3034 \N{FOO} => \N{U+hex_for_character_FOO}
3035 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3038 all other \-char, including \N and \N{ apart from \N{ABC}
3041 @ and $ where it appears to be a var, but not for $ as tail anchor
3046 In transliterations:
3047 characters are VERY literal, except for - not at the start or end
3048 of the string, which indicates a range. If the range is in bytes,
3049 scan_const expands the range to the full set of intermediate
3050 characters. If the range is in utf8, the hyphen is replaced with
3051 a certain range mark which will be handled by pmtrans() in op.c.
3053 In double-quoted strings:
3055 double-quoted style: \r and \n
3056 constants: \x31, etc.
3057 deprecated backrefs: \1 (in substitution replacements)
3058 case and quoting: \U \Q \E
3061 scan_const does *not* construct ops to handle interpolated strings.
3062 It stops processing as soon as it finds an embedded $ or @ variable
3063 and leaves it to the caller to work out what's going on.
3065 embedded arrays (whether in pattern or not) could be:
3066 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3068 $ in double-quoted strings must be the symbol of an embedded scalar.
3070 $ in pattern could be $foo or could be tail anchor. Assumption:
3071 it's a tail anchor if $ is the last thing in the string, or if it's
3072 followed by one of "()| \r\n\t"
3074 \1 (backreferences) are turned into $1 in substitutions
3076 The structure of the code is
3077 while (there's a character to process) {
3078 handle transliteration ranges
3079 skip regexp comments /(?#comment)/ and codes /(?{code})/
3080 skip #-initiated comments in //x patterns
3081 check for embedded arrays
3082 check for embedded scalars
3084 deprecate \1 in substitution replacements
3085 handle string-changing backslashes \l \U \Q \E, etc.
3086 switch (what was escaped) {
3087 handle \- in a transliteration (becomes a literal -)
3088 if a pattern and not \N{, go treat as regular character
3089 handle \132 (octal characters)
3090 handle \x15 and \x{1234} (hex characters)
3091 handle \N{name} (named characters, also \N{3,5} in a pattern)
3092 handle \cV (control characters)
3093 handle printf-style backslashes (\f, \r, \n, etc)
3096 } (end if backslash)
3097 handle regular character
3098 } (end while character to read)
3103 S_scan_const(pTHX_ char *start)
3106 char *send = PL_bufend; /* end of the constant */
3107 SV *sv = newSV(send - start); /* sv for the constant. See
3108 note below on sizing. */
3109 char *s = start; /* start of the constant */
3110 char *d = SvPVX(sv); /* destination for copies */
3111 bool dorange = FALSE; /* are we in a translit range? */
3112 bool didrange = FALSE; /* did we just finish a range? */
3113 bool in_charclass = FALSE; /* within /[...]/ */
3114 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3115 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
3116 to be UTF8? But, this can
3117 show as true when the source
3118 isn't utf8, as for example
3119 when it is entirely composed
3121 SV *res; /* result from charnames */
3123 /* Note on sizing: The scanned constant is placed into sv, which is
3124 * initialized by newSV() assuming one byte of output for every byte of
3125 * input. This routine expects newSV() to allocate an extra byte for a
3126 * trailing NUL, which this routine will append if it gets to the end of
3127 * the input. There may be more bytes of input than output (eg., \N{LATIN
3128 * CAPITAL LETTER A}), or more output than input if the constant ends up
3129 * recoded to utf8, but each time a construct is found that might increase
3130 * the needed size, SvGROW() is called. Its size parameter each time is
3131 * based on the best guess estimate at the time, namely the length used so
3132 * far, plus the length the current construct will occupy, plus room for
3133 * the trailing NUL, plus one byte for every input byte still unscanned */
3135 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3138 UV literal_endpoint = 0;
3139 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
3142 PERL_ARGS_ASSERT_SCAN_CONST;
3144 assert(PL_lex_inwhat != OP_TRANSR);
3145 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3146 /* If we are doing a trans and we know we want UTF8 set expectation */
3147 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3148 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3151 /* Protect sv from errors and fatal warnings. */
3152 ENTER_with_name("scan_const");
3155 while (s < send || dorange) {
3157 /* get transliterations out of the way (they're most literal) */
3158 if (PL_lex_inwhat == OP_TRANS) {
3159 /* expand a range A-Z to the full set of characters. AIE! */
3161 I32 i; /* current expanded character */
3162 I32 min; /* first character in range */
3163 I32 max; /* last character in range */
3174 char * const c = (char*)utf8_hop((U8*)d, -1);
3178 *c = (char) ILLEGAL_UTF8_BYTE;
3179 /* mark the range as done, and continue */
3185 i = d - SvPVX_const(sv); /* remember current offset */
3188 SvLEN(sv) + (has_utf8 ?
3189 (512 - UTF_CONTINUATION_MARK +
3192 /* How many two-byte within 0..255: 128 in UTF-8,
3193 * 96 in UTF-8-mod. */
3195 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
3197 d = SvPVX(sv) + i; /* refresh d after realloc */
3201 for (j = 0; j <= 1; j++) {
3202 char * const c = (char*)utf8_hop((U8*)d, -1);
3203 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3209 max = (U8)0xff; /* only to \xff */
3210 uvmax = uv; /* \x{100} to uvmax */
3212 d = c; /* eat endpoint chars */
3217 d -= 2; /* eat the first char and the - */
3218 min = (U8)*d; /* first char in range */
3219 max = (U8)d[1]; /* last char in range */
3226 "Invalid range \"%c-%c\" in transliteration operator",
3227 (char)min, (char)max);
3231 if (literal_endpoint == 2 &&
3232 ((isLOWER_A(min) && isLOWER_A(max)) ||
3233 (isUPPER_A(min) && isUPPER_A(max))))
3235 for (i = min; i <= max; i++) {
3242 for (i = min; i <= max; i++)
3245 append_utf8_from_native_byte(i, &d);
3253 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3255 *d++ = (char) ILLEGAL_UTF8_BYTE;
3257 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3261 /* mark the range as done, and continue */
3265 literal_endpoint = 0;
3270 /* range begins (ignore - as first or last char) */
3271 else if (*s == '-' && s+1 < send && s != start) {
3273 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
3280 *d++ = (char) ILLEGAL_UTF8_BYTE; /* use illegal utf8 byte--see pmtrans */
3290 literal_endpoint = 0;
3291 native_range = TRUE;
3296 /* if we get here, we're not doing a transliteration */
3298 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3301 while (s1 >= start && *s1-- == '\\')
3304 in_charclass = TRUE;
3307 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3310 while (s1 >= start && *s1-- == '\\')
3313 in_charclass = FALSE;
3316 /* skip for regexp comments /(?#comment)/, except for the last
3317 * char, which will be done separately.
3318 * Stop on (?{..}) and friends */
3320 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3322 while (s+1 < send && *s != ')')
3325 else if (!PL_lex_casemods &&
3326 ( s[2] == '{' /* This should match regcomp.c */
3327 || (s[2] == '?' && s[3] == '{')))
3333 /* likewise skip #-initiated comments in //x patterns */
3334 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
3335 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
3336 while (s+1 < send && *s != '\n')
3340 /* no further processing of single-quoted regex */
3341 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3342 goto default_action;
3344 /* check for embedded arrays
3345 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3347 else if (*s == '@' && s[1]) {
3348 if (isWORDCHAR_lazy_if(s+1,UTF))
3350 if (strchr(":'{$", s[1]))
3352 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3353 break; /* in regexp, neither @+ nor @- are interpolated */
3356 /* check for embedded scalars. only stop if we're sure it's a
3359 else if (*s == '$') {
3360 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3362 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3364 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3365 "Possible unintended interpolation of $\\ in regex");
3367 break; /* in regexp, $ might be tail anchor */
3371 /* End of else if chain - OP_TRANS rejoin rest */
3374 if (*s == '\\' && s+1 < send) {
3375 char* e; /* Can be used for ending '}', etc. */
3379 /* warn on \1 - \9 in substitution replacements, but note that \11
3380 * is an octal; and \19 is \1 followed by '9' */
3381 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
3382 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
3384 /* diag_listed_as: \%d better written as $%d */
3385 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3390 /* string-change backslash escapes */
3391 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3395 /* In a pattern, process \N, but skip any other backslash escapes.
3396 * This is because we don't want to translate an escape sequence
3397 * into a meta symbol and have the regex compiler use the meta
3398 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3399 * in spite of this, we do have to process \N here while the proper
3400 * charnames handler is in scope. See bugs #56444 and #62056.
3401 * There is a complication because \N in a pattern may also stand
3402 * for 'match a non-nl', and not mean a charname, in which case its
3403 * processing should be deferred to the regex compiler. To be a
3404 * charname it must be followed immediately by a '{', and not look
3405 * like \N followed by a curly quantifier, i.e., not something like
3406 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3408 else if (PL_lex_inpat
3411 || regcurly(s + 1, FALSE)))
3414 goto default_action;
3419 /* quoted - in transliterations */
3421 if (PL_lex_inwhat == OP_TRANS) {
3428 if ((isALPHANUMERIC(*s)))
3429 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3430 "Unrecognized escape \\%c passed through",
3432 /* default action is to copy the quoted character */
3433 goto default_action;
3436 /* eg. \132 indicates the octal constant 0132 */
3437 case '0': case '1': case '2': case '3':
3438 case '4': case '5': case '6': case '7':
3440 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3442 uv = grok_oct(s, &len, &flags, NULL);
3444 if (len < 3 && s < send && isDIGIT(*s)
3445 && ckWARN(WARN_MISC))
3447 Perl_warner(aTHX_ packWARN(WARN_MISC),
3448 "%s", form_short_octal_warning(s, len));
3451 goto NUM_ESCAPE_INSERT;
3453 /* eg. \o{24} indicates the octal constant \024 */
3458 bool valid = grok_bslash_o(&s, &uv, &error,
3459 TRUE, /* Output warning */
3460 FALSE, /* Not strict */
3461 TRUE, /* Output warnings for
3468 goto NUM_ESCAPE_INSERT;
3471 /* eg. \x24 indicates the hex constant 0x24 */
3476 bool valid = grok_bslash_x(&s, &uv, &error,
3477 TRUE, /* Output warning */
3478 FALSE, /* Not strict */
3479 TRUE, /* Output warnings for
3489 /* Insert oct or hex escaped character. There will always be
3490 * enough room in sv since such escapes will be longer than any
3491 * UTF-8 sequence they can end up as, except if they force us
3492 * to recode the rest of the string into utf8 */
3494 /* Here uv is the ordinal of the next character being added */
3495 if (!UVCHR_IS_INVARIANT(uv)) {
3496 if (!has_utf8 && uv > 255) {
3497 /* Might need to recode whatever we have accumulated so
3498 * far if it contains any chars variant in utf8 or
3501 SvCUR_set(sv, d - SvPVX_const(sv));
3504 /* See Note on sizing above. */
3505 sv_utf8_upgrade_flags_grow(sv,
3506 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3507 UNISKIP(uv) + (STRLEN)(send - s) + 1);
3508 d = SvPVX(sv) + SvCUR(sv);
3513 d = (char*)uvchr_to_utf8((U8*)d, uv);
3514 if (PL_lex_inwhat == OP_TRANS &&
3515 PL_sublex_info.sub_op) {
3516 PL_sublex_info.sub_op->op_private |=
3517 (PL_lex_repl ? OPpTRANS_FROM_UTF
3521 if (uv > 255 && !dorange)
3522 native_range = FALSE;
3535 /* In a non-pattern \N must be a named character, like \N{LATIN
3536 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3537 * mean to match a non-newline. For non-patterns, named
3538 * characters are converted to their string equivalents. In
3539 * patterns, named characters are not converted to their
3540 * ultimate forms for the same reasons that other escapes
3541 * aren't. Instead, they are converted to the \N{U+...} form
3542 * to get the value from the charnames that is in effect right
3543 * now, while preserving the fact that it was a named character
3544 * so that the regex compiler knows this */
3546 /* The structure of this section of code (besides checking for
3547 * errors and upgrading to utf8) is:
3548 * Further disambiguate between the two meanings of \N, and if
3549 * not a charname, go process it elsewhere
3550 * If of form \N{U+...}, pass it through if a pattern;
3551 * otherwise convert to utf8
3552 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3553 * pattern; otherwise convert to utf8 */
3555 /* Here, s points to the 'N'; the test below is guaranteed to
3556 * succeed if we are being called on a pattern as we already
3557 * know from a test above that the next character is a '{'.
3558 * On a non-pattern \N must mean 'named sequence, which
3559 * requires braces */
3562 yyerror("Missing braces on \\N{}");
3567 /* If there is no matching '}', it is an error. */
3568 if (! (e = strchr(s, '}'))) {
3569 if (! PL_lex_inpat) {
3570 yyerror("Missing right brace on \\N{}");
3572 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3577 /* Here it looks like a named character */
3579 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3580 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3581 | PERL_SCAN_DISALLOW_PREFIX;
3584 /* For \N{U+...}, the '...' is a unicode value even on
3585 * EBCDIC machines */
3586 s += 2; /* Skip to next char after the 'U+' */
3588 uv = grok_hex(s, &len, &flags, NULL);
3589 if (len == 0 || len != (STRLEN)(e - s)) {
3590 yyerror("Invalid hexadecimal number in \\N{U+...}");
3597 /* On non-EBCDIC platforms, pass through to the regex
3598 * compiler unchanged. The reason we evaluated the
3599 * number above is to make sure there wasn't a syntax
3600 * error. But on EBCDIC we convert to native so
3601 * downstream code can continue to assume it's native
3603 s -= 5; /* Include the '\N{U+' */
3605 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3608 (unsigned int) UNI_TO_NATIVE(uv));
3610 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3614 else { /* Not a pattern: convert the hex to string */
3616 /* If destination is not in utf8, unconditionally
3617 * recode it to be so. This is because \N{} implies
3618 * Unicode semantics, and scalars have to be in utf8
3619 * to guarantee those semantics */
3621 SvCUR_set(sv, d - SvPVX_const(sv));
3624 /* See Note on sizing above. */
3625 sv_utf8_upgrade_flags_grow(
3627 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3628 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3629 d = SvPVX(sv) + SvCUR(sv);
3633 /* Add the (Unicode) code point to the output. */
3634 if (UNI_IS_INVARIANT(uv)) {
3635 *d++ = (char) LATIN1_TO_NATIVE(uv);
3638 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3642 else /* Here is \N{NAME} but not \N{U+...}. */
3643 if ((res = get_and_check_backslash_N_name(s, e)))
3646 const char *str = SvPV_const(res, len);
3649 if (! len) { /* The name resolved to an empty string */
3650 Copy("\\N{}", d, 4, char);
3654 /* In order to not lose information for the regex
3655 * compiler, pass the result in the specially made
3656 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3657 * the code points in hex of each character
3658 * returned by charnames */
3660 const char *str_end = str + len;
3661 const STRLEN off = d - SvPVX_const(sv);
3663 if (! SvUTF8(res)) {
3664 /* For the non-UTF-8 case, we can determine the
3665 * exact length needed without having to parse
3666 * through the string. Each character takes up
3667 * 2 hex digits plus either a trailing dot or
3669 d = off + SvGROW(sv, off
3671 + 6 /* For the "\N{U+", and
3673 + (STRLEN)(send - e));
3674 Copy("\\N{U+", d, 5, char);
3676 while (str < str_end) {
3678 my_snprintf(hex_string, sizeof(hex_string),
3679 "%02X.", (U8) *str);
3680 Copy(hex_string, d, 3, char);
3684 d--; /* We will overwrite below the final
3685 dot with a right brace */
3688 STRLEN char_length; /* cur char's byte length */
3690 /* and the number of bytes after this is
3691 * translated into hex digits */
3692 STRLEN output_length;
3694 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3695 * for max('U+', '.'); and 1 for NUL */
3696 char hex_string[2 * UTF8_MAXBYTES + 5];
3698 /* Get the first character of the result. */
3699 U32 uv = utf8n_to_uvchr((U8 *) str,
3703 /* Convert first code point to hex, including
3704 * the boiler plate before it. */
3706 my_snprintf(hex_string, sizeof(hex_string),
3710 /* Make sure there is enough space to hold it */
3711 d = off + SvGROW(sv, off
3713 + (STRLEN)(send - e)
3714 + 2); /* '}' + NUL */
3716 Copy(hex_string, d, output_length, char);
3719 /* For each subsequent character, append dot and
3720 * its ordinal in hex */
3721 while ((str += char_length) < str_end) {
3722 const STRLEN off = d - SvPVX_const(sv);
3723 U32 uv = utf8n_to_uvchr((U8 *) str,
3728 my_snprintf(hex_string,
3733 d = off + SvGROW(sv, off
3735 + (STRLEN)(send - e)
3736 + 2); /* '}' + NUL */
3737 Copy(hex_string, d, output_length, char);
3742 *d++ = '}'; /* Done. Add the trailing brace */
3745 else { /* Here, not in a pattern. Convert the name to a
3748 /* If destination is not in utf8, unconditionally
3749 * recode it to be so. This is because \N{} implies
3750 * Unicode semantics, and scalars have to be in utf8
3751 * to guarantee those semantics */
3753 SvCUR_set(sv, d - SvPVX_const(sv));
3756 /* See Note on sizing above. */
3757 sv_utf8_upgrade_flags_grow(sv,
3758 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3759 len + (STRLEN)(send - s) + 1);
3760 d = SvPVX(sv) + SvCUR(sv);
3762 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3764 /* See Note on sizing above. (NOTE: SvCUR() is not
3765 * set correctly here). */
3766 const STRLEN off = d - SvPVX_const(sv);
3767 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3769 Copy(str, d, len, char);
3775 } /* End \N{NAME} */
3778 native_range = FALSE; /* \N{} is defined to be Unicode */
3780 s = e + 1; /* Point to just after the '}' */
3783 /* \c is a control character */
3787 *d++ = grok_bslash_c(*s++, has_utf8, 1);
3790 yyerror("Missing control char name in \\c");
3794 /* printf-style backslashes, formfeeds, newlines, etc */
3811 *d++ = ASCII_TO_NATIVE('\033');
3820 } /* end if (backslash) */
3827 /* If we started with encoded form, or already know we want it,
3828 then encode the next character */
3829 if (! NATIVE_BYTE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
3833 /* One might think that it is wasted effort in the case of the
3834 * source being utf8 (this_utf8 == TRUE) to take the next character
3835 * in the source, convert it to an unsigned value, and then convert
3836 * it back again. But the source has not been validated here. The
3837 * routine that does the conversion checks for errors like
3840 const UV nextuv = (this_utf8)
3841 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
3843 const STRLEN need = UNISKIP(nextuv);
3845 SvCUR_set(sv, d - SvPVX_const(sv));
3848 /* See Note on sizing above. */
3849 sv_utf8_upgrade_flags_grow(sv,
3850 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3851 need + (STRLEN)(send - s) + 1);
3852 d = SvPVX(sv) + SvCUR(sv);
3854 } else if (need > len) {
3855 /* encoded value larger than old, may need extra space (NOTE:
3856 * SvCUR() is not set correctly here). See Note on sizing
3858 const STRLEN off = d - SvPVX_const(sv);
3859 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
3863 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
3865 if (uv > 255 && !dorange)
3866 native_range = FALSE;
3872 } /* while loop to process each character */
3874 /* terminate the string and set up the sv */
3876 SvCUR_set(sv, d - SvPVX_const(sv));
3877 if (SvCUR(sv) >= SvLEN(sv))
3878 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3879 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
3882 if (PL_encoding && !has_utf8) {
3883 sv_recode_to_utf8(sv, PL_encoding);
3889 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3890 PL_sublex_info.sub_op->op_private |=
3891 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3895 /* shrink the sv if we allocated more than we used */
3896 if (SvCUR(sv) + 5 < SvLEN(sv)) {
3897 SvPV_shrink_to_cur(sv);
3900 /* return the substring (via pl_yylval) only if we parsed anything */
3903 for (; s2 < s; s2++) {
3905 COPLINE_INC_WITH_HERELINES;
3907 SvREFCNT_inc_simple_void_NN(sv);
3908 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3909 && ! PL_parser->lex_re_reparsing)
3911 const char *const key = PL_lex_inpat ? "qr" : "q";
3912 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3916 if (PL_lex_inwhat == OP_TRANS) {
3919 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3922 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3930 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3933 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3935 LEAVE_with_name("scan_const");
3940 * Returns TRUE if there's more to the expression (e.g., a subscript),
3943 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3945 * ->[ and ->{ return TRUE
3946 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
3947 * { and [ outside a pattern are always subscripts, so return TRUE
3948 * if we're outside a pattern and it's not { or [, then return FALSE
3949 * if we're in a pattern and the first char is a {
3950 * {4,5} (any digits around the comma) returns FALSE
3951 * if we're in a pattern and the first char is a [
3953 * [SOMETHING] has a funky algorithm to decide whether it's a
3954 * character class or not. It has to deal with things like
3955 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3956 * anything else returns TRUE
3959 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
3962 S_intuit_more(pTHX_ char *s)
3966 PERL_ARGS_ASSERT_INTUIT_MORE;
3968 if (PL_lex_brackets)
3970 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3972 if (*s == '-' && s[1] == '>'
3973 && FEATURE_POSTDEREF_QQ_IS_ENABLED
3974 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
3975 ||(s[2] == '@' && strchr("*[{",s[3])) ))
3977 if (*s != '{' && *s != '[')
3982 /* In a pattern, so maybe we have {n,m}. */
3984 if (regcurly(s, FALSE)) {
3990 /* On the other hand, maybe we have a character class */
3993 if (*s == ']' || *s == '^')
3996 /* this is terrifying, and it works */
3999 const char * const send = strchr(s,']');
4000 unsigned char un_char, last_un_char;
4001 char tmpbuf[sizeof PL_tokenbuf * 4];
4003 if (!send) /* has to be an expression */
4005 weight = 2; /* let's weigh the evidence */
4009 else if (isDIGIT(*s)) {
4011 if (isDIGIT(s[1]) && s[2] == ']')
4017 Zero(seen,256,char);
4019 for (; s < send; s++) {
4020 last_un_char = un_char;
4021 un_char = (unsigned char)*s;
4026 weight -= seen[un_char] * 10;
4027 if (isWORDCHAR_lazy_if(s+1,UTF)) {
4029 char *tmp = PL_bufend;
4030 PL_bufend = (char*)send;
4031 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4033 len = (int)strlen(tmpbuf);
4034 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4035 UTF ? SVf_UTF8 : 0, SVt_PV))
4040 else if (*s == '$' && s[1] &&
4041 strchr("[#!%*<>()-=",s[1])) {
4042 if (/*{*/ strchr("])} =",s[2]))
4051 if (strchr("wds]",s[1]))
4053 else if (seen[(U8)'\''] || seen[(U8)'"'])
4055 else if (strchr("rnftbxcav",s[1]))
4057 else if (isDIGIT(s[1])) {
4059 while (s[1] && isDIGIT(s[1]))
4069 if (strchr("aA01! ",last_un_char))
4071 if (strchr("zZ79~",s[1]))
4073 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4074 weight -= 5; /* cope with negative subscript */
4077 if (!isWORDCHAR(last_un_char)
4078 && !(last_un_char == '$' || last_un_char == '@'
4079 || last_un_char == '&')
4080 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4085 if (keyword(tmpbuf, d - tmpbuf, 0))
4088 if (un_char == last_un_char + 1)
4090 weight -= seen[un_char];
4095 if (weight >= 0) /* probably a character class */
4105 * Does all the checking to disambiguate
4107 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4108 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4110 * First argument is the stuff after the first token, e.g. "bar".
4112 * Not a method if foo is a filehandle.
4113 * Not a method if foo is a subroutine prototyped to take a filehandle.
4114 * Not a method if it's really "Foo $bar"
4115 * Method if it's "foo $bar"
4116 * Not a method if it's really "print foo $bar"
4117 * Method if it's really "foo package::" (interpreted as package->foo)
4118 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4119 * Not a method if bar is a filehandle or package, but is quoted with
4124 S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
4127 char *s = start + (*start == '$');
4128 char tmpbuf[sizeof PL_tokenbuf];
4135 PERL_ARGS_ASSERT_INTUIT_METHOD;
4137 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4139 if (cv && SvPOK(cv)) {
4140 const char *proto = CvPROTO(cv);
4142 while (*proto && (isSPACE(*proto) || *proto == ';'))
4149 if (*start == '$') {
4150 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
4151 isUPPER(*PL_tokenbuf))
4154 len = start - SvPVX(PL_linestr);
4158 start = SvPVX(PL_linestr) + len;
4162 return *s == '(' ? FUNCMETH : METHOD;
4165 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4166 /* start is the beginning of the possible filehandle/object,
4167 * and s is the end of it
4168 * tmpbuf is a copy of it (but with single quotes as double colons)
4171 if (!keyword(tmpbuf, len, 0)) {
4172 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4176 soff = s - SvPVX(PL_linestr);
4180 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
4181 if (indirgv && GvCVu(indirgv))
4183 /* filehandle or package name makes it a method */
4184 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4186 soff = s - SvPVX(PL_linestr);
4189 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4190 return 0; /* no assumptions -- "=>" quotes bareword */
4192 start_force(PL_curforce);
4193 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
4194 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4195 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4197 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4198 ( UTF ? SVf_UTF8 : 0 )));
4203 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4205 return *s == '(' ? FUNCMETH : METHOD;
4211 /* Encoded script support. filter_add() effectively inserts a
4212 * 'pre-processing' function into the current source input stream.
4213 * Note that the filter function only applies to the current source file
4214 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4216 * The datasv parameter (which may be NULL) can be used to pass
4217 * private data to this instance of the filter. The filter function
4218 * can recover the SV using the FILTER_DATA macro and use it to
4219 * store private buffers and state information.
4221 * The supplied datasv parameter is upgraded to a PVIO type
4222 * and the IoDIRP/IoANY field is used to store the function pointer,
4223 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4224 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4225 * private use must be set using malloc'd pointers.
4229 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4238 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4239 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4241 if (!PL_rsfp_filters)
4242 PL_rsfp_filters = newAV();
4245 SvUPGRADE(datasv, SVt_PVIO);
4246 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4247 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4248 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4249 FPTR2DPTR(void *, IoANY(datasv)),
4250 SvPV_nolen(datasv)));
4251 av_unshift(PL_rsfp_filters, 1);
4252 av_store(PL_rsfp_filters, 0, datasv) ;
4254 !PL_parser->filtered
4255 && PL_parser->lex_flags & LEX_EVALBYTES
4256 && PL_bufptr < PL_bufend
4258 const char *s = PL_bufptr;
4259 while (s < PL_bufend) {
4261 SV *linestr = PL_parser->linestr;
4262 char *buf = SvPVX(linestr);
4263 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4264 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4265 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4266 STRLEN const linestart_pos = PL_parser->linestart - buf;
4267 STRLEN const last_uni_pos =
4268 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4269 STRLEN const last_lop_pos =
4270 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4271 av_push(PL_rsfp_filters, linestr);
4272 PL_parser->linestr =
4273 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4274 buf = SvPVX(PL_parser->linestr);
4275 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4276 PL_parser->bufptr = buf + bufptr_pos;
4277 PL_parser->oldbufptr = buf + oldbufptr_pos;
4278 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4279 PL_parser->linestart = buf + linestart_pos;
4280 if (PL_parser->last_uni)
4281 PL_parser->last_uni = buf + last_uni_pos;
4282 if (PL_parser->last_lop)
4283 PL_parser->last_lop = buf + last_lop_pos;
4284 SvLEN(linestr) = SvCUR(linestr);
4285 SvCUR(linestr) = s-SvPVX(linestr);
4286 PL_parser->filtered = 1;
4296 /* Delete most recently added instance of this filter function. */
4298 Perl_filter_del(pTHX_ filter_t funcp)
4303 PERL_ARGS_ASSERT_FILTER_DEL;
4306 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4307 FPTR2DPTR(void*, funcp)));
4309 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4311 /* if filter is on top of stack (usual case) just pop it off */
4312 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4313 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4314 sv_free(av_pop(PL_rsfp_filters));
4318 /* we need to search for the correct entry and clear it */
4319 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4323 /* Invoke the idxth filter function for the current rsfp. */
4324 /* maxlen 0 = read one text line */
4326 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4331 /* This API is bad. It should have been using unsigned int for maxlen.
4332 Not sure if we want to change the API, but if not we should sanity
4333 check the value here. */
4334 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4336 PERL_ARGS_ASSERT_FILTER_READ;
4338 if (!PL_parser || !PL_rsfp_filters)
4340 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4341 /* Provide a default input filter to make life easy. */
4342 /* Note that we append to the line. This is handy. */
4343 DEBUG_P(PerlIO_printf(Perl_debug_log,
4344 "filter_read %d: from rsfp\n", idx));
4345 if (correct_length) {
4348 const int old_len = SvCUR(buf_sv);
4350 /* ensure buf_sv is large enough */
4351 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4352 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4353 correct_length)) <= 0) {
4354 if (PerlIO_error(PL_rsfp))
4355 return -1; /* error */
4357 return 0 ; /* end of file */
4359 SvCUR_set(buf_sv, old_len + len) ;
4360 SvPVX(buf_sv)[old_len + len] = '\0';
4363 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4364 if (PerlIO_error(PL_rsfp))
4365 return -1; /* error */
4367 return 0 ; /* end of file */
4370 return SvCUR(buf_sv);
4372 /* Skip this filter slot if filter has been deleted */
4373 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4374 DEBUG_P(PerlIO_printf(Perl_debug_log,
4375 "filter_read %d: skipped (filter deleted)\n",
4377 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4379 if (SvTYPE(datasv) != SVt_PVIO) {
4380 if (correct_length) {
4382 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4383 if (!remainder) return 0; /* eof */
4384 if (correct_length > remainder) correct_length = remainder;
4385 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4386 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4389 const char *s = SvEND(datasv);
4390 const char *send = SvPVX(datasv) + SvLEN(datasv);
4398 if (s == send) return 0; /* eof */
4399 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4400 SvCUR_set(datasv, s-SvPVX(datasv));
4402 return SvCUR(buf_sv);
4404 /* Get function pointer hidden within datasv */
4405 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4406 DEBUG_P(PerlIO_printf(Perl_debug_log,
4407 "filter_read %d: via function %p (%s)\n",
4408 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4409 /* Call function. The function is expected to */
4410 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4411 /* Return: <0:error, =0:eof, >0:not eof */
4412 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
4416 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4420 PERL_ARGS_ASSERT_FILTER_GETS;
4422 #ifdef PERL_CR_FILTER
4423 if (!PL_rsfp_filters) {
4424 filter_add(S_cr_textfilter,NULL);
4427 if (PL_rsfp_filters) {
4429 SvCUR_set(sv, 0); /* start with empty line */
4430 if (FILTER_READ(0, sv, 0) > 0)
4431 return ( SvPVX(sv) ) ;
4436 return (sv_gets(sv, PL_rsfp, append));
4440 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4445 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4447 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
4451 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
4452 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4454 return GvHV(gv); /* Foo:: */
4457 /* use constant CLASS => 'MyClass' */
4458 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4459 if (gv && GvCV(gv)) {
4460 SV * const sv = cv_const_sv(GvCV(gv));
4462 pkgname = SvPV_const(sv, len);
4465 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4471 * The intent of this yylex wrapper is to minimize the changes to the
4472 * tokener when we aren't interested in collecting madprops. It remains
4473 * to be seen how successful this strategy will be...
4480 char *s = PL_bufptr;
4482 /* make sure PL_thiswhite is initialized */
4486 /* previous token ate up our whitespace? */
4487 if (!PL_lasttoke && PL_nextwhite) {
4488 PL_thiswhite = PL_nextwhite;
4492 /* isolate the token, and figure out where it is without whitespace */
4493 PL_realtokenstart = -1;
4497 assert(PL_curforce < 0);
4499 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4500 if (!PL_thistoken) {
4501 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
4502 PL_thistoken = newSVpvs("");
4504 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
4505 PL_thistoken = newSVpvn(tstart, s - tstart);
4508 if (PL_thismad) /* install head */
4509 CURMAD('X', PL_thistoken);
4512 /* last whitespace of a sublex? */
4513 if (optype == ')' && PL_endwhite) {
4514 CURMAD('X', PL_endwhite);
4519 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
4520 if (!PL_thiswhite && !PL_endwhite && !optype) {
4521 sv_free(PL_thistoken);
4526 /* put off final whitespace till peg */
4527 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
4528 PL_nextwhite = PL_thiswhite;
4531 else if (PL_thisopen) {
4532 CURMAD('q', PL_thisopen);
4534 sv_free(PL_thistoken);
4538 /* Store actual token text as madprop X */
4539 CURMAD('X', PL_thistoken);
4543 /* add preceding whitespace as madprop _ */
4544 CURMAD('_', PL_thiswhite);
4548 /* add quoted material as madprop = */
4549 CURMAD('=', PL_thisstuff);
4553 /* add terminating quote as madprop Q */
4554 CURMAD('Q', PL_thisclose);
4558 /* special processing based on optype */
4562 /* opval doesn't need a TOKEN since it can already store mp */
4572 if (pl_yylval.opval)
4573 append_madprops(PL_thismad, pl_yylval.opval, 0);
4581 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4594 /* remember any fake bracket that lexer is about to discard */
4595 if (PL_lex_brackets == 1 &&
4596 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4599 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4602 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4603 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4606 break; /* don't bother looking for trailing comment */
4615 /* attach a trailing comment to its statement instead of next token */
4619 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4621 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4623 if (*s == '\n' || *s == '#') {
4624 while (s < PL_bufend && *s != '\n')
4628 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4629 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4642 /* Create new token struct. Note: opvals return early above. */
4643 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
4650 S_tokenize_use(pTHX_ int is_use, char *s) {
4653 PERL_ARGS_ASSERT_TOKENIZE_USE;
4655 if (PL_expect != XSTATE)
4656 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4657 is_use ? "use" : "no"));
4660 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4661 s = force_version(s, TRUE);
4662 if (*s == ';' || *s == '}'
4663 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
4664 start_force(PL_curforce);
4665 NEXTVAL_NEXTTOKE.opval = NULL;
4668 else if (*s == 'v') {
4669 s = force_word(s,WORD,FALSE,TRUE);
4670 s = force_version(s, FALSE);
4674 s = force_word(s,WORD,FALSE,TRUE);
4675 s = force_version(s, FALSE);
4677 pl_yylval.ival = is_use;
4681 static const char* const exp_name[] =
4682 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4683 "ATTRTERM", "TERMBLOCK", "POSTDEREF", "TERMORDORDOR"
4687 #define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4689 S_word_takes_any_delimeter(char *p, STRLEN len)
4691 return (len == 1 && strchr("msyq", p[0])) ||
4693 (p[0] == 't' && p[1] == 'r') ||
4694 (p[0] == 'q' && strchr("qwxr", p[1]))));
4698 S_check_scalar_slice(pTHX_ char *s)
4701 while (*s == ' ' || *s == '\t') s++;
4702 if (*s == 'q' && s[1] == 'w'
4703 && !isWORDCHAR_lazy_if(s+2,UTF))
4705 while (*s && (isWORDCHAR_lazy_if(s,UTF) || strchr(" \t$#+-'\"", *s)))
4706 s += UTF ? UTF8SKIP(s) : 1;
4707 if (*s == '}' || *s == ']')
4708 pl_yylval.ival = OPpSLICEWARNING;
4714 Works out what to call the token just pulled out of the input
4715 stream. The yacc parser takes care of taking the ops we return and
4716 stitching them into a tree.
4719 The type of the next token
4722 Switch based on the current state:
4723 - if we already built the token before, use it
4724 - if we have a case modifier in a string, deal with that
4725 - handle other cases of interpolation inside a string
4726 - scan the next line if we are inside a format
4727 In the normal state switch on the next character:
4729 if alphabetic, go to key lookup
4730 unrecoginized character - croak
4731 - 0/4/26: handle end-of-line or EOF
4732 - cases for whitespace
4733 - \n and #: handle comments and line numbers
4734 - various operators, brackets and sigils
4737 - 'v': vstrings (or go to key lookup)
4738 - 'x' repetition operator (or go to key lookup)
4739 - other ASCII alphanumerics (key lookup begins here):
4742 scan built-in keyword (but do nothing with it yet)
4743 check for statement label
4744 check for lexical subs
4745 goto just_a_word if there is one
4746 see whether built-in keyword is overridden
4747 switch on keyword number:
4748 - default: just_a_word:
4749 not a built-in keyword; handle bareword lookup
4750 disambiguate between method and sub call
4751 fall back to bareword
4752 - cases for built-in keywords
4760 char *s = PL_bufptr;
4764 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4768 /* orig_keyword, gvp, and gv are initialized here because
4769 * jump to the label just_a_word_zero can bypass their
4770 * initialization later. */
4771 I32 orig_keyword = 0;
4776 SV* tmp = newSVpvs("");
4777 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4778 (IV)CopLINE(PL_curcop),
4779 lex_state_names[PL_lex_state],
4780 exp_name[PL_expect],
4781 pv_display(tmp, s, strlen(s), 0, 60));
4785 switch (PL_lex_state) {
4787 case LEX_INTERPNORMAL:
4790 /* when we've already built the next token, just pull it out of the queue */
4794 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
4796 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
4797 PL_nexttoke[PL_lasttoke].next_mad = 0;
4798 if (PL_thismad && PL_thismad->mad_key == '_') {
4799 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
4800 PL_thismad->mad_val = 0;
4801 mad_free(PL_thismad);
4806 PL_lex_state = PL_lex_defer;
4807 PL_expect = PL_lex_expect;
4808 PL_lex_defer = LEX_NORMAL;
4809 if (!PL_nexttoke[PL_lasttoke].next_type)
4814 pl_yylval = PL_nextval[PL_nexttoke];
4816 PL_lex_state = PL_lex_defer;
4817 PL_expect = PL_lex_expect;
4818 PL_lex_defer = LEX_NORMAL;
4824 next_type = PL_nexttoke[PL_lasttoke].next_type;
4826 next_type = PL_nexttype[PL_nexttoke];
4828 if (next_type & (7<<24)) {
4829 if (next_type & (1<<24)) {
4830 if (PL_lex_brackets > 100)
4831 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4832 PL_lex_brackstack[PL_lex_brackets++] =
4833 (char) ((next_type >> 16) & 0xff);
4835 if (next_type & (2<<24))
4836 PL_lex_allbrackets++;
4837 if (next_type & (4<<24))
4838 PL_lex_allbrackets--;
4839 next_type &= 0xffff;
4841 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4844 /* interpolated case modifiers like \L \U, including \Q and \E.
4845 when we get here, PL_bufptr is at the \
4847 case LEX_INTERPCASEMOD:
4849 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4851 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4852 PL_bufptr, PL_bufend, *PL_bufptr);
4854 /* handle \E or end of string */
4855 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4857 if (PL_lex_casemods) {
4858 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4859 PL_lex_casestack[PL_lex_casemods] = '\0';
4861 if (PL_bufptr != PL_bufend
4862 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4863 || oldmod == 'F')) {
4865 PL_lex_state = LEX_INTERPCONCAT;
4868 PL_thistoken = newSVpvs("\\E");
4871 PL_lex_allbrackets--;
4874 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4875 /* Got an unpaired \E */
4876 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4877 "Useless use of \\E");
4880 while (PL_bufptr != PL_bufend &&
4881 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
4884 PL_thiswhite = newSVpvs("");
4885 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4890 if (PL_bufptr != PL_bufend)
4893 PL_lex_state = LEX_INTERPCONCAT;
4897 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4898 "### Saw case modifier\n"); });
4900 if (s[1] == '\\' && s[2] == 'E') {
4904 PL_thiswhite = newSVpvs("");
4905 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4909 PL_lex_state = LEX_INTERPCONCAT;
4914 if (!PL_madskills) /* when just compiling don't need correct */
4915 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4916 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4917 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4918 (strchr(PL_lex_casestack, 'L')
4919 || strchr(PL_lex_casestack, 'U')
4920 || strchr(PL_lex_casestack, 'F'))) {
4921 PL_lex_casestack[--PL_lex_casemods] = '\0';
4922 PL_lex_allbrackets--;
4925 if (PL_lex_casemods > 10)
4926 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4927 PL_lex_casestack[PL_lex_casemods++] = *s;
4928 PL_lex_casestack[PL_lex_casemods] = '\0';
4929 PL_lex_state = LEX_INTERPCONCAT;
4930 start_force(PL_curforce);
4931 NEXTVAL_NEXTTOKE.ival = 0;
4932 force_next((2<<24)|'(');
4933 start_force(PL_curforce);
4935 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4937 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4939 NEXTVAL_NEXTTOKE.ival = OP_LC;
4941 NEXTVAL_NEXTTOKE.ival = OP_UC;
4943 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4945 NEXTVAL_NEXTTOKE.ival = OP_FC;
4947 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4949 SV* const tmpsv = newSVpvs("\\ ");
4950 /* replace the space with the character we want to escape
4952 SvPVX(tmpsv)[1] = *s;
4958 if (PL_lex_starts) {
4964 sv_free(PL_thistoken);
4965 PL_thistoken = newSVpvs("");
4968 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4969 if (PL_lex_casemods == 1 && PL_lex_inpat)
4978 case LEX_INTERPPUSH:
4979 return REPORT(sublex_push());
4981 case LEX_INTERPSTART:
4982 if (PL_bufptr == PL_bufend)
4983 return REPORT(sublex_done());
4984 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4985 "### Interpolated variable\n"); });
4987 /* for /@a/, we leave the joining for the regex engine to do
4988 * (unless we're within \Q etc) */
4989 PL_lex_dojoin = (*PL_bufptr == '@'
4990 && (!PL_lex_inpat || PL_lex_casemods));
4991 PL_lex_state = LEX_INTERPNORMAL;
4992 if (PL_lex_dojoin) {
4993 start_force(PL_curforce);
4994 NEXTVAL_NEXTTOKE.ival = 0;
4996 start_force(PL_curforce);
4997 force_ident("\"", '$');
4998 start_force(PL_curforce);
4999 NEXTVAL_NEXTTOKE.ival = 0;
5001 start_force(PL_curforce);
5002 NEXTVAL_NEXTTOKE.ival = 0;
5003 force_next((2<<24)|'(');
5004 start_force(PL_curforce);
5005 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
5008 /* Convert (?{...}) and friends to 'do {...}' */
5009 if (PL_lex_inpat && *PL_bufptr == '(') {
5010 PL_parser->lex_shared->re_eval_start = PL_bufptr;
5012 if (*PL_bufptr != '{')
5014 start_force(PL_curforce);
5015 /* XXX probably need a CURMAD(something) here */
5016 PL_expect = XTERMBLOCK;
5020 if (PL_lex_starts++) {
5025 sv_free(PL_thistoken);
5026 PL_thistoken = newSVpvs("");
5029 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5030 if (!PL_lex_casemods && PL_lex_inpat)
5037 case LEX_INTERPENDMAYBE:
5038 if (intuit_more(PL_bufptr)) {
5039 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
5045 if (PL_lex_dojoin) {
5046 const U8 dojoin_was = PL_lex_dojoin;
5047 PL_lex_dojoin = FALSE;
5048 PL_lex_state = LEX_INTERPCONCAT;
5052 sv_free(PL_thistoken);
5053 PL_thistoken = newSVpvs("");
5056 PL_lex_allbrackets--;
5057 return REPORT(dojoin_was == 1 ? ')' : POSTJOIN);
5059 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5060 && SvEVALED(PL_lex_repl))
5062 if (PL_bufptr != PL_bufend)
5063 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5066 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5067 re_eval_str. If the here-doc body’s length equals the previous
5068 value of re_eval_start, re_eval_start will now be null. So
5069 check re_eval_str as well. */
5070 if (PL_parser->lex_shared->re_eval_start
5071 || PL_parser->lex_shared->re_eval_str) {
5073 if (*PL_bufptr != ')')
5074 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5076 /* having compiled a (?{..}) expression, return the original
5077 * text too, as a const */
5078 if (PL_parser->lex_shared->re_eval_str) {
5079 sv = PL_parser->lex_shared->re_eval_str;
5080 PL_parser->lex_shared->re_eval_str = NULL;
5082 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5083 SvPV_shrink_to_cur(sv);
5085 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5086 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5087 start_force(PL_curforce);
5088 /* XXX probably need a CURMAD(something) here */
5089 NEXTVAL_NEXTTOKE.opval =
5090 (OP*)newSVOP(OP_CONST, 0,
5093 PL_parser->lex_shared->re_eval_start = NULL;
5099 case LEX_INTERPCONCAT:
5101 if (PL_lex_brackets)
5102 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5103 (long) PL_lex_brackets);
5105 if (PL_bufptr == PL_bufend)
5106 return REPORT(sublex_done());
5108 /* m'foo' still needs to be parsed for possible (?{...}) */
5109 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5110 SV *sv = newSVsv(PL_linestr);
5112 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5116 s = scan_const(PL_bufptr);
5118 PL_lex_state = LEX_INTERPCASEMOD;
5120 PL_lex_state = LEX_INTERPSTART;
5123 if (s != PL_bufptr) {
5124 start_force(PL_curforce);
5126 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5128 NEXTVAL_NEXTTOKE = pl_yylval;
5131 if (PL_lex_starts++) {
5135 sv_free(PL_thistoken);
5136 PL_thistoken = newSVpvs("");
5139 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5140 if (!PL_lex_casemods && PL_lex_inpat)
5153 s = scan_formline(PL_bufptr);
5154 if (!PL_lex_formbrack)
5163 /* We really do *not* want PL_linestr ever becoming a COW. */
5164 assert (!SvIsCOW(PL_linestr));
5166 PL_oldoldbufptr = PL_oldbufptr;
5168 PL_parser->saw_infix_sigil = 0;
5173 sv_free(PL_thistoken);
5176 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5180 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
5183 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5184 const char *c = UTF ? sv_uni_display(dsv, newSVpvn_flags(s,
5186 SVs_TEMP | SVf_UTF8),
5187 10, UNI_DISPLAY_ISPRINT)
5188 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5189 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5190 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5191 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5195 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"UTF8f"<-- HERE near column %d", c,
5196 UTF8fARG(UTF, (s - d), d),
5201 goto fake_eof; /* emulate EOF on ^D or ^Z */
5207 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
5210 if (PL_lex_brackets &&
5211 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
5212 yyerror((const char *)
5214 ? "Format not terminated"
5215 : "Missing right curly or square bracket"));
5217 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5218 "### Tokener got EOF\n");
5222 if (s++ < PL_bufend)
5223 goto retry; /* ignore stray nulls */
5226 if (!PL_in_eval && !PL_preambled) {
5227 PL_preambled = TRUE;
5233 /* Generate a string of Perl code to load the debugger.
5234 * If PERL5DB is set, it will return the contents of that,
5235 * otherwise a compile-time require of perl5db.pl. */
5237 const char * const pdb = PerlEnv_getenv("PERL5DB");
5240 sv_setpv(PL_linestr, pdb);
5241 sv_catpvs(PL_linestr,";");
5243 SETERRNO(0,SS_NORMAL);
5244 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5246 PL_parser->preambling = CopLINE(PL_curcop);
5248 sv_setpvs(PL_linestr,"");
5249 if (PL_preambleav) {
5250 SV **svp = AvARRAY(PL_preambleav);
5251 SV **const end = svp + AvFILLp(PL_preambleav);
5253 sv_catsv(PL_linestr, *svp);
5255 sv_catpvs(PL_linestr, ";");
5257 sv_free(MUTABLE_SV(PL_preambleav));
5258 PL_preambleav = NULL;
5261 sv_catpvs(PL_linestr,
5262 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5263 if (PL_minus_n || PL_minus_p) {
5264 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5266 sv_catpvs(PL_linestr,"chomp;");
5269 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5270 || *PL_splitstr == '"')
5271 && strchr(PL_splitstr + 1, *PL_splitstr))
5272 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5274 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5275 bytes can be used as quoting characters. :-) */
5276 const char *splits = PL_splitstr;
5277 sv_catpvs(PL_linestr, "our @F=split(q\0");
5280 if (*splits == '\\')
5281 sv_catpvn(PL_linestr, splits, 1);
5282 sv_catpvn(PL_linestr, splits, 1);
5283 } while (*splits++);
5284 /* This loop will embed the trailing NUL of
5285 PL_linestr as the last thing it does before
5287 sv_catpvs(PL_linestr, ");");
5291 sv_catpvs(PL_linestr,"our @F=split(' ');");
5294 sv_catpvs(PL_linestr, "\n");
5295 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5296 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5297 PL_last_lop = PL_last_uni = NULL;
5298 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5299 update_debugger_info(PL_linestr, NULL, 0);
5304 bof = PL_rsfp ? TRUE : FALSE;
5307 fake_eof = LEX_FAKE_EOF;
5309 PL_bufptr = PL_bufend;
5310 COPLINE_INC_WITH_HERELINES;
5311 if (!lex_next_chunk(fake_eof)) {
5312 CopLINE_dec(PL_curcop);
5314 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5316 CopLINE_dec(PL_curcop);
5319 PL_realtokenstart = -1;
5322 /* If it looks like the start of a BOM or raw UTF-16,
5323 * check if it in fact is. */
5324 if (bof && PL_rsfp &&
5326 *(U8*)s == BOM_UTF8_FIRST_BYTE ||
5329 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5330 bof = (offset == (Off_t)SvCUR(PL_linestr));
5331 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5332 /* offset may include swallowed CR */
5334 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5337 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5338 s = swallow_bom((U8*)s);
5341 if (PL_parser->in_pod) {
5342 /* Incest with pod. */
5345 sv_catsv(PL_thiswhite, PL_linestr);
5347 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
5348 sv_setpvs(PL_linestr, "");
5349 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5350 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5351 PL_last_lop = PL_last_uni = NULL;
5352 PL_parser->in_pod = 0;
5355 if (PL_rsfp || PL_parser->filtered)
5357 } while (PL_parser->in_pod);
5358 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5359 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5360 PL_last_lop = PL_last_uni = NULL;
5361 if (CopLINE(PL_curcop) == 1) {
5362 while (s < PL_bufend && isSPACE(*s))
5364 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5368 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5372 if (*s == '#' && *(s+1) == '!')
5374 #ifdef ALTERNATE_SHEBANG
5376 static char const as[] = ALTERNATE_SHEBANG;
5377 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5378 d = s + (sizeof(as) - 1);
5380 #endif /* ALTERNATE_SHEBANG */
5389 while (*d && !isSPACE(*d))
5393 #ifdef ARG_ZERO_IS_SCRIPT
5394 if (ipathend > ipath) {
5396 * HP-UX (at least) sets argv[0] to the script name,
5397 * which makes $^X incorrect. And Digital UNIX and Linux,
5398 * at least, set argv[0] to the basename of the Perl
5399 * interpreter. So, having found "#!", we'll set it right.
5401 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5403 assert(SvPOK(x) || SvGMAGICAL(x));
5404 if (sv_eq(x, CopFILESV(PL_curcop))) {
5405 sv_setpvn(x, ipath, ipathend - ipath);
5411 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
5412 const char * const lstart = SvPV_const(x,llen);
5414 bstart += blen - llen;
5415 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5416 sv_setpvn(x, ipath, ipathend - ipath);
5421 TAINT_NOT; /* $^X is always tainted, but that's OK */
5423 #endif /* ARG_ZERO_IS_SCRIPT */
5428 d = instr(s,"perl -");
5430 d = instr(s,"perl");
5432 /* avoid getting into infinite loops when shebang
5433 * line contains "Perl" rather than "perl" */
5435 for (d = ipathend-4; d >= ipath; --d) {
5436 if ((*d == 'p' || *d == 'P')
5437 && !ibcmp(d, "perl", 4))
5447 #ifdef ALTERNATE_SHEBANG
5449 * If the ALTERNATE_SHEBANG on this system starts with a
5450 * character that can be part of a Perl expression, then if
5451 * we see it but not "perl", we're probably looking at the
5452 * start of Perl code, not a request to hand off to some
5453 * other interpreter. Similarly, if "perl" is there, but
5454 * not in the first 'word' of the line, we assume the line
5455 * contains the start of the Perl program.
5457 if (d && *s != '#') {
5458 const char *c = ipath;
5459 while (*c && !strchr("; \t\r\n\f\v#", *c))
5462 d = NULL; /* "perl" not in first word; ignore */
5464 *s = '#'; /* Don't try to parse shebang line */
5466 #endif /* ALTERNATE_SHEBANG */
5471 !instr(s,"indir") &&
5472 instr(PL_origargv[0],"perl"))
5479 while (s < PL_bufend && isSPACE(*s))
5481 if (s < PL_bufend) {
5482 Newx(newargv,PL_origargc+3,char*);
5484 while (s < PL_bufend && !isSPACE(*s))
5487 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5490 newargv = PL_origargv;
5493 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5495 Perl_croak(aTHX_ "Can't exec %s", ipath);
5498 while (*d && !isSPACE(*d))
5500 while (SPACE_OR_TAB(*d))
5504 const bool switches_done = PL_doswitches;
5505 const U32 oldpdb = PL_perldb;
5506 const bool oldn = PL_minus_n;
5507 const bool oldp = PL_minus_p;
5511 bool baduni = FALSE;
5513 const char *d2 = d1 + 1;
5514 if (parse_unicode_opts((const char **)&d2)
5518 if (baduni || *d1 == 'M' || *d1 == 'm') {
5519 const char * const m = d1;
5520 while (*d1 && !isSPACE(*d1))
5522 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5525 d1 = moreswitches(d1);
5527 if (PL_doswitches && !switches_done) {
5528 int argc = PL_origargc;
5529 char **argv = PL_origargv;
5532 } while (argc && argv[0][0] == '-' && argv[0][1]);
5533 init_argv_symbols(argc,argv);
5535 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
5536 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5537 /* if we have already added "LINE: while (<>) {",
5538 we must not do it again */
5540 sv_setpvs(PL_linestr, "");
5541 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5542 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5543 PL_last_lop = PL_last_uni = NULL;
5544 PL_preambled = FALSE;
5545 if (PERLDB_LINE || PERLDB_SAVESRC)
5546 (void)gv_fetchfile(PL_origfilename);
5553 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5554 PL_lex_state = LEX_FORMLINE;
5555 start_force(PL_curforce);
5556 NEXTVAL_NEXTTOKE.ival = 0;
5557 force_next(FORMRBRACK);
5562 #ifdef PERL_STRICT_CR
5563 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5565 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5567 case ' ': case '\t': case '\f': case 013:
5569 PL_realtokenstart = -1;
5572 PL_thiswhite = newSVpvs("");
5573 sv_catpvn(PL_thiswhite, s, 1);
5581 PL_realtokenstart = -1;
5585 if (PL_lex_state != LEX_NORMAL ||
5586 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5587 if (*s == '#' && s == PL_linestart && PL_in_eval
5588 && !PL_rsfp && !PL_parser->filtered) {
5589 /* handle eval qq[#line 1 "foo"\n ...] */
5590 CopLINE_dec(PL_curcop);
5593 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5595 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5599 const bool in_comment = *s == '#';
5601 while (d < PL_bufend && *d != '\n')
5605 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5606 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5610 PL_thiswhite = newSVpvn(s, d - s);
5613 if (in_comment && d == PL_bufend
5614 && PL_lex_state == LEX_INTERPNORMAL
5615 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5616 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5619 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5620 PL_lex_state = LEX_FORMLINE;
5621 start_force(PL_curforce);
5622 NEXTVAL_NEXTTOKE.ival = 0;
5623 force_next(FORMRBRACK);
5629 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5630 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
5633 TOKEN(PEG); /* make sure any #! line is accessible */
5639 if (PL_madskills) d = s;
5640 while (s < PL_bufend && *s != '\n')
5648 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5649 Perl_croak(aTHX_ "panic: input overflow");
5651 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
5653 PL_thiswhite = newSVpvs("");
5654 if (CopLINE(PL_curcop) == 1) {
5655 sv_setpvs(PL_thiswhite, "");
5658 sv_catpvn(PL_thiswhite, d, s - d);
5665 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5673 while (s < PL_bufend && SPACE_OR_TAB(*s))
5676 if (strnEQ(s,"=>",2)) {
5677 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
5678 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5679 OPERATOR('-'); /* unary minus */
5682 case 'r': ftst = OP_FTEREAD; break;
5683 case 'w': ftst = OP_FTEWRITE; break;
5684 case 'x': ftst = OP_FTEEXEC; break;
5685 case 'o': ftst = OP_FTEOWNED; break;
5686 case 'R': ftst = OP_FTRREAD; break;
5687 case 'W': ftst = OP_FTRWRITE; break;
5688 case 'X': ftst = OP_FTREXEC; break;
5689 case 'O': ftst = OP_FTROWNED; break;
5690 case 'e': ftst = OP_FTIS; break;
5691 case 'z': ftst = OP_FTZERO; break;
5692 case 's': ftst = OP_FTSIZE; break;
5693 case 'f': ftst = OP_FTFILE; break;
5694 case 'd': ftst = OP_FTDIR; break;
5695 case 'l': ftst = OP_FTLINK; break;
5696 case 'p': ftst = OP_FTPIPE; break;
5697 case 'S': ftst = OP_FTSOCK; break;
5698 case 'u': ftst = OP_FTSUID; break;
5699 case 'g': ftst = OP_FTSGID; break;
5700 case 'k': ftst = OP_FTSVTX; break;
5701 case 'b': ftst = OP_FTBLK; break;
5702 case 'c': ftst = OP_FTCHR; break;
5703 case 't': ftst = OP_FTTTY; break;
5704 case 'T': ftst = OP_FTTEXT; break;
5705 case 'B': ftst = OP_FTBINARY; break;
5706 case 'M': case 'A': case 'C':
5707 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5709 case 'M': ftst = OP_FTMTIME; break;
5710 case 'A': ftst = OP_FTATIME; break;
5711 case 'C': ftst = OP_FTCTIME; break;
5719 PL_last_uni = PL_oldbufptr;
5720 PL_last_lop_op = (OPCODE)ftst;
5721 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5722 "### Saw file test %c\n", (int)tmp);
5727 /* Assume it was a minus followed by a one-letter named
5728 * subroutine call (or a -bareword), then. */
5729 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5730 "### '-%c' looked like a file test but was not\n",
5737 const char tmp = *s++;
5740 if (PL_expect == XOPERATOR)
5745 else if (*s == '>') {
5748 if (FEATURE_POSTDEREF_IS_ENABLED && (
5749 ((*s == '$' || *s == '&') && s[1] == '*')
5750 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5751 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5752 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5755 Perl_ck_warner_d(aTHX_
5756 packWARN(WARN_EXPERIMENTAL__POSTDEREF),
5757 "Postfix dereference is experimental"
5759 PL_expect = XPOSTDEREF;
5762 if (isIDFIRST_lazy_if(s,UTF)) {
5763 s = force_word(s,METHOD,FALSE,TRUE);
5771 if (PL_expect == XOPERATOR) {
5772 if (*s == '=' && !PL_lex_allbrackets &&
5773 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5780 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5782 OPERATOR('-'); /* unary minus */
5788 const char tmp = *s++;
5791 if (PL_expect == XOPERATOR)
5796 if (PL_expect == XOPERATOR) {
5797 if (*s == '=' && !PL_lex_allbrackets &&
5798 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5805 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5812 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5813 if (PL_expect != XOPERATOR) {
5814 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5815 PL_expect = XOPERATOR;
5816 force_ident(PL_tokenbuf, '*');
5824 if (*s == '=' && !PL_lex_allbrackets &&
5825 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5831 if (*s == '=' && !PL_lex_allbrackets &&
5832 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5836 PL_parser->saw_infix_sigil = 1;
5841 if (PL_expect == XOPERATOR) {
5842 if (s[1] == '=' && !PL_lex_allbrackets &&
5843 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5846 PL_parser->saw_infix_sigil = 1;
5849 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5850 PL_tokenbuf[0] = '%';
5851 s = scan_ident(s, PL_tokenbuf + 1,
5852 sizeof PL_tokenbuf - 1, FALSE);
5854 if (!PL_tokenbuf[1]) {
5857 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
5859 PL_tokenbuf[0] = '@';
5861 PL_expect = XOPERATOR;
5862 force_ident_maybe_lex('%');
5866 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5867 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5872 if (PL_lex_brackets > 100)
5873 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5874 PL_lex_brackstack[PL_lex_brackets++] = 0;
5875 PL_lex_allbrackets++;
5877 const char tmp = *s++;
5882 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5884 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5887 Perl_ck_warner_d(aTHX_
5888 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5889 "Smartmatch is experimental");
5895 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5902 goto just_a_word_zero_gv;
5905 switch (PL_expect) {
5911 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5913 PL_bufptr = s; /* update in case we back off */
5916 "Use of := for an empty attribute list is not allowed");
5923 PL_expect = XTERMBLOCK;
5926 stuffstart = s - SvPVX(PL_linestr) - 1;
5930 while (isIDFIRST_lazy_if(s,UTF)) {
5933 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5934 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5935 if (tmp < 0) tmp = -tmp;
5950 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5952 d = scan_str(d,TRUE,TRUE,FALSE,FALSE,NULL);
5953 COPLINE_SET_FROM_MULTI_END;
5955 /* MUST advance bufptr here to avoid bogus
5956 "at end of line" context messages from yyerror().
5958 PL_bufptr = s + len;
5959 yyerror("Unterminated attribute parameter in attribute list");
5963 return REPORT(0); /* EOF indicator */
5967 sv_catsv(sv, PL_lex_stuff);
5968 attrs = op_append_elem(OP_LIST, attrs,
5969 newSVOP(OP_CONST, 0, sv));
5970 SvREFCNT_dec(PL_lex_stuff);
5971 PL_lex_stuff = NULL;
5974 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5976 if (PL_in_my == KEY_our) {
5977 deprecate(":unique");
5980 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5983 /* NOTE: any CV attrs applied here need to be part of
5984 the CVf_BUILTIN_ATTRS define in cv.h! */
5985 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5987 CvLVALUE_on(PL_compcv);
5989 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5991 deprecate(":locked");
5993 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5995 CvMETHOD_on(PL_compcv);
5997 /* After we've set the flags, it could be argued that
5998 we don't need to do the attributes.pm-based setting
5999 process, and shouldn't bother appending recognized
6000 flags. To experiment with that, uncomment the
6001 following "else". (Note that's already been
6002 uncommented. That keeps the above-applied built-in
6003 attributes from being intercepted (and possibly
6004 rejected) by a package's attribute routines, but is
6005 justified by the performance win for the common case
6006 of applying only built-in attributes.) */
6008 attrs = op_append_elem(OP_LIST, attrs,
6009 newSVOP(OP_CONST, 0,
6013 if (*s == ':' && s[1] != ':')
6016 break; /* require real whitespace or :'s */
6017 /* XXX losing whitespace on sequential attributes here */
6021 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
6022 if (*s != ';' && *s != '}' && *s != tmp
6023 && (tmp != '=' || *s != ')')) {
6024 const char q = ((*s == '\'') ? '"' : '\'');
6025 /* If here for an expression, and parsed no attrs, back
6027 if (tmp == '=' && !attrs) {
6031 /* MUST advance bufptr here to avoid bogus "at end of line"
6032 context messages from yyerror().
6035 yyerror( (const char *)
6037 ? Perl_form(aTHX_ "Invalid separator character "
6038 "%c%c%c in attribute list", q, *s, q)
6039 : "Unterminated attribute list" ) );
6047 start_force(PL_curforce);
6048 NEXTVAL_NEXTTOKE.opval = attrs;
6049 CURMAD('_', PL_nextwhite);
6054 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
6055 (s - SvPVX(PL_linestr)) - stuffstart);
6060 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6064 PL_lex_allbrackets--;
6068 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6069 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6073 PL_lex_allbrackets++;
6076 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6082 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6085 PL_lex_allbrackets--;
6091 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6094 if (PL_lex_brackets <= 0)
6095 /* diag_listed_as: Unmatched right %s bracket */
6096 yyerror("Unmatched right square bracket");
6099 PL_lex_allbrackets--;
6100 if (PL_lex_state == LEX_INTERPNORMAL) {
6101 if (PL_lex_brackets == 0) {
6102 if (*s == '-' && s[1] == '>')
6103 PL_lex_state = LEX_INTERPENDMAYBE;
6104 else if (*s != '[' && *s != '{')
6105 PL_lex_state = LEX_INTERPEND;
6112 if (PL_lex_brackets > 100) {
6113 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6115 switch (PL_expect) {
6117 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6118 PL_lex_allbrackets++;
6119 OPERATOR(HASHBRACK);
6121 while (s < PL_bufend && SPACE_OR_TAB(*s))
6124 PL_tokenbuf[0] = '\0';
6125 if (d < PL_bufend && *d == '-') {
6126 PL_tokenbuf[0] = '-';
6128 while (d < PL_bufend && SPACE_OR_TAB(*d))
6131 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
6132 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6134 while (d < PL_bufend && SPACE_OR_TAB(*d))
6137 const char minus = (PL_tokenbuf[0] == '-');
6138 s = force_word(s + minus, WORD, FALSE, TRUE);
6146 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6147 PL_lex_allbrackets++;
6152 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153 PL_lex_allbrackets++;
6158 if (PL_oldoldbufptr == PL_last_lop)
6159 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6161 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6162 PL_lex_allbrackets++;
6165 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6167 /* This hack is to get the ${} in the message. */
6169 yyerror("syntax error");
6172 OPERATOR(HASHBRACK);
6174 /* This hack serves to disambiguate a pair of curlies
6175 * as being a block or an anon hash. Normally, expectation
6176 * determines that, but in cases where we're not in a
6177 * position to expect anything in particular (like inside
6178 * eval"") we have to resolve the ambiguity. This code
6179 * covers the case where the first term in the curlies is a
6180 * quoted string. Most other cases need to be explicitly
6181 * disambiguated by prepending a "+" before the opening
6182 * curly in order to force resolution as an anon hash.
6184 * XXX should probably propagate the outer expectation
6185 * into eval"" to rely less on this hack, but that could
6186 * potentially break current behavior of eval"".
6190 if (*s == '\'' || *s == '"' || *s == '`') {
6191 /* common case: get past first string, handling escapes */
6192 for (t++; t < PL_bufend && *t != *s;)
6193 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6197 else if (*s == 'q') {
6200 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6201 && !isWORDCHAR(*t))))
6203 /* skip q//-like construct */
6205 char open, close, term;
6208 while (t < PL_bufend && isSPACE(*t))
6210 /* check for q => */
6211 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6212 OPERATOR(HASHBRACK);
6216 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6220 for (t++; t < PL_bufend; t++) {
6221 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6223 else if (*t == open)
6227 for (t++; t < PL_bufend; t++) {
6228 if (*t == '\\' && t+1 < PL_bufend)
6230 else if (*t == close && --brackets <= 0)
6232 else if (*t == open)
6239 /* skip plain q word */
6240 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6243 else if (isWORDCHAR_lazy_if(t,UTF)) {
6245 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
6248 while (t < PL_bufend && isSPACE(*t))
6250 /* if comma follows first term, call it an anon hash */
6251 /* XXX it could be a comma expression with loop modifiers */
6252 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6253 || (*t == '=' && t[1] == '>')))
6254 OPERATOR(HASHBRACK);
6255 if (PL_expect == XREF)
6258 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6264 pl_yylval.ival = CopLINE(PL_curcop);
6265 if (isSPACE(*s) || *s == '#')
6266 PL_copline = NOLINE; /* invalidate current command line number */
6267 TOKEN(formbrack ? '=' : '{');
6269 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6273 if (PL_lex_brackets <= 0)
6274 /* diag_listed_as: Unmatched right %s bracket */
6275 yyerror("Unmatched right curly bracket");
6277 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6278 PL_lex_allbrackets--;
6279 if (PL_lex_state == LEX_INTERPNORMAL) {
6280 if (PL_lex_brackets == 0) {
6281 if (PL_expect & XFAKEBRACK) {
6282 PL_expect &= XENUMMASK;
6283 PL_lex_state = LEX_INTERPEND;
6288 PL_thiswhite = newSVpvs("");
6289 sv_catpvs(PL_thiswhite,"}");
6292 return yylex(); /* ignore fake brackets */
6294 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6295 && SvEVALED(PL_lex_repl))
6296 PL_lex_state = LEX_INTERPEND;
6297 else if (*s == '-' && s[1] == '>')
6298 PL_lex_state = LEX_INTERPENDMAYBE;
6299 else if (*s != '[' && *s != '{')
6300 PL_lex_state = LEX_INTERPEND;
6303 if (PL_expect & XFAKEBRACK) {
6304 PL_expect &= XENUMMASK;
6306 return yylex(); /* ignore fake brackets */
6308 start_force(PL_curforce);
6310 curmad('X', newSVpvn(s-1,1));
6311 CURMAD('_', PL_thiswhite);
6313 force_next(formbrack ? '.' : '}');
6314 if (formbrack) LEAVE;
6316 if (PL_madskills && !PL_thistoken)
6317 PL_thistoken = newSVpvs("");
6319 if (formbrack == 2) { /* means . where arguments were expected */
6320 start_force(PL_curforce);
6326 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6329 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6330 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6337 if (PL_expect == XOPERATOR) {
6338 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6339 && isIDFIRST_lazy_if(s,UTF))
6341 CopLINE_dec(PL_curcop);
6342 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6343 CopLINE_inc(PL_curcop);
6345 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6346 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6350 PL_parser->saw_infix_sigil = 1;
6354 PL_tokenbuf[0] = '&';
6355 s = scan_ident(s - 1, PL_tokenbuf + 1,
6356 sizeof PL_tokenbuf - 1, TRUE);
6357 if (PL_tokenbuf[1]) {
6358 PL_expect = XOPERATOR;
6359 force_ident_maybe_lex('&');
6363 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6369 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6370 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6377 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6378 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6386 const char tmp = *s++;
6388 if (!PL_lex_allbrackets &&
6389 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6396 if (!PL_lex_allbrackets &&
6397 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6405 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6406 && strchr("+-*/%.^&|<",tmp))
6407 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6408 "Reversed %c= operator",(int)tmp);
6410 if (PL_expect == XSTATE && isALPHA(tmp) &&
6411 (s == PL_linestart+1 || s[-2] == '\n') )
6413 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6414 || PL_lex_state != LEX_NORMAL) {
6419 if (strnEQ(s,"=cut",4)) {
6435 PL_thiswhite = newSVpvs("");
6436 sv_catpvn(PL_thiswhite, PL_linestart,
6437 PL_bufend - PL_linestart);
6441 PL_parser->in_pod = 1;
6445 if (PL_expect == XBLOCK) {
6447 #ifdef PERL_STRICT_CR
6448 while (SPACE_OR_TAB(*t))
6450 while (SPACE_OR_TAB(*t) || *t == '\r')
6453 if (*t == '\n' || *t == '#') {
6456 SAVEI8(PL_parser->form_lex_state);
6457 SAVEI32(PL_lex_formbrack);
6458 PL_parser->form_lex_state = PL_lex_state;
6459 PL_lex_formbrack = PL_lex_brackets + 1;
6463 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6472 const char tmp = *s++;
6474 /* was this !=~ where !~ was meant?
6475 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6477 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6478 const char *t = s+1;
6480 while (t < PL_bufend && isSPACE(*t))
6483 if (*t == '/' || *t == '?' ||
6484 ((*t == 'm' || *t == 's' || *t == 'y')
6485 && !isWORDCHAR(t[1])) ||
6486 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6487 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6488 "!=~ should be !~");
6490 if (!PL_lex_allbrackets &&
6491 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6503 if (PL_expect != XOPERATOR) {
6504 if (s[1] != '<' && !strchr(s,'>'))
6507 s = scan_heredoc(s);
6509 s = scan_inputsymbol(s);
6510 PL_expect = XOPERATOR;
6511 TOKEN(sublex_start());
6517 if (*s == '=' && !PL_lex_allbrackets &&
6518 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6522 SHop(OP_LEFT_SHIFT);
6527 if (!PL_lex_allbrackets &&
6528 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6535 if (!PL_lex_allbrackets &&
6536 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6544 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6552 const char tmp = *s++;
6554 if (*s == '=' && !PL_lex_allbrackets &&
6555 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6559 SHop(OP_RIGHT_SHIFT);
6561 else if (tmp == '=') {
6562 if (!PL_lex_allbrackets &&
6563 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6571 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6580 if (PL_expect == XOPERATOR) {
6581 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6582 return deprecate_commaless_var_list();
6585 else if (PL_expect == XPOSTDEREF) {
6588 POSTDEREF(DOLSHARP);
6593 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
6594 PL_tokenbuf[0] = '@';
6595 s = scan_ident(s + 1, PL_tokenbuf + 1,
6596 sizeof PL_tokenbuf - 1, FALSE);
6597 if (PL_expect == XOPERATOR)
6598 no_op("Array length", s);
6599 if (!PL_tokenbuf[1])
6601 PL_expect = XOPERATOR;
6602 force_ident_maybe_lex('#');
6606 PL_tokenbuf[0] = '$';
6607 s = scan_ident(s, PL_tokenbuf + 1,
6608 sizeof PL_tokenbuf - 1, FALSE);
6609 if (PL_expect == XOPERATOR)
6611 if (!PL_tokenbuf[1]) {
6613 yyerror("Final $ should be \\$ or $name");
6619 const char tmp = *s;
6620 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6623 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6624 && intuit_more(s)) {
6626 PL_tokenbuf[0] = '@';
6627 if (ckWARN(WARN_SYNTAX)) {
6630 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
6633 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
6634 while (t < PL_bufend && *t != ']')
6636 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6637 "Multidimensional syntax %.*s not supported",
6638 (int)((t - PL_bufptr) + 1), PL_bufptr);
6642 else if (*s == '{') {
6644 PL_tokenbuf[0] = '%';
6645 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6646 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6648 char tmpbuf[sizeof PL_tokenbuf];
6651 } while (isSPACE(*t));
6652 if (isIDFIRST_lazy_if(t,UTF)) {
6654 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6659 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
6660 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6661 "You need to quote \"%"UTF8f"\"",
6662 UTF8fARG(UTF, len, tmpbuf));
6668 PL_expect = XOPERATOR;
6669 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6670 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6671 if (!islop || PL_last_lop_op == OP_GREPSTART)
6672 PL_expect = XOPERATOR;
6673 else if (strchr("$@\"'`q", *s))
6674 PL_expect = XTERM; /* e.g. print $fh "foo" */
6675 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6676 PL_expect = XTERM; /* e.g. print $fh &sub */
6677 else if (isIDFIRST_lazy_if(s,UTF)) {
6678 char tmpbuf[sizeof PL_tokenbuf];
6680 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6681 if ((t2 = keyword(tmpbuf, len, 0))) {
6682 /* binary operators exclude handle interpretations */
6694 PL_expect = XTERM; /* e.g. print $fh length() */
6699 PL_expect = XTERM; /* e.g. print $fh subr() */
6702 else if (isDIGIT(*s))
6703 PL_expect = XTERM; /* e.g. print $fh 3 */
6704 else if (*s == '.' && isDIGIT(s[1]))
6705 PL_expect = XTERM; /* e.g. print $fh .3 */
6706 else if ((*s == '?' || *s == '-' || *s == '+')
6707 && !isSPACE(s[1]) && s[1] != '=')
6708 PL_expect = XTERM; /* e.g. print $fh -1 */
6709 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6711 PL_expect = XTERM; /* e.g. print $fh /.../
6712 XXX except DORDOR operator
6714 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6716 PL_expect = XTERM; /* print $fh <<"EOF" */
6719 force_ident_maybe_lex('$');
6723 if (PL_expect == XOPERATOR)
6725 else if (PL_expect == XPOSTDEREF) POSTDEREF('@');
6726 PL_tokenbuf[0] = '@';
6727 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6729 if (!PL_tokenbuf[1]) {
6732 if (PL_lex_state == LEX_NORMAL)
6734 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
6736 PL_tokenbuf[0] = '%';
6738 /* Warn about @ where they meant $. */
6739 if (*s == '[' || *s == '{') {
6740 if (ckWARN(WARN_SYNTAX)) {
6741 S_check_scalar_slice(aTHX_ s);
6745 PL_expect = XOPERATOR;
6746 force_ident_maybe_lex('@');
6749 case '/': /* may be division, defined-or, or pattern */
6750 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
6751 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6752 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6757 case '?': /* may either be conditional or pattern */
6758 if (PL_expect == XOPERATOR) {
6761 if (!PL_lex_allbrackets &&
6762 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6766 PL_lex_allbrackets++;
6772 /* A // operator. */
6773 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6774 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6775 LEX_FAKEEOF_LOGIC)) {
6783 if (*s == '=' && !PL_lex_allbrackets &&
6784 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6793 /* Disable warning on "study /blah/" */
6794 if (PL_oldoldbufptr == PL_last_uni
6795 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6796 || memNE(PL_last_uni, "study", 5)
6797 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
6801 deprecate("?PATTERN? without explicit operator");
6802 s = scan_pat(s,OP_MATCH);
6803 TERM(sublex_start());
6807 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6808 #ifdef PERL_STRICT_CR
6811 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6813 && (s == PL_linestart || s[-1] == '\n') )
6816 formbrack = 2; /* dot seen where arguments expected */
6819 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6823 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6826 if (!PL_lex_allbrackets &&
6827 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6834 pl_yylval.ival = OPf_SPECIAL;
6840 if (*s == '=' && !PL_lex_allbrackets &&
6841 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6848 case '0': case '1': case '2': case '3': case '4':
6849 case '5': case '6': case '7': case '8': case '9':
6850 s = scan_num(s, &pl_yylval);
6851 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6852 if (PL_expect == XOPERATOR)
6857 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6858 COPLINE_SET_FROM_MULTI_END;
6859 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6860 if (PL_expect == XOPERATOR) {
6861 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6862 return deprecate_commaless_var_list();
6869 pl_yylval.ival = OP_CONST;
6870 TERM(sublex_start());
6873 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6876 printbuf("### Saw string before %s\n", s);
6878 PerlIO_printf(Perl_debug_log,
6879 "### Saw unterminated string\n");
6881 if (PL_expect == XOPERATOR) {
6882 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
6883 return deprecate_commaless_var_list();
6890 pl_yylval.ival = OP_CONST;
6891 /* FIXME. I think that this can be const if char *d is replaced by
6892 more localised variables. */
6893 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6894 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6895 pl_yylval.ival = OP_STRINGIFY;
6899 if (pl_yylval.ival == OP_CONST)
6900 COPLINE_SET_FROM_MULTI_END;
6901 TERM(sublex_start());
6904 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
6905 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
6906 if (PL_expect == XOPERATOR)
6907 no_op("Backticks",s);
6910 pl_yylval.ival = OP_BACKTICK;
6911 TERM(sublex_start());
6915 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6917 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6919 if (PL_expect == XOPERATOR)
6920 no_op("Backslash",s);
6924 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
6925 char *start = s + 2;
6926 while (isDIGIT(*start) || *start == '_')
6928 if (*start == '.' && isDIGIT(start[1])) {
6929 s = scan_num(s, &pl_yylval);
6932 else if ((*start == ':' && start[1] == ':')
6933 || (PL_expect == XSTATE && *start == ':'))
6935 else if (PL_expect == XSTATE) {
6937 while (d < PL_bufend && isSPACE(*d)) d++;
6938 if (*d == ':') goto keylookup;
6940 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6941 if (!isALPHA(*start) && (PL_expect == XTERM
6942 || PL_expect == XREF || PL_expect == XSTATE
6943 || PL_expect == XTERMORDORDOR)) {
6944 GV *const gv = gv_fetchpvn_flags(s, start - s,
6945 UTF ? SVf_UTF8 : 0, SVt_PVCV);
6947 s = scan_num(s, &pl_yylval);
6954 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7007 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7009 /* Some keywords can be followed by any delimiter, including ':' */
7010 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
7012 /* x::* is just a word, unless x is "CORE" */
7013 if (!anydelim && *s == ':' && s[1] == ':') {
7014 if (strEQ(PL_tokenbuf, "CORE")) goto case_KEY_CORE;
7019 while (d < PL_bufend && isSPACE(*d))
7020 d++; /* no comments skipped here, or s### is misparsed */
7022 /* Is this a word before a => operator? */
7023 if (*d == '=' && d[1] == '>') {
7027 = (OP*)newSVOP(OP_CONST, 0,
7028 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7029 pl_yylval.opval->op_private = OPpCONST_BARE;
7033 /* Check for plugged-in keyword */
7037 char *saved_bufptr = PL_bufptr;
7039 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7041 if (result == KEYWORD_PLUGIN_DECLINE) {
7042 /* not a plugged-in keyword */
7043 PL_bufptr = saved_bufptr;
7044 } else if (result == KEYWORD_PLUGIN_STMT) {
7045 pl_yylval.opval = o;
7048 return REPORT(PLUGSTMT);
7049 } else if (result == KEYWORD_PLUGIN_EXPR) {
7050 pl_yylval.opval = o;
7052 PL_expect = XOPERATOR;
7053 return REPORT(PLUGEXPR);
7055 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7060 /* Check for built-in keyword */
7061 tmp = keyword(PL_tokenbuf, len, 0);
7063 /* Is this a label? */
7064 if (!anydelim && PL_expect == XSTATE
7065 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7067 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7068 pl_yylval.pval[len] = '\0';
7069 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7074 /* Check for lexical sub */
7075 if (PL_expect != XOPERATOR) {
7076 char tmpbuf[sizeof PL_tokenbuf + 1];
7078 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7079 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7080 if (off != NOT_IN_PAD) {
7081 assert(off); /* we assume this is boolean-true below */
7082 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7083 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7084 HEK * const stashname = HvNAME_HEK(stash);
7085 sv = newSVhek(stashname);
7086 sv_catpvs(sv, "::");
7087 sv_catpvn_flags(sv, PL_tokenbuf, len,
7088 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7089 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7099 rv2cv_op = newOP(OP_PADANY, 0);
7100 rv2cv_op->op_targ = off;
7101 cv = find_lexical_cv(off);
7109 if (tmp < 0) { /* second-class keyword? */
7110 GV *ogv = NULL; /* override (winner) */
7111 GV *hgv = NULL; /* hidden (loser) */
7112 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7114 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7115 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7119 if (GvIMPORTED_CV(gv))
7121 else if (! CvMETHOD(cv))
7125 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7129 ? GvCVu(gv) && GvIMPORTED_CV(gv)
7130 : SvPCS_IMPORTED(gv)
7131 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7140 tmp = 0; /* overridden by import or by GLOBAL */
7143 && -tmp==KEY_lock /* XXX generalizable kludge */
7146 tmp = 0; /* any sub overrides "weak" keyword */
7148 else { /* no override */
7150 if (tmp == KEY_dump) {
7151 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7152 "dump() better written as CORE::dump()");
7156 if (hgv && tmp != KEY_x) /* never ambiguous */
7157 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7158 "Ambiguous call resolved as CORE::%s(), "
7159 "qualify as such or use &",
7164 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7165 && (!anydelim || *s != '#')) {
7166 /* no override, and not s### either; skipspace is safe here
7167 * check for => on following line */
7169 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7170 STRLEN soff = s - SvPVX(PL_linestr);
7171 s = skipspace_flags(s, LEX_NO_INCLINE);
7172 arrow = *s == '=' && s[1] == '>';
7173 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7174 s = SvPVX(PL_linestr) + soff;
7182 default: /* not a keyword */
7183 /* Trade off - by using this evil construction we can pull the
7184 variable gv into the block labelled keylookup. If not, then
7185 we have to give it function scope so that the goto from the
7186 earlier ':' case doesn't bypass the initialisation. */
7188 just_a_word_zero_gv:
7200 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7201 const char penultchar =
7202 lastchar && PL_bufptr - 2 >= PL_linestart
7206 SV *nextPL_nextwhite = 0;
7210 /* Get the rest if it looks like a package qualifier */
7212 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7214 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7217 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7218 UTF8fARG(UTF, len, PL_tokenbuf),
7219 *s == '\'' ? "'" : "::");
7224 if (PL_expect == XOPERATOR) {
7225 if (PL_bufptr == PL_linestart) {
7226 CopLINE_dec(PL_curcop);
7227 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7228 CopLINE_inc(PL_curcop);
7231 no_op("Bareword",s);
7234 /* Look for a subroutine with this name in current package,
7235 unless this is a lexical sub, or name is "Foo::",
7236 in which case Foo is a bareword
7237 (and a package name). */
7239 if (len > 2 && !PL_madskills &&
7240 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
7242 if (ckWARN(WARN_BAREWORD)
7243 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7244 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7245 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7246 UTF8fARG(UTF, len, PL_tokenbuf));
7248 PL_tokenbuf[len] = '\0';
7254 /* Mustn't actually add anything to a symbol table.
7255 But also don't want to "initialise" any placeholder
7256 constants that might already be there into full
7257 blown PVGVs with attached PVCV. */
7258 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7259 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7265 /* if we saw a global override before, get the right name */
7268 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7269 len ? len : strlen(PL_tokenbuf));
7271 SV * const tmp_sv = sv;
7272 sv = newSVpvs("CORE::GLOBAL::");
7273 sv_catsv(sv, tmp_sv);
7274 SvREFCNT_dec(tmp_sv);
7278 if (PL_madskills && !PL_thistoken) {
7279 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
7280 PL_thistoken = newSVpvn(start,s - start);
7281 PL_realtokenstart = s - SvPVX(PL_linestr);
7285 /* Presume this is going to be a bareword of some sort. */
7287 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7288 pl_yylval.opval->op_private = OPpCONST_BARE;
7290 /* And if "Foo::", then that's what it certainly is. */
7296 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7297 const_op->op_private = OPpCONST_BARE;
7298 rv2cv_op = newCVREF(0, const_op);
7299 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
7302 /* See if it's the indirect object for a list operator. */
7304 if (PL_oldoldbufptr &&
7305 PL_oldoldbufptr < PL_bufptr &&
7306 (PL_oldoldbufptr == PL_last_lop
7307 || PL_oldoldbufptr == PL_last_uni) &&
7308 /* NO SKIPSPACE BEFORE HERE! */
7309 (PL_expect == XREF ||
7310 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
7312 bool immediate_paren = *s == '(';
7314 /* (Now we can afford to cross potential line boundary.) */
7315 s = SKIPSPACE2(s,nextPL_nextwhite);
7317 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
7320 /* Two barewords in a row may indicate method call. */
7322 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
7323 (tmp = intuit_method(s, gv, cv))) {
7325 if (tmp == METHOD && !PL_lex_allbrackets &&
7326 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7327 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7331 /* If not a declared subroutine, it's an indirect object. */
7332 /* (But it's an indir obj regardless for sort.) */
7333 /* Also, if "_" follows a filetest operator, it's a bareword */
7336 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
7338 (PL_last_lop_op != OP_MAPSTART &&
7339 PL_last_lop_op != OP_GREPSTART))))
7340 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7341 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7344 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7349 PL_expect = XOPERATOR;
7352 s = SKIPSPACE2(s,nextPL_nextwhite);
7353 PL_nextwhite = nextPL_nextwhite;
7358 /* Is this a word before a => operator? */
7359 if (*s == '=' && s[1] == '>' && !pkgname) {
7362 /* This is our own scalar, created a few lines above,
7364 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
7365 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
7366 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
7367 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
7368 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
7372 /* If followed by a paren, it's certainly a subroutine. */
7377 while (SPACE_OR_TAB(*d))
7379 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7386 PL_nextwhite = PL_thiswhite;
7389 start_force(PL_curforce);
7391 NEXTVAL_NEXTTOKE.opval =
7392 off ? rv2cv_op : pl_yylval.opval;
7393 PL_expect = XOPERATOR;
7396 PL_nextwhite = nextPL_nextwhite;
7397 curmad('X', PL_thistoken);
7398 PL_thistoken = newSVpvs("");
7402 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7403 else op_free(rv2cv_op), force_next(WORD);
7408 /* If followed by var or block, call it a method (unless sub) */
7410 if ((*s == '$' || *s == '{') && !cv) {
7412 PL_last_lop = PL_oldbufptr;
7413 PL_last_lop_op = OP_METHOD;
7414 if (!PL_lex_allbrackets &&
7415 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7416 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7420 /* If followed by a bareword, see if it looks like indir obj. */
7423 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
7424 && (tmp = intuit_method(s, gv, cv))) {
7426 if (tmp == METHOD && !PL_lex_allbrackets &&
7427 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7428 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7432 /* Not a method, so call it a subroutine (if defined) */
7435 if (lastchar == '-' && penultchar != '-') {
7436 const STRLEN l = len ? len : strlen(PL_tokenbuf);
7437 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7438 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7439 UTF8fARG(UTF, l, PL_tokenbuf),
7440 UTF8fARG(UTF, l, PL_tokenbuf));
7442 /* Check for a constant sub */
7443 if ((sv = cv_const_sv_or_av(cv))) {
7446 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7447 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7448 if (SvTYPE(sv) == SVt_PVAV)
7449 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7452 pl_yylval.opval->op_private = 0;
7453 pl_yylval.opval->op_folded = 1;
7454 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7459 op_free(pl_yylval.opval);
7461 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7462 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7463 PL_last_lop = PL_oldbufptr;
7464 PL_last_lop_op = OP_ENTERSUB;
7465 /* Is there a prototype? */
7472 STRLEN protolen = CvPROTOLEN(cv);
7473 const char *proto = CvPROTO(cv);
7475 proto = S_strip_spaces(aTHX_ proto, &protolen);
7478 if ((optional = *proto == ';'))
7481 while (*proto == ';');
7485 *proto == '$' || *proto == '_'
7486 || *proto == '*' || *proto == '+'
7491 *proto == '\\' && proto[1] && proto[2] == '\0'
7494 UNIPROTO(UNIOPSUB,optional);
7495 if (*proto == '\\' && proto[1] == '[') {
7496 const char *p = proto + 2;
7497 while(*p && *p != ']')
7499 if(*p == ']' && !p[1])
7500 UNIPROTO(UNIOPSUB,optional);
7502 if (*proto == '&' && *s == '{') {
7504 sv_setpvs(PL_subname, "__ANON__");
7506 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7507 if (!PL_lex_allbrackets &&
7508 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7509 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7516 PL_nextwhite = PL_thiswhite;
7519 start_force(PL_curforce);
7520 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7523 PL_nextwhite = nextPL_nextwhite;
7524 curmad('X', PL_thistoken);
7525 PL_thistoken = newSVpvs("");
7527 force_next(off ? PRIVATEREF : WORD);
7528 if (!PL_lex_allbrackets &&
7529 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7530 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7535 /* Guess harder when madskills require "best effort". */
7536 if (PL_madskills && (!gv || !GvCVu(gv))) {
7537 int probable_sub = 0;
7538 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7540 else if (isALPHA(*s)) {
7544 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
7545 if (!keyword(tmpbuf, tmplen, 0))
7548 while (d < PL_bufend && isSPACE(*d))
7550 if (*d == '=' && d[1] == '>')
7555 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7557 op_free(pl_yylval.opval);
7559 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
7560 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7561 PL_last_lop = PL_oldbufptr;
7562 PL_last_lop_op = OP_ENTERSUB;
7563 PL_nextwhite = PL_thiswhite;
7565 start_force(PL_curforce);
7566 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7568 PL_nextwhite = nextPL_nextwhite;
7569 curmad('X', PL_thistoken);
7570 PL_thistoken = newSVpvs("");
7571 force_next(off ? PRIVATEREF : WORD);
7572 if (!PL_lex_allbrackets &&
7573 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7574 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7578 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7580 force_next(off ? PRIVATEREF : WORD);
7581 if (!PL_lex_allbrackets &&
7582 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7583 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7588 /* Call it a bare word */
7590 if (PL_hints & HINT_STRICT_SUBS)
7591 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7594 /* after "print" and similar functions (corresponding to
7595 * "F? L" in opcode.pl), whatever wasn't already parsed as
7596 * a filehandle should be subject to "strict subs".
7597 * Likewise for the optional indirect-object argument to system
7598 * or exec, which can't be a bareword */
7599 if ((PL_last_lop_op == OP_PRINT
7600 || PL_last_lop_op == OP_PRTF
7601 || PL_last_lop_op == OP_SAY
7602 || PL_last_lop_op == OP_SYSTEM
7603 || PL_last_lop_op == OP_EXEC)
7604 && (PL_hints & HINT_STRICT_SUBS))
7605 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7606 if (lastchar != '-') {
7607 if (ckWARN(WARN_RESERVED)) {
7611 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7613 /* PL_warn_reserved is constant */
7614 GCC_DIAG_IGNORE(-Wformat-nonliteral);
7615 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7625 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7626 && saw_infix_sigil) {
7627 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7628 "Operator or semicolon missing before %c%"UTF8f,
7630 UTF8fARG(UTF, strlen(PL_tokenbuf),
7632 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7633 "Ambiguous use of %c resolved as operator %c",
7634 lastchar, lastchar);
7641 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7646 (OP*)newSVOP(OP_CONST, 0,
7647 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7650 case KEY___PACKAGE__:
7652 (OP*)newSVOP(OP_CONST, 0,
7654 ? newSVhek(HvNAME_HEK(PL_curstash))
7661 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7662 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7665 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7667 gv_init(gv,stash,"DATA",4,0);
7670 GvIOp(gv) = newIO();
7671 IoIFP(GvIOp(gv)) = PL_rsfp;
7672 #if defined(HAS_FCNTL) && defined(F_SETFD)
7674 const int fd = PerlIO_fileno(PL_rsfp);
7675 fcntl(fd,F_SETFD,fd >= 3);
7678 /* Mark this internal pseudo-handle as clean */
7679 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7680 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7681 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7683 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7684 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7685 /* if the script was opened in binmode, we need to revert
7686 * it to text mode for compatibility; but only iff it has CRs
7687 * XXX this is a questionable hack at best. */
7688 if (PL_bufend-PL_bufptr > 2
7689 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7692 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7693 loc = PerlIO_tell(PL_rsfp);
7694 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7697 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7699 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7700 #endif /* NETWARE */
7702 PerlIO_seek(PL_rsfp, loc, 0);
7706 #ifdef PERLIO_LAYERS
7709 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7710 else if (PL_encoding) {
7717 XPUSHs(PL_encoding);
7719 call_method("name", G_SCALAR);
7723 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
7724 Perl_form(aTHX_ ":encoding(%"SVf")",
7733 if (PL_realtokenstart >= 0) {
7734 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7736 PL_endwhite = newSVpvs("");
7737 sv_catsv(PL_endwhite, PL_thiswhite);
7739 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7740 PL_realtokenstart = -1;
7742 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7752 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
7761 if (PL_expect == XSTATE) {
7772 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7773 if ((*s == ':' && s[1] == ':')
7774 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7778 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7782 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7783 UTF8fARG(UTF, len, PL_tokenbuf));
7786 else if (tmp == KEY_require || tmp == KEY_do
7788 /* that's a way to remember we saw "CORE::" */
7800 LOP(OP_ACCEPT,XTERM);
7803 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7808 LOP(OP_ATAN2,XTERM);
7814 LOP(OP_BINMODE,XTERM);
7817 LOP(OP_BLESS,XTERM);
7826 /* We have to disambiguate the two senses of
7827 "continue". If the next token is a '{' then
7828 treat it as the start of a continue block;
7829 otherwise treat it as a control operator.
7839 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7849 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7858 if (!PL_cryptseen) {
7859 PL_cryptseen = TRUE;
7863 LOP(OP_CRYPT,XTERM);
7866 LOP(OP_CHMOD,XTERM);
7869 LOP(OP_CHOWN,XTERM);
7872 LOP(OP_CONNECT,XTERM);
7892 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7894 if (len && (len != 4 || strNE(PL_tokenbuf+1, "CORE"))
7895 && !keyword(PL_tokenbuf + 1, len, 0)) {
7898 force_ident_maybe_lex('&');
7903 if (orig_keyword == KEY_do) {
7912 PL_hints |= HINT_BLOCK_SCOPE;
7922 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7923 STR_WITH_LEN("NDBM_File::"),
7924 STR_WITH_LEN("DB_File::"),
7925 STR_WITH_LEN("GDBM_File::"),
7926 STR_WITH_LEN("SDBM_File::"),
7927 STR_WITH_LEN("ODBM_File::"),
7929 LOP(OP_DBMOPEN,XTERM);
7935 PL_expect = XOPERATOR;
7936 s = force_word(s,WORD,TRUE,FALSE);
7943 pl_yylval.ival = CopLINE(PL_curcop);
7947 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7961 if (*s == '{') { /* block eval */
7962 PL_expect = XTERMBLOCK;
7963 UNIBRACK(OP_ENTERTRY);
7965 else { /* string eval */
7967 UNIBRACK(OP_ENTEREVAL);
7972 UNIBRACK(-OP_ENTEREVAL);
7986 case KEY_endhostent:
7992 case KEY_endservent:
7995 case KEY_endprotoent:
8006 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8008 pl_yylval.ival = CopLINE(PL_curcop);
8010 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
8013 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
8016 if ((PL_bufend - p) >= 3 &&
8017 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
8019 else if ((PL_bufend - p) >= 4 &&
8020 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
8023 /* skip optional package name, as in "for my abc $x (..)" */
8024 if (isIDFIRST_lazy_if(p,UTF)) {
8025 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8029 Perl_croak(aTHX_ "Missing $ on loop variable");
8031 s = SvPVX(PL_linestr) + soff;
8037 LOP(OP_FORMLINE,XTERM);
8046 LOP(OP_FCNTL,XTERM);
8052 LOP(OP_FLOCK,XTERM);
8055 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8060 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8065 LOP(OP_GREPSTART, XREF);
8068 PL_expect = XOPERATOR;
8069 s = force_word(s,WORD,TRUE,FALSE);
8084 case KEY_getpriority:
8085 LOP(OP_GETPRIORITY,XTERM);
8087 case KEY_getprotobyname:
8090 case KEY_getprotobynumber:
8091 LOP(OP_GPBYNUMBER,XTERM);
8093 case KEY_getprotoent:
8105 case KEY_getpeername:
8106 UNI(OP_GETPEERNAME);
8108 case KEY_gethostbyname:
8111 case KEY_gethostbyaddr:
8112 LOP(OP_GHBYADDR,XTERM);
8114 case KEY_gethostent:
8117 case KEY_getnetbyname:
8120 case KEY_getnetbyaddr:
8121 LOP(OP_GNBYADDR,XTERM);
8126 case KEY_getservbyname:
8127 LOP(OP_GSBYNAME,XTERM);
8129 case KEY_getservbyport:
8130 LOP(OP_GSBYPORT,XTERM);
8132 case KEY_getservent:
8135 case KEY_getsockname:
8136 UNI(OP_GETSOCKNAME);
8138 case KEY_getsockopt:
8139 LOP(OP_GSOCKOPT,XTERM);
8154 pl_yylval.ival = CopLINE(PL_curcop);
8155 Perl_ck_warner_d(aTHX_
8156 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8157 "given is experimental");
8162 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8170 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8172 pl_yylval.ival = CopLINE(PL_curcop);
8176 LOP(OP_INDEX,XTERM);
8182 LOP(OP_IOCTL,XTERM);
8194 PL_expect = XOPERATOR;
8195 s = force_word(s,WORD,TRUE,FALSE);
8212 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8217 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8231 LOP(OP_LISTEN,XTERM);
8240 s = scan_pat(s,OP_MATCH);
8241 TERM(sublex_start());
8244 LOP(OP_MAPSTART, XREF);
8247 LOP(OP_MKDIR,XTERM);
8250 LOP(OP_MSGCTL,XTERM);
8253 LOP(OP_MSGGET,XTERM);
8256 LOP(OP_MSGRCV,XTERM);
8259 LOP(OP_MSGSND,XTERM);
8264 PL_in_my = (U16)tmp;
8266 if (isIDFIRST_lazy_if(s,UTF)) {
8270 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8271 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
8273 if (!FEATURE_LEXSUBS_IS_ENABLED)
8275 "Experimental \"%s\" subs not enabled",
8276 tmp == KEY_my ? "my" :
8277 tmp == KEY_state ? "state" : "our");
8278 Perl_ck_warner_d(aTHX_
8279 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8280 "The lexical_subs feature is experimental");
8283 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8284 if (!PL_in_my_stash) {
8287 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8288 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8291 if (PL_madskills) { /* just add type to declarator token */
8292 sv_catsv(PL_thistoken, PL_nextwhite);
8294 sv_catpvn(PL_thistoken, start, s - start);
8302 PL_expect = XOPERATOR;
8303 s = force_word(s,WORD,TRUE,FALSE);
8307 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8312 s = tokenize_use(0, s);
8316 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
8319 if (!PL_lex_allbrackets &&
8320 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8321 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8327 if (isIDFIRST_lazy_if(s,UTF)) {
8329 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8331 for (t=d; isSPACE(*t);)
8333 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8335 && !(t[0] == '=' && t[1] == '>')
8336 && !(t[0] == ':' && t[1] == ':')
8337 && !keyword(s, d-s, 0)
8339 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8340 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8341 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8347 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8349 pl_yylval.ival = OP_OR;
8359 LOP(OP_OPEN_DIR,XTERM);
8362 checkcomma(s,PL_tokenbuf,"filehandle");
8366 checkcomma(s,PL_tokenbuf,"filehandle");
8385 s = force_word(s,WORD,FALSE,TRUE);
8387 s = force_strict_version(s);
8388 PL_lex_expect = XBLOCK;
8392 LOP(OP_PIPE_OP,XTERM);
8395 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8396 COPLINE_SET_FROM_MULTI_END;
8399 pl_yylval.ival = OP_CONST;
8400 TERM(sublex_start());
8407 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8408 COPLINE_SET_FROM_MULTI_END;
8411 PL_expect = XOPERATOR;
8412 if (SvCUR(PL_lex_stuff)) {
8413 int warned_comma = !ckWARN(WARN_QW);
8414 int warned_comment = warned_comma;
8415 d = SvPV_force(PL_lex_stuff, len);
8417 for (; isSPACE(*d) && len; --len, ++d)
8422 if (!warned_comma || !warned_comment) {
8423 for (; !isSPACE(*d) && len; --len, ++d) {
8424 if (!warned_comma && *d == ',') {
8425 Perl_warner(aTHX_ packWARN(WARN_QW),
8426 "Possible attempt to separate words with commas");
8429 else if (!warned_comment && *d == '#') {
8430 Perl_warner(aTHX_ packWARN(WARN_QW),
8431 "Possible attempt to put comments in qw() list");
8437 for (; !isSPACE(*d) && len; --len, ++d)
8440 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8441 words = op_append_elem(OP_LIST, words,
8442 newSVOP(OP_CONST, 0, tokeq(sv)));
8447 words = newNULLLIST();
8449 SvREFCNT_dec(PL_lex_stuff);
8450 PL_lex_stuff = NULL;
8452 PL_expect = XOPERATOR;
8453 pl_yylval.opval = sawparens(words);
8458 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8461 pl_yylval.ival = OP_STRINGIFY;
8462 if (SvIVX(PL_lex_stuff) == '\'')
8463 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8464 TERM(sublex_start());
8467 s = scan_pat(s,OP_QR);
8468 TERM(sublex_start());
8471 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8474 pl_yylval.ival = OP_BACKTICK;
8475 TERM(sublex_start());
8482 PL_expect = XOPERATOR;
8484 s = force_version(s, FALSE);
8486 else if (*s != 'v' || !isDIGIT(s[1])
8487 || (s = force_version(s, TRUE), *s == 'v'))
8489 *PL_tokenbuf = '\0';
8490 s = force_word(s,WORD,TRUE,TRUE);
8491 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
8492 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8493 GV_ADD | (UTF ? SVf_UTF8 : 0));
8495 yyerror("<> should be quotes");
8497 if (orig_keyword == KEY_require) {
8505 PL_last_uni = PL_oldbufptr;
8506 PL_last_lop_op = OP_REQUIRE;
8508 return REPORT( (int)REQUIRE );
8514 PL_expect = XOPERATOR;
8515 s = force_word(s,WORD,TRUE,FALSE);
8519 LOP(OP_RENAME,XTERM);
8528 LOP(OP_RINDEX,XTERM);
8537 UNIDOR(OP_READLINE);
8540 UNIDOR(OP_BACKTICK);
8549 LOP(OP_REVERSE,XTERM);
8552 UNIDOR(OP_READLINK);
8559 if (pl_yylval.opval)
8560 TERM(sublex_start());
8562 TOKEN(1); /* force error */
8565 checkcomma(s,PL_tokenbuf,"filehandle");
8575 LOP(OP_SELECT,XTERM);
8581 LOP(OP_SEMCTL,XTERM);
8584 LOP(OP_SEMGET,XTERM);
8587 LOP(OP_SEMOP,XTERM);
8593 LOP(OP_SETPGRP,XTERM);
8595 case KEY_setpriority:
8596 LOP(OP_SETPRIORITY,XTERM);
8598 case KEY_sethostent:
8604 case KEY_setservent:
8607 case KEY_setprotoent:
8617 LOP(OP_SEEKDIR,XTERM);
8619 case KEY_setsockopt:
8620 LOP(OP_SSOCKOPT,XTERM);
8626 LOP(OP_SHMCTL,XTERM);
8629 LOP(OP_SHMGET,XTERM);
8632 LOP(OP_SHMREAD,XTERM);
8635 LOP(OP_SHMWRITE,XTERM);
8638 LOP(OP_SHUTDOWN,XTERM);
8647 LOP(OP_SOCKET,XTERM);
8649 case KEY_socketpair:
8650 LOP(OP_SOCKPAIR,XTERM);
8653 checkcomma(s,PL_tokenbuf,"subroutine name");
8656 s = force_word(s,WORD,TRUE,TRUE);
8660 LOP(OP_SPLIT,XTERM);
8663 LOP(OP_SPRINTF,XTERM);
8666 LOP(OP_SPLICE,XTERM);
8681 LOP(OP_SUBSTR,XTERM);
8687 char * const tmpbuf = PL_tokenbuf + 1;
8688 expectation attrful;
8689 bool have_name, have_proto;
8690 const int key = tmp;
8692 SV *format_name = NULL;
8698 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
8699 SV *subtoken = PL_madskills
8700 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8705 s = SKIPSPACE2(s,tmpwhite);
8711 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
8712 (*s == ':' && s[1] == ':'))
8715 SV *nametoke = NULL;
8719 attrful = XATTRBLOCK;
8720 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8724 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
8726 if (key == KEY_format)
8727 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8730 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8732 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8734 sv_setpvn(PL_subname, tmpbuf, len);
8736 sv_setsv(PL_subname,PL_curstname);
8737 sv_catpvs(PL_subname,"::");
8738 sv_catpvn(PL_subname,tmpbuf,len);
8740 if (SvUTF8(PL_linestr))
8741 SvUTF8_on(PL_subname);
8747 CURMAD('X', nametoke);
8748 CURMAD('_', tmpwhite);
8749 force_ident_maybe_lex('&');
8751 s = SKIPSPACE2(d,tmpwhite);
8757 if (key == KEY_my || key == KEY_our || key==KEY_state)
8760 /* diag_listed_as: Missing name in "%s sub" */
8762 "Missing name in \"%s\"", PL_bufptr);
8764 PL_expect = XTERMBLOCK;
8765 attrful = XATTRTERM;
8766 sv_setpvs(PL_subname,"?");
8770 if (key == KEY_format) {
8772 PL_thistoken = subtoken;
8776 start_force(PL_curforce);
8777 NEXTVAL_NEXTTOKE.opval
8778 = (OP*)newSVOP(OP_CONST,0, format_name);
8779 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8786 /* Look for a prototype */
8788 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
8789 COPLINE_SET_FROM_MULTI_END;
8791 Perl_croak(aTHX_ "Prototype not terminated");
8792 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
8797 CURMAD('q', PL_thisopen);
8798 CURMAD('_', tmpwhite);
8799 CURMAD('=', PL_thisstuff);
8800 CURMAD('Q', PL_thisclose);
8801 NEXTVAL_NEXTTOKE.opval =
8802 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8803 PL_lex_stuff = NULL;
8806 s = SKIPSPACE2(s,tmpwhite);
8814 if (*s == ':' && s[1] != ':')
8815 PL_expect = attrful;
8816 else if (*s != '{' && key == KEY_sub) {
8818 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8819 else if (*s != ';' && *s != '}')
8820 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8827 curmad('^', newSVpvs(""));
8828 CURMAD('_', tmpwhite);
8832 PL_thistoken = subtoken;
8833 PERL_UNUSED_VAR(have_proto);
8836 NEXTVAL_NEXTTOKE.opval =
8837 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
8838 PL_lex_stuff = NULL;
8844 sv_setpvs(PL_subname, "__ANON__");
8846 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8850 force_ident_maybe_lex('&');
8856 LOP(OP_SYSTEM,XREF);
8859 LOP(OP_SYMLINK,XTERM);
8862 LOP(OP_SYSCALL,XTERM);
8865 LOP(OP_SYSOPEN,XTERM);
8868 LOP(OP_SYSSEEK,XTERM);
8871 LOP(OP_SYSREAD,XTERM);
8874 LOP(OP_SYSWRITE,XTERM);
8879 TERM(sublex_start());
8900 LOP(OP_TRUNCATE,XTERM);
8912 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8914 pl_yylval.ival = CopLINE(PL_curcop);
8918 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8920 pl_yylval.ival = CopLINE(PL_curcop);
8924 LOP(OP_UNLINK,XTERM);
8930 LOP(OP_UNPACK,XTERM);
8933 LOP(OP_UTIME,XTERM);
8939 LOP(OP_UNSHIFT,XTERM);
8942 s = tokenize_use(1, s);
8952 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8954 pl_yylval.ival = CopLINE(PL_curcop);
8955 Perl_ck_warner_d(aTHX_
8956 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8957 "when is experimental");
8961 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8963 pl_yylval.ival = CopLINE(PL_curcop);
8967 PL_hints |= HINT_BLOCK_SCOPE;
8974 LOP(OP_WAITPID,XTERM);
8980 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8981 * we use the same number on EBCDIC */
8982 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8986 if (PL_expect == XOPERATOR) {
8987 if (*s == '=' && !PL_lex_allbrackets &&
8988 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8996 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8998 pl_yylval.ival = OP_XOR;
9007 Looks up an identifier in the pad or in a package
9010 PRIVATEREF if this is a lexical name.
9011 WORD if this belongs to a package.
9014 if we're in a my declaration
9015 croak if they tried to say my($foo::bar)
9016 build the ops for a my() declaration
9017 if it's an access to a my() variable
9018 build ops for access to a my() variable
9019 if in a dq string, and they've said @foo and we can't find @foo
9021 build ops for a bareword
9025 S_pending_ident(pTHX)
9029 const char pit = (char)pl_yylval.ival;
9030 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9031 /* All routes through this function want to know if there is a colon. */
9032 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9034 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9035 "### Pending identifier '%s'\n", PL_tokenbuf); });
9037 /* if we're in a my(), we can't allow dynamics here.
9038 $foo'bar has already been turned into $foo::bar, so
9039 just check for colons.
9041 if it's a legal name, the OP is a PADANY.
9044 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9046 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9047 "variable %s in \"our\"",
9048 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9049 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9053 /* PL_no_myglob is constant */
9054 GCC_DIAG_IGNORE(-Wformat-nonliteral);
9055 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9056 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
9057 UTF ? SVf_UTF8 : 0);
9061 pl_yylval.opval = newOP(OP_PADANY, 0);
9062 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9063 UTF ? SVf_UTF8 : 0);
9069 build the ops for accesses to a my() variable.
9074 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9075 UTF ? SVf_UTF8 : 0);
9076 if (tmp != NOT_IN_PAD) {
9077 /* might be an "our" variable" */
9078 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9079 /* build ops for a bareword */
9080 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9081 HEK * const stashname = HvNAME_HEK(stash);
9082 SV * const sym = newSVhek(stashname);
9083 sv_catpvs(sym, "::");
9084 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9085 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9086 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9090 ? (GV_ADDMULTI | GV_ADDINEVAL)
9093 ((PL_tokenbuf[0] == '$') ? SVt_PV
9094 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9099 pl_yylval.opval = newOP(OP_PADANY, 0);
9100 pl_yylval.opval->op_targ = tmp;
9106 Whine if they've said @foo in a doublequoted string,
9107 and @foo isn't a variable we can find in the symbol
9110 if (ckWARN(WARN_AMBIGUOUS) &&
9111 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9112 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9113 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
9114 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9115 /* DO NOT warn for @- and @+ */
9116 && !( PL_tokenbuf[2] == '\0' &&
9117 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9120 /* Downgraded from fatal to warning 20000522 mjd */
9121 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9122 "Possible unintended interpolation of %"UTF8f
9124 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9128 /* build ops for a bareword */
9129 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9130 newSVpvn_flags(PL_tokenbuf + 1,
9132 UTF ? SVf_UTF8 : 0 ));
9133 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9135 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
9136 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9137 | ( UTF ? SVf_UTF8 : 0 ),
9138 ((PL_tokenbuf[0] == '$') ? SVt_PV
9139 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9145 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9149 PERL_ARGS_ASSERT_CHECKCOMMA;
9151 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9152 if (ckWARN(WARN_SYNTAX)) {
9155 for (w = s+2; *w && level; w++) {
9163 /* the list of chars below is for end of statements or
9164 * block / parens, boolean operators (&&, ||, //) and branch
9165 * constructs (or, and, if, until, unless, while, err, for).
9166 * Not a very solid hack... */
9167 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9168 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9169 "%s (...) interpreted as function",name);
9172 while (s < PL_bufend && isSPACE(*s))
9176 while (s < PL_bufend && isSPACE(*s))
9178 if (isIDFIRST_lazy_if(s,UTF)) {
9179 const char * const w = s;
9180 s += UTF ? UTF8SKIP(s) : 1;
9181 while (isWORDCHAR_lazy_if(s,UTF))
9182 s += UTF ? UTF8SKIP(s) : 1;
9183 while (s < PL_bufend && isSPACE(*s))
9187 if (keyword(w, s - w, 0))
9190 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9191 if (gv && GvCVu(gv))
9193 Perl_croak(aTHX_ "No comma allowed after %s", what);
9198 /* S_new_constant(): do any overload::constant lookup.
9200 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9201 Best used as sv=new_constant(..., sv, ...).
9202 If s, pv are NULL, calls subroutine with one argument,
9203 and <type> is used with error messages only.
9204 <type> is assumed to be well formed UTF-8 */
9207 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9208 SV *sv, SV *pv, const char *type, STRLEN typelen)
9211 HV * table = GvHV(PL_hintgv); /* ^H */
9216 const char *why1 = "", *why2 = "", *why3 = "";
9218 PERL_ARGS_ASSERT_NEW_CONSTANT;
9219 /* We assume that this is true: */
9220 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9223 /* charnames doesn't work well if there have been errors found */
9224 if (PL_error_count > 0 && *key == 'c')
9226 SvREFCNT_dec_NN(sv);
9227 return &PL_sv_undef;
9230 sv_2mortal(sv); /* Parent created it permanently */
9232 || ! (PL_hints & HINT_LOCALIZE_HH)
9233 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9238 /* Here haven't found what we're looking for. If it is charnames,
9239 * perhaps it needs to be loaded. Try doing that before giving up */
9241 Perl_load_module(aTHX_
9243 newSVpvs("_charnames"),
9244 /* version parameter; no need to specify it, as if
9245 * we get too early a version, will fail anyway,
9246 * not being able to find '_charnames' */
9251 assert(sp == PL_stack_sp);
9252 table = GvHV(PL_hintgv);
9254 && (PL_hints & HINT_LOCALIZE_HH)
9255 && (cvp = hv_fetch(table, key, keylen, FALSE))
9261 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9262 msg = Perl_form(aTHX_
9263 "Constant(%.*s) unknown",
9264 (int)(type ? typelen : len),
9270 why3 = "} is not defined";
9273 msg = Perl_form(aTHX_
9274 /* The +3 is for '\N{'; -4 for that, plus '}' */
9275 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9279 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9280 (int)(type ? typelen : len),
9281 (type ? type: s), why1, why2, why3);
9284 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9285 return SvREFCNT_inc_simple_NN(sv);
9290 pv = newSVpvn_flags(s, len, SVs_TEMP);
9292 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9294 typesv = &PL_sv_undef;
9296 PUSHSTACKi(PERLSI_OVERLOAD);
9308 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9312 /* Check the eval first */
9313 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9315 const char * errstr;
9316 sv_catpvs(errsv, "Propagated");
9317 errstr = SvPV_const(errsv, errlen);
9318 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9320 res = SvREFCNT_inc_simple_NN(sv);
9324 SvREFCNT_inc_simple_void_NN(res);
9333 why1 = "Call to &{$^H{";
9335 why3 = "}} did not return a defined value";
9337 (void)sv_2mortal(sv);
9344 PERL_STATIC_INLINE void
9345 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9347 PERL_ARGS_ASSERT_PARSE_IDENT;
9351 Perl_croak(aTHX_ "%s", ident_too_long);
9352 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9353 /* The UTF-8 case must come first, otherwise things
9354 * like c\N{COMBINING TILDE} would start failing, as the
9355 * isWORDCHAR_A case below would gobble the 'c' up.
9358 char *t = *s + UTF8SKIP(*s);
9359 while (isIDCONT_utf8((U8*)t))
9361 if (*d + (t - *s) > e)
9362 Perl_croak(aTHX_ "%s", ident_too_long);
9363 Copy(*s, *d, t - *s, char);
9367 else if ( isWORDCHAR_A(**s) ) {
9370 } while (isWORDCHAR_A(**s) && *d < e);
9372 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9377 else if (allow_package && **s == ':' && (*s)[1] == ':'
9378 /* Disallow things like Foo::$bar. For the curious, this is
9379 * the code path that triggers the "Bad name after" warning
9380 * when looking for barewords.
9382 && (*s)[2] != '$') {
9392 /* Returns a NUL terminated string, with the length of the string written to
9396 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9400 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9401 bool is_utf8 = cBOOL(UTF);
9403 PERL_ARGS_ASSERT_SCAN_WORD;
9405 parse_ident(&s, &d, e, allow_package, is_utf8);
9412 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9415 I32 herelines = PL_parser->herelines;
9416 SSize_t bracket = -1;
9419 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9420 bool is_utf8 = cBOOL(UTF);
9421 I32 orig_copline = 0, tmp_copline = 0;
9423 PERL_ARGS_ASSERT_SCAN_IDENT;
9428 while (isDIGIT(*s)) {
9430 Perl_croak(aTHX_ "%s", ident_too_long);
9435 parse_ident(&s, &d, e, 1, is_utf8);
9440 /* Either a digit variable, or parse_ident() found an identifier
9441 (anything valid as a bareword), so job done and return. */
9442 if (PL_lex_state != LEX_NORMAL)
9443 PL_lex_state = LEX_INTERPENDMAYBE;
9446 if (*s == '$' && s[1] &&
9447 (isIDFIRST_lazy_if(s+1,is_utf8)
9448 || isDIGIT_A((U8)s[1])
9451 || strnEQ(s+1,"::",2)) )
9453 /* Dereferencing a value in a scalar variable.
9454 The alternatives are different syntaxes for a scalar variable.
9455 Using ' as a leading package separator isn't allowed. :: is. */
9458 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9460 bracket = s - SvPVX(PL_linestr);
9462 orig_copline = CopLINE(PL_curcop);
9463 if (s < PL_bufend && isSPACE(*s)) {
9468 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9469 * iff Unicode semantics are to be used. The legal ones are any of:
9471 * b) ASCII punctuation
9472 * c) When not under Unicode rules, any upper Latin1 character
9473 * d) \c?, \c\, \c^, \c_, and \cA..\cZ, minus the ones that have traditionally
9474 * been matched by \s on ASCII platforms. That is: \c?, plus 1-32, minus
9476 #define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9477 || isDIGIT_A((U8)(d)) \
9478 || (!(u) && !isASCII((U8)(d))) \
9479 || ((((U8)(d)) < 32) \
9480 && (((((U8)(d)) >= 14) \
9481 || (((U8)(d)) <= 8 && (d) != 0) \
9482 || (((U8)(d)) == 13)))) \
9483 || (((U8)(d)) == toCTRL('?')))
9485 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
9487 if ( isCNTRL_A((U8)*s) ) {
9488 deprecate("literal control characters in variable names");
9492 const STRLEN skip = UTF8SKIP(s);
9495 for ( i = 0; i < skip; i++ )
9503 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9504 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9508 /* Warn about ambiguous code after unary operators if {...} notation isn't
9509 used. There's no difference in ambiguity; it's merely a heuristic
9510 about when not to warn. */
9511 else if (ck_uni && bracket == -1)
9513 if (bracket != -1) {
9514 /* If we were processing {...} notation then... */
9515 if (isIDFIRST_lazy_if(d,is_utf8)) {
9516 /* if it starts as a valid identifier, assume that it is one.
9517 (the later check for } being at the expected point will trap
9518 cases where this doesn't pan out.) */
9519 d += is_utf8 ? UTF8SKIP(d) : 1;
9520 parse_ident(&s, &d, e, 1, is_utf8);
9522 tmp_copline = CopLINE(PL_curcop);
9523 if (s < PL_bufend && isSPACE(*s)) {
9526 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9527 /* ${foo[0]} and ${foo{bar}} notation. */
9528 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9529 const char * const brack =
9531 ((*s == '[') ? "[...]" : "{...}");
9532 orig_copline = CopLINE(PL_curcop);
9533 CopLINE_set(PL_curcop, tmp_copline);
9534 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9535 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9536 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9537 funny, dest, brack, funny, dest, brack);
9538 CopLINE_set(PL_curcop, orig_copline);
9541 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9542 PL_lex_allbrackets++;
9546 /* Handle extended ${^Foo} variables
9547 * 1999-02-27 mjd-perl-patch@plover.com */
9548 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9552 while (isWORDCHAR(*s) && d < e) {
9556 Perl_croak(aTHX_ "%s", ident_too_long);
9561 tmp_copline = CopLINE(PL_curcop);
9562 if (s < PL_bufend && isSPACE(*s)) {
9566 /* Expect to find a closing } after consuming any trailing whitespace.
9570 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9571 PL_lex_state = LEX_INTERPEND;
9574 if (PL_lex_state == LEX_NORMAL) {
9575 if (ckWARN(WARN_AMBIGUOUS) &&
9576 (keyword(dest, d - dest, 0)
9577 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
9579 SV *tmp = newSVpvn_flags( dest, d - dest,
9580 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9583 orig_copline = CopLINE(PL_curcop);
9584 CopLINE_set(PL_curcop, tmp_copline);
9585 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9586 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9587 funny, tmp, funny, tmp);
9588 CopLINE_set(PL_curcop, orig_copline);
9593 /* Didn't find the closing } at the point we expected, so restore
9594 state such that the next thing to process is the opening { and */
9595 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9596 CopLINE_set(PL_curcop, orig_copline);
9597 PL_parser->herelines = herelines;
9601 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9602 PL_lex_state = LEX_INTERPEND;
9607 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
9609 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9610 * the parse starting at 's', based on the subset that are valid in this
9611 * context input to this routine in 'valid_flags'. Advances s. Returns
9612 * TRUE if the input should be treated as a valid flag, so the next char
9613 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9614 * first call on the current regex. This routine will set it to any
9615 * charset modifier found. The caller shouldn't change it. This way,
9616 * another charset modifier encountered in the parse can be detected as an
9617 * error, as we have decided to allow only one */
9620 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9622 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9623 if (isWORDCHAR_lazy_if(*s, UTF)) {
9624 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9625 UTF ? SVf_UTF8 : 0);
9627 /* Pretend that it worked, so will continue processing before
9636 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9637 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9638 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9639 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9640 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9641 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9642 case LOCALE_PAT_MOD:
9644 goto multiple_charsets;
9646 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9649 case UNICODE_PAT_MOD:
9651 goto multiple_charsets;
9653 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9656 case ASCII_RESTRICT_PAT_MOD:
9658 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9662 /* Error if previous modifier wasn't an 'a', but if it was, see
9663 * if, and accept, a second occurrence (only) */
9665 || get_regex_charset(*pmfl)
9666 != REGEX_ASCII_RESTRICTED_CHARSET)
9668 goto multiple_charsets;
9670 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9674 case DEPENDS_PAT_MOD:
9676 goto multiple_charsets;
9678 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9687 if (*charset != c) {
9688 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9690 else if (c == 'a') {
9691 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9692 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9695 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9698 /* Pretend that it worked, so will continue processing before dieing */
9704 S_scan_pat(pTHX_ char *start, I32 type)
9709 const char * const valid_flags =
9710 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9711 char charset = '\0'; /* character set modifier */
9716 PERL_ARGS_ASSERT_SCAN_PAT;
9718 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9719 TRUE /* look for escaped bracketed metas */, NULL);
9722 const char * const delimiter = skipspace(start);
9726 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9727 : "Search pattern not terminated" ));
9730 pm = (PMOP*)newPMOP(type, 0);
9731 if (PL_multi_open == '?') {
9732 /* This is the only point in the code that sets PMf_ONCE: */
9733 pm->op_pmflags |= PMf_ONCE;
9735 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9736 allows us to restrict the list needed by reset to just the ??
9738 assert(type != OP_TRANS);
9740 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9743 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9746 elements = mg->mg_len / sizeof(PMOP**);
9747 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9748 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9749 mg->mg_len = elements * sizeof(PMOP**);
9750 PmopSTASH_set(pm,PL_curstash);
9757 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9758 * anon CV. False positives like qr/[(?{]/ are harmless */
9760 if (type == OP_QR) {
9762 char *e, *p = SvPV(PL_lex_stuff, len);
9764 for (; p < e; p++) {
9765 if (p[0] == '(' && p[1] == '?'
9766 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9768 pm->op_pmflags |= PMf_HAS_CV;
9772 pm->op_pmflags |= PMf_IS_QR;
9775 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
9777 if (PL_madskills && modstart != s) {
9778 SV* tmptoken = newSVpvn(modstart, s - modstart);
9779 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9782 /* issue a warning if /c is specified,but /g is not */
9783 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9785 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9786 "Use of /c modifier is meaningless without /g" );
9789 PL_lex_op = (OP*)pm;
9790 pl_yylval.ival = OP_MATCH;
9795 S_scan_subst(pTHX_ char *start)
9803 char charset = '\0'; /* character set modifier */
9809 PERL_ARGS_ASSERT_SCAN_SUBST;
9811 pl_yylval.ival = OP_NULL;
9813 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9814 TRUE /* look for escaped bracketed metas */, &t);
9817 Perl_croak(aTHX_ "Substitution pattern not terminated");
9822 CURMAD('q', PL_thisopen);
9823 CURMAD('_', PL_thiswhite);
9824 CURMAD('E', PL_thisstuff);
9825 CURMAD('Q', PL_thisclose);
9826 PL_realtokenstart = s - SvPVX(PL_linestr);
9830 first_start = PL_multi_start;
9831 first_line = CopLINE(PL_curcop);
9832 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9835 SvREFCNT_dec(PL_lex_stuff);
9836 PL_lex_stuff = NULL;
9838 Perl_croak(aTHX_ "Substitution replacement not terminated");
9840 PL_multi_start = first_start; /* so whole substitution is taken together */
9842 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9846 CURMAD('z', PL_thisopen);
9847 CURMAD('R', PL_thisstuff);
9848 CURMAD('Z', PL_thisclose);
9854 if (*s == EXEC_PAT_MOD) {
9858 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9867 curmad('m', newSVpvn(modstart, s - modstart));
9868 append_madprops(PL_thismad, (OP*)pm, 0);
9872 if ((pm->op_pmflags & PMf_CONTINUE)) {
9873 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9877 SV * const repl = newSVpvs("");
9880 pm->op_pmflags |= PMf_EVAL;
9883 sv_catpvs(repl, "eval ");
9885 sv_catpvs(repl, "do ");
9887 sv_catpvs(repl, "{");
9888 sv_catsv(repl, PL_sublex_info.repl);
9889 sv_catpvs(repl, "}");
9891 SvREFCNT_dec(PL_sublex_info.repl);
9892 PL_sublex_info.repl = repl;
9894 if (CopLINE(PL_curcop) != first_line) {
9895 sv_upgrade(PL_sublex_info.repl, SVt_PVNV);
9896 ((XPVNV*)SvANY(PL_sublex_info.repl))->xnv_u.xpad_cop_seq.xlow =
9897 CopLINE(PL_curcop) - first_line;
9898 CopLINE_set(PL_curcop, first_line);
9901 PL_lex_op = (OP*)pm;
9902 pl_yylval.ival = OP_SUBST;
9907 S_scan_trans(pTHX_ char *start)
9915 bool nondestruct = 0;
9921 PERL_ARGS_ASSERT_SCAN_TRANS;
9923 pl_yylval.ival = OP_NULL;
9925 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,&t);
9927 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9932 CURMAD('q', PL_thisopen);
9933 CURMAD('_', PL_thiswhite);
9934 CURMAD('E', PL_thisstuff);
9935 CURMAD('Q', PL_thisclose);
9936 PL_realtokenstart = s - SvPVX(PL_linestr);
9940 s = scan_str(s,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
9943 SvREFCNT_dec(PL_lex_stuff);
9944 PL_lex_stuff = NULL;
9946 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9949 CURMAD('z', PL_thisopen);
9950 CURMAD('R', PL_thisstuff);
9951 CURMAD('Z', PL_thisclose);
9954 complement = del = squash = 0;
9961 complement = OPpTRANS_COMPLEMENT;
9964 del = OPpTRANS_DELETE;
9967 squash = OPpTRANS_SQUASH;
9979 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9980 o->op_private &= ~OPpTRANS_ALL;
9981 o->op_private |= del|squash|complement|
9982 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9983 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
9986 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9991 curmad('m', newSVpvn(modstart, s - modstart));
9992 append_madprops(PL_thismad, o, 0);
10001 Takes a pointer to the first < in <<FOO.
10002 Returns a pointer to the byte following <<FOO.
10004 This function scans a heredoc, which involves different methods
10005 depending on whether we are in a string eval, quoted construct, etc.
10006 This is because PL_linestr could containing a single line of input, or
10007 a whole string being evalled, or the contents of the current quote-
10010 The two basic methods are:
10011 - Steal lines from the input stream
10012 - Scan the heredoc in PL_linestr and remove it therefrom
10014 In a file scope or filtered eval, the first method is used; in a
10015 string eval, the second.
10017 In a quote-like operator, we have to choose between the two,
10018 depending on where we can find a newline. We peek into outer lex-
10019 ing scopes until we find one with a newline in it. If we reach the
10020 outermost lexing scope and it is a file, we use the stream method.
10021 Otherwise it is treated as an eval.
10025 S_scan_heredoc(pTHX_ char *s)
10028 I32 op_type = OP_SCALAR;
10035 const bool infile = PL_rsfp || PL_parser->filtered;
10036 const line_t origline = CopLINE(PL_curcop);
10037 LEXSHARED *shared = PL_parser->lex_shared;
10039 I32 stuffstart = s - SvPVX(PL_linestr);
10042 PL_realtokenstart = -1;
10045 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10048 d = PL_tokenbuf + 1;
10049 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10050 *PL_tokenbuf = '\n';
10052 while (SPACE_OR_TAB(*peek))
10054 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10057 s = delimcpy(d, e, s, PL_bufend, term, &len);
10058 if (s == PL_bufend)
10059 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10065 /* <<\FOO is equivalent to <<'FOO' */
10069 if (!isWORDCHAR_lazy_if(s,UTF))
10070 deprecate("bare << to mean <<\"\"");
10071 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
10076 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10077 Perl_croak(aTHX_ "Delimiter for here document is too long");
10080 len = d - PL_tokenbuf;
10083 if (PL_madskills) {
10084 tstart = PL_tokenbuf + 1;
10085 PL_thisclose = newSVpvn(tstart, len - 1);
10086 tstart = SvPVX(PL_linestr) + stuffstart;
10087 PL_thisopen = newSVpvn(tstart, s - tstart);
10088 stuffstart = s - SvPVX(PL_linestr);
10091 #ifndef PERL_STRICT_CR
10092 d = strchr(s, '\r');
10094 char * const olds = s;
10096 while (s < PL_bufend) {
10102 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10111 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10116 if (PL_madskills) {
10117 tstart = SvPVX(PL_linestr) + stuffstart;
10119 sv_catpvn(PL_thisstuff, tstart, s - tstart);
10121 PL_thisstuff = newSVpvn(tstart, s - tstart);
10124 stuffstart = s - SvPVX(PL_linestr);
10127 tmpstr = newSV_type(SVt_PVIV);
10128 SvGROW(tmpstr, 80);
10129 if (term == '\'') {
10130 op_type = OP_CONST;
10131 SvIV_set(tmpstr, -1);
10133 else if (term == '`') {
10134 op_type = OP_BACKTICK;
10135 SvIV_set(tmpstr, '\\');
10138 PL_multi_start = origline + 1 + PL_parser->herelines;
10139 PL_multi_open = PL_multi_close = '<';
10140 /* inside a string eval or quote-like operator */
10141 if (!infile || PL_lex_inwhat) {
10144 char * const olds = s;
10145 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
10146 /* These two fields are not set until an inner lexing scope is
10147 entered. But we need them set here. */
10148 shared->ls_bufptr = s;
10149 shared->ls_linestr = PL_linestr;
10151 /* Look for a newline. If the current buffer does not have one,
10152 peek into the line buffer of the parent lexing scope, going
10153 up as many levels as necessary to find one with a newline
10156 while (!(s = (char *)memchr(
10157 (void *)shared->ls_bufptr, '\n',
10158 SvEND(shared->ls_linestr)-shared->ls_bufptr
10160 shared = shared->ls_prev;
10161 /* shared is only null if we have gone beyond the outermost
10162 lexing scope. In a file, we will have broken out of the
10163 loop in the previous iteration. In an eval, the string buf-
10164 fer ends with "\n;", so the while condition above will have
10165 evaluated to false. So shared can never be null. */
10167 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10168 most lexing scope. In a file, shared->ls_linestr at that
10169 level is just one line, so there is no body to steal. */
10170 if (infile && !shared->ls_prev) {
10176 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10179 linestr = shared->ls_linestr;
10180 bufend = SvEND(linestr);
10182 while (s < bufend - len + 1 &&
10183 memNE(s,PL_tokenbuf,len) ) {
10185 ++PL_parser->herelines;
10187 if (s >= bufend - len + 1) {
10190 sv_setpvn(tmpstr,d+1,s-d);
10192 if (PL_madskills) {
10194 sv_catpvn(PL_thisstuff, d + 1, s - d);
10196 PL_thisstuff = newSVpvn(d + 1, s - d);
10197 stuffstart = s - SvPVX(PL_linestr);
10201 /* the preceding stmt passes a newline */
10202 PL_parser->herelines++;
10204 /* s now points to the newline after the heredoc terminator.
10205 d points to the newline before the body of the heredoc.
10208 /* We are going to modify linestr in place here, so set
10209 aside copies of the string if necessary for re-evals or
10211 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10212 check shared->re_eval_str. */
10213 if (shared->re_eval_start || shared->re_eval_str) {
10214 /* Set aside the rest of the regexp */
10215 if (!shared->re_eval_str)
10216 shared->re_eval_str =
10217 newSVpvn(shared->re_eval_start,
10218 bufend - shared->re_eval_start);
10219 shared->re_eval_start -= s-d;
10221 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10222 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10223 cx->blk_eval.cur_text == linestr)
10225 cx->blk_eval.cur_text = newSVsv(linestr);
10226 SvSCREAM_on(cx->blk_eval.cur_text);
10228 /* Copy everything from s onwards back to d. */
10229 Move(s,d,bufend-s + 1,char);
10230 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10231 /* Setting PL_bufend only applies when we have not dug deeper
10232 into other scopes, because sublex_done sets PL_bufend to
10233 SvEND(PL_linestr). */
10234 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10241 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10242 term = PL_tokenbuf[1];
10244 linestr_save = PL_linestr; /* must restore this afterwards */
10245 d = s; /* and this */
10246 PL_linestr = newSVpvs("");
10247 PL_bufend = SvPVX(PL_linestr);
10250 if (PL_madskills) {
10251 tstart = SvPVX(PL_linestr) + stuffstart;
10253 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10255 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10258 PL_bufptr = PL_bufend;
10259 CopLINE_set(PL_curcop,
10260 origline + 1 + PL_parser->herelines);
10261 if (!lex_next_chunk(LEX_NO_TERM)
10262 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10263 SvREFCNT_dec(linestr_save);
10266 CopLINE_set(PL_curcop, origline);
10267 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10268 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10269 /* ^That should be enough to avoid this needing to grow: */
10270 sv_catpvs(PL_linestr, "\n\0");
10271 assert(s == SvPVX(PL_linestr));
10272 PL_bufend = SvEND(PL_linestr);
10276 stuffstart = s - SvPVX(PL_linestr);
10278 PL_parser->herelines++;
10279 PL_last_lop = PL_last_uni = NULL;
10280 #ifndef PERL_STRICT_CR
10281 if (PL_bufend - PL_linestart >= 2) {
10282 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10283 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10285 PL_bufend[-2] = '\n';
10287 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10289 else if (PL_bufend[-1] == '\r')
10290 PL_bufend[-1] = '\n';
10292 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10293 PL_bufend[-1] = '\n';
10295 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
10296 SvREFCNT_dec(PL_linestr);
10297 PL_linestr = linestr_save;
10298 PL_linestart = SvPVX(linestr_save);
10299 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10304 sv_catsv(tmpstr,PL_linestr);
10308 PL_multi_end = origline + PL_parser->herelines;
10309 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10310 SvPV_shrink_to_cur(tmpstr);
10313 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10315 else if (PL_encoding)
10316 sv_recode_to_utf8(tmpstr, PL_encoding);
10318 PL_lex_stuff = tmpstr;
10319 pl_yylval.ival = op_type;
10323 SvREFCNT_dec(tmpstr);
10324 CopLINE_set(PL_curcop, origline);
10325 missingterm(PL_tokenbuf + 1);
10328 /* scan_inputsymbol
10329 takes: current position in input buffer
10330 returns: new position in input buffer
10331 side-effects: pl_yylval and lex_op are set.
10336 <FH> read from filehandle
10337 <pkg::FH> read from package qualified filehandle
10338 <pkg'FH> read from package qualified filehandle
10339 <$fh> read from filehandle in $fh
10340 <*.h> filename glob
10345 S_scan_inputsymbol(pTHX_ char *start)
10348 char *s = start; /* current position in buffer */
10351 char *d = PL_tokenbuf; /* start of temp holding space */
10352 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10354 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10356 end = strchr(s, '\n');
10359 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10361 /* die if we didn't have space for the contents of the <>,
10362 or if it didn't end, or if we see a newline
10365 if (len >= (I32)sizeof PL_tokenbuf)
10366 Perl_croak(aTHX_ "Excessively long <> operator");
10368 Perl_croak(aTHX_ "Unterminated <> operator");
10373 Remember, only scalar variables are interpreted as filehandles by
10374 this code. Anything more complex (e.g., <$fh{$num}>) will be
10375 treated as a glob() call.
10376 This code makes use of the fact that except for the $ at the front,
10377 a scalar variable and a filehandle look the same.
10379 if (*d == '$' && d[1]) d++;
10381 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10382 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
10383 d += UTF ? UTF8SKIP(d) : 1;
10385 /* If we've tried to read what we allow filehandles to look like, and
10386 there's still text left, then it must be a glob() and not a getline.
10387 Use scan_str to pull out the stuff between the <> and treat it
10388 as nothing more than a string.
10391 if (d - PL_tokenbuf != len) {
10392 pl_yylval.ival = OP_GLOB;
10393 s = scan_str(start,!!PL_madskills,FALSE,FALSE,FALSE,NULL);
10395 Perl_croak(aTHX_ "Glob not terminated");
10399 bool readline_overriden = FALSE;
10401 /* we're in a filehandle read situation */
10404 /* turn <> into <ARGV> */
10406 Copy("ARGV",d,5,char);
10408 /* Check whether readline() is overriden */
10409 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
10410 if ((gv_readline = gv_override("readline",8)))
10411 readline_overriden = TRUE;
10413 /* if <$fh>, create the ops to turn the variable into a
10417 /* try to find it in the pad for this block, otherwise find
10418 add symbol table ops
10420 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
10421 if (tmp != NOT_IN_PAD) {
10422 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10423 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10424 HEK * const stashname = HvNAME_HEK(stash);
10425 SV * const sym = sv_2mortal(newSVhek(stashname));
10426 sv_catpvs(sym, "::");
10427 sv_catpv(sym, d+1);
10432 OP * const o = newOP(OP_PADSV, 0);
10434 PL_lex_op = readline_overriden
10435 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10436 op_append_elem(OP_LIST, o,
10437 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10438 : (OP*)newUNOP(OP_READLINE, 0, o);
10447 ? (GV_ADDMULTI | GV_ADDINEVAL)
10448 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
10450 PL_lex_op = readline_overriden
10451 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10452 op_append_elem(OP_LIST,
10453 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10454 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10455 : (OP*)newUNOP(OP_READLINE, 0,
10456 newUNOP(OP_RV2SV, 0,
10457 newGVOP(OP_GV, 0, gv)));
10459 if (!readline_overriden)
10460 PL_lex_op->op_flags |= OPf_SPECIAL;
10461 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10462 pl_yylval.ival = OP_NULL;
10465 /* If it's none of the above, it must be a literal filehandle
10466 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10468 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10469 PL_lex_op = readline_overriden
10470 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
10471 op_append_elem(OP_LIST,
10472 newGVOP(OP_GV, 0, gv),
10473 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10474 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
10475 pl_yylval.ival = OP_NULL;
10485 start position in buffer
10486 keep_quoted preserve \ on the embedded delimiter(s)
10487 keep_delims preserve the delimiters around the string
10488 re_reparse compiling a run-time /(?{})/:
10489 collapse // to /, and skip encoding src
10490 deprecate_escaped_meta issue a deprecation warning for cer-
10491 tain paired metacharacters that appear
10493 delimp if non-null, this is set to the position of
10494 the closing delimiter, or just after it if
10495 the closing and opening delimiters differ
10496 (i.e., the opening delimiter of a substitu-
10498 returns: position to continue reading from buffer
10499 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10500 updates the read buffer.
10502 This subroutine pulls a string out of the input. It is called for:
10503 q single quotes q(literal text)
10504 ' single quotes 'literal text'
10505 qq double quotes qq(interpolate $here please)
10506 " double quotes "interpolate $here please"
10507 qx backticks qx(/bin/ls -l)
10508 ` backticks `/bin/ls -l`
10509 qw quote words @EXPORT_OK = qw( func() $spam )
10510 m// regexp match m/this/
10511 s/// regexp substitute s/this/that/
10512 tr/// string transliterate tr/this/that/
10513 y/// string transliterate y/this/that/
10514 ($*@) sub prototypes sub foo ($)
10515 (stuff) sub attr parameters sub foo : attr(stuff)
10516 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10518 In most of these cases (all but <>, patterns and transliterate)
10519 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10520 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10521 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10524 It skips whitespace before the string starts, and treats the first
10525 character as the delimiter. If the delimiter is one of ([{< then
10526 the corresponding "close" character )]}> is used as the closing
10527 delimiter. It allows quoting of delimiters, and if the string has
10528 balanced delimiters ([{<>}]) it allows nesting.
10530 On success, the SV with the resulting string is put into lex_stuff or,
10531 if that is already non-NULL, into lex_repl. The second case occurs only
10532 when parsing the RHS of the special constructs s/// and tr/// (y///).
10533 For convenience, the terminating delimiter character is stuffed into
10538 S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10539 bool deprecate_escaped_meta, char **delimp
10543 SV *sv; /* scalar value: string */
10544 const char *tmps; /* temp string, used for delimiter matching */
10545 char *s = start; /* current position in the buffer */
10546 char term; /* terminating character */
10547 char *to; /* current position in the sv's data */
10548 I32 brackets = 1; /* bracket nesting level */
10549 bool has_utf8 = FALSE; /* is there any utf8 content? */
10550 I32 termcode; /* terminating char. code */
10551 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10552 STRLEN termlen; /* length of terminating string */
10553 int last_off = 0; /* last position for nesting bracket */
10554 char *escaped_open = NULL;
10561 PERL_ARGS_ASSERT_SCAN_STR;
10563 /* skip space before the delimiter */
10569 if (PL_realtokenstart >= 0) {
10570 stuffstart = PL_realtokenstart;
10571 PL_realtokenstart = -1;
10574 stuffstart = start - SvPVX(PL_linestr);
10576 /* mark where we are, in case we need to report errors */
10579 /* after skipping whitespace, the next character is the terminator */
10582 termcode = termstr[0] = term;
10586 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10587 Copy(s, termstr, termlen, U8);
10588 if (!UTF8_IS_INVARIANT(term))
10592 /* mark where we are */
10593 PL_multi_start = CopLINE(PL_curcop);
10594 PL_multi_open = term;
10595 herelines = PL_parser->herelines;
10597 /* find corresponding closing delimiter */
10598 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
10599 termcode = termstr[0] = term = tmps[5];
10601 PL_multi_close = term;
10603 /* A warning is raised if the input parameter requires it for escaped (by a
10604 * backslash) paired metacharacters {} [] and () when the delimiters are
10605 * those same characters, and the backslash is ineffective. This doesn't
10606 * happen for <>, as they aren't metas. */
10607 if (deprecate_escaped_meta
10608 && (PL_multi_open == PL_multi_close
10609 || PL_multi_open == '<'
10610 || ! ckWARN_d(WARN_DEPRECATED)))
10612 deprecate_escaped_meta = FALSE;
10615 /* create a new SV to hold the contents. 79 is the SV's initial length.
10616 What a random number. */
10617 sv = newSV_type(SVt_PVIV);
10619 SvIV_set(sv, termcode);
10620 (void)SvPOK_only(sv); /* validate pointer */
10622 /* move past delimiter and try to read a complete string */
10624 sv_catpvn(sv, s, termlen);
10627 tstart = SvPVX(PL_linestr) + stuffstart;
10628 if (PL_madskills && !PL_thisopen && !keep_delims) {
10629 PL_thisopen = newSVpvn(tstart, s - tstart);
10630 stuffstart = s - SvPVX(PL_linestr);
10634 if (PL_encoding && !UTF && !re_reparse) {
10638 int offset = s - SvPVX_const(PL_linestr);
10639 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
10640 &offset, (char*)termstr, termlen);
10644 if (SvIsCOW(PL_linestr)) {
10645 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10646 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10647 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10648 char *buf = SvPVX(PL_linestr);
10649 bufend_pos = PL_parser->bufend - buf;
10650 bufptr_pos = PL_parser->bufptr - buf;
10651 oldbufptr_pos = PL_parser->oldbufptr - buf;
10652 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10653 linestart_pos = PL_parser->linestart - buf;
10654 last_uni_pos = PL_parser->last_uni
10655 ? PL_parser->last_uni - buf
10657 last_lop_pos = PL_parser->last_lop
10658 ? PL_parser->last_lop - buf
10660 re_eval_start_pos =
10661 PL_parser->lex_shared->re_eval_start ?
10662 PL_parser->lex_shared->re_eval_start - buf : 0;
10665 sv_force_normal(PL_linestr);
10667 buf = SvPVX(PL_linestr);
10668 PL_parser->bufend = buf + bufend_pos;
10669 PL_parser->bufptr = buf + bufptr_pos;
10670 PL_parser->oldbufptr = buf + oldbufptr_pos;
10671 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10672 PL_parser->linestart = buf + linestart_pos;
10673 if (PL_parser->last_uni)
10674 PL_parser->last_uni = buf + last_uni_pos;
10675 if (PL_parser->last_lop)
10676 PL_parser->last_lop = buf + last_lop_pos;
10677 if (PL_parser->lex_shared->re_eval_start)
10678 PL_parser->lex_shared->re_eval_start =
10679 buf + re_eval_start_pos;
10682 ns = SvPVX_const(PL_linestr) + offset;
10683 svlast = SvEND(sv) - 1;
10685 for (; s < ns; s++) {
10686 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10687 COPLINE_INC_WITH_HERELINES;
10690 goto read_more_line;
10692 /* handle quoted delimiters */
10693 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
10695 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
10697 if ((svlast-1 - t) % 2) {
10698 if (!keep_quoted) {
10699 *(svlast-1) = term;
10701 SvCUR_set(sv, SvCUR(sv) - 1);
10706 if (PL_multi_open == PL_multi_close) {
10712 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
10713 /* At here, all closes are "was quoted" one,
10714 so we don't check PL_multi_close. */
10716 if (!keep_quoted && *(t+1) == PL_multi_open)
10721 else if (*t == PL_multi_open)
10729 SvCUR_set(sv, w - SvPVX_const(sv));
10731 last_off = w - SvPVX(sv);
10732 if (--brackets <= 0)
10737 if (!keep_delims) {
10738 SvCUR_set(sv, SvCUR(sv) - 1);
10744 /* extend sv if need be */
10745 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10746 /* set 'to' to the next character in the sv's string */
10747 to = SvPVX(sv)+SvCUR(sv);
10749 /* if open delimiter is the close delimiter read unbridle */
10750 if (PL_multi_open == PL_multi_close) {
10751 for (; s < PL_bufend; s++,to++) {
10752 /* embedded newlines increment the current line number */
10753 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10754 COPLINE_INC_WITH_HERELINES;
10755 /* handle quoted delimiters */
10756 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10759 || (re_reparse && s[1] == '\\'))
10762 /* any other quotes are simply copied straight through */
10766 /* terminate when run out of buffer (the for() condition), or
10767 have found the terminator */
10768 else if (*s == term) {
10771 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
10774 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10780 /* if the terminator isn't the same as the start character (e.g.,
10781 matched brackets), we have to allow more in the quoting, and
10782 be prepared for nested brackets.
10785 /* read until we run out of string, or we find the terminator */
10786 for (; s < PL_bufend; s++,to++) {
10787 /* embedded newlines increment the line count */
10788 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10789 COPLINE_INC_WITH_HERELINES;
10790 /* backslashes can escape the open or closing characters */
10791 if (*s == '\\' && s+1 < PL_bufend) {
10792 if (!keep_quoted &&
10793 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
10797 /* Here, 'deprecate_escaped_meta' is true iff the
10798 * delimiters are paired metacharacters, and 's' points
10799 * to an occurrence of one of them within the string,
10800 * which was preceded by a backslash. If this is a
10801 * context where the delimiter is also a metacharacter,
10802 * the backslash is useless, and deprecated. () and []
10803 * are meta in any context. {} are meta only when
10804 * appearing in a quantifier or in things like '\p{'
10805 * (but '\\p{' isn't meta). They also aren't meta
10806 * unless there is a matching closed, escaped char
10807 * later on within the string. If 's' points to an
10808 * open, set a flag; if to a close, test that flag, and
10809 * raise a warning if it was set */
10811 if (deprecate_escaped_meta) {
10812 if (*s == PL_multi_open) {
10816 /* Look for a closing '\}' */
10817 else if (regcurly(s, TRUE)) {
10820 /* Look for e.g. '\x{' */
10821 else if (s - start > 2
10822 && _generic_isCC(*(s-2),
10823 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10824 { /* Exclude '\\x', '\\\\x', etc. */
10825 char *lookbehind = s - 4;
10826 bool is_meta = TRUE;
10827 while (lookbehind >= start
10828 && *lookbehind == '\\')
10830 is_meta = ! is_meta;
10838 else if (escaped_open) {
10839 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10840 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10841 escaped_open = NULL;
10848 /* allow nested opens and closes */
10849 else if (*s == PL_multi_close && --brackets <= 0)
10851 else if (*s == PL_multi_open)
10853 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10858 /* terminate the copied string and update the sv's end-of-string */
10860 SvCUR_set(sv, to - SvPVX_const(sv));
10863 * this next chunk reads more into the buffer if we're not done yet
10867 break; /* handle case where we are done yet :-) */
10869 #ifndef PERL_STRICT_CR
10870 if (to - SvPVX_const(sv) >= 2) {
10871 if ((to[-2] == '\r' && to[-1] == '\n') ||
10872 (to[-2] == '\n' && to[-1] == '\r'))
10876 SvCUR_set(sv, to - SvPVX_const(sv));
10878 else if (to[-1] == '\r')
10881 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10886 /* if we're out of file, or a read fails, bail and reset the current
10887 line marker so we can report where the unterminated string began
10890 if (PL_madskills) {
10891 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10893 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
10895 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
10898 COPLINE_INC_WITH_HERELINES;
10899 PL_bufptr = PL_bufend;
10900 if (!lex_next_chunk(0)) {
10902 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10911 /* at this point, we have successfully read the delimited string */
10913 if (!PL_encoding || UTF || re_reparse) {
10915 if (PL_madskills) {
10916 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10917 const int len = s - tstart;
10919 sv_catpvn(PL_thisstuff, tstart, len);
10921 PL_thisstuff = newSVpvn(tstart, len);
10922 if (!PL_thisclose && !keep_delims)
10923 PL_thisclose = newSVpvn(s,termlen);
10928 sv_catpvn(sv, s, termlen);
10933 if (PL_madskills) {
10934 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10935 const int len = s - tstart - termlen;
10937 sv_catpvn(PL_thisstuff, tstart, len);
10939 PL_thisstuff = newSVpvn(tstart, len);
10940 if (!PL_thisclose && !keep_delims)
10941 PL_thisclose = newSVpvn(s - termlen,termlen);
10945 if (has_utf8 || (PL_encoding && !re_reparse))
10948 PL_multi_end = CopLINE(PL_curcop);
10949 CopLINE_set(PL_curcop, PL_multi_start);
10950 PL_parser->herelines = herelines;
10952 /* if we allocated too much space, give some back */
10953 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10954 SvLEN_set(sv, SvCUR(sv) + 1);
10955 SvPV_renew(sv, SvLEN(sv));
10958 /* decide whether this is the first or second quoted string we've read
10963 PL_sublex_info.repl = sv;
10966 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10972 takes: pointer to position in buffer
10973 returns: pointer to new position in buffer
10974 side-effects: builds ops for the constant in pl_yylval.op
10976 Read a number in any of the formats that Perl accepts:
10978 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10979 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10982 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
10984 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10987 If it reads a number without a decimal point or an exponent, it will
10988 try converting the number to an integer and see if it can do so
10989 without loss of precision.
10993 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10996 const char *s = start; /* current position in buffer */
10997 char *d; /* destination in temp buffer */
10998 char *e; /* end of temp buffer */
10999 NV nv; /* number read, as a double */
11000 SV *sv = NULL; /* place to put the converted number */
11001 bool floatit; /* boolean: int or float? */
11002 const char *lastub = NULL; /* position of last underbar */
11003 static const char* const number_too_long = "Number too long";
11005 PERL_ARGS_ASSERT_SCAN_NUM;
11007 /* We use the first character to decide what type of number this is */
11011 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11013 /* if it starts with a 0, it could be an octal number, a decimal in
11014 0.13 disguise, or a hexadecimal number, or a binary number. */
11018 u holds the "number so far"
11019 shift the power of 2 of the base
11020 (hex == 4, octal == 3, binary == 1)
11021 overflowed was the number more than we can hold?
11023 Shift is used when we add a digit. It also serves as an "are
11024 we in octal/hex/binary?" indicator to disallow hex characters
11025 when in octal mode.
11030 bool overflowed = FALSE;
11031 bool just_zero = TRUE; /* just plain 0 or binary number? */
11032 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11033 static const char* const bases[5] =
11034 { "", "binary", "", "octal", "hexadecimal" };
11035 static const char* const Bases[5] =
11036 { "", "Binary", "", "Octal", "Hexadecimal" };
11037 static const char* const maxima[5] =
11039 "0b11111111111111111111111111111111",
11043 const char *base, *Base, *max;
11045 /* check for hex */
11046 if (s[1] == 'x' || s[1] == 'X') {
11050 } else if (s[1] == 'b' || s[1] == 'B') {
11055 /* check for a decimal in disguise */
11056 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
11058 /* so it must be octal */
11065 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11066 "Misplaced _ in number");
11070 base = bases[shift];
11071 Base = Bases[shift];
11072 max = maxima[shift];
11074 /* read the rest of the number */
11076 /* x is used in the overflow test,
11077 b is the digit we're adding on. */
11082 /* if we don't mention it, we're done */
11086 /* _ are ignored -- but warned about if consecutive */
11088 if (lastub && s == lastub + 1)
11089 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11090 "Misplaced _ in number");
11094 /* 8 and 9 are not octal */
11095 case '8': case '9':
11097 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11101 case '2': case '3': case '4':
11102 case '5': case '6': case '7':
11104 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11107 case '0': case '1':
11108 b = *s++ & 15; /* ASCII digit -> value of digit */
11112 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11113 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11114 /* make sure they said 0x */
11117 b = (*s++ & 7) + 9;
11119 /* Prepare to put the digit we have onto the end
11120 of the number so far. We check for overflows.
11126 x = u << shift; /* make room for the digit */
11128 if ((x >> shift) != u
11129 && !(PL_hints & HINT_NEW_BINARY)) {
11132 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11133 "Integer overflow in %s number",
11136 u = x | b; /* add the digit to the end */
11139 n *= nvshift[shift];
11140 /* If an NV has not enough bits in its
11141 * mantissa to represent an UV this summing of
11142 * small low-order numbers is a waste of time
11143 * (because the NV cannot preserve the
11144 * low-order bits anyway): we could just
11145 * remember when did we overflow and in the
11146 * end just multiply n by the right
11154 /* if we get here, we had success: make a scalar value from
11159 /* final misplaced underbar check */
11160 if (s[-1] == '_') {
11161 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11165 if (n > 4294967295.0)
11166 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11167 "%s number > %s non-portable",
11173 if (u > 0xffffffff)
11174 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11175 "%s number > %s non-portable",
11180 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11181 sv = new_constant(start, s - start, "integer",
11182 sv, NULL, NULL, 0);
11183 else if (PL_hints & HINT_NEW_BINARY)
11184 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11189 handle decimal numbers.
11190 we're also sent here when we read a 0 as the first digit
11192 case '1': case '2': case '3': case '4': case '5':
11193 case '6': case '7': case '8': case '9': case '.':
11196 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11199 /* read next group of digits and _ and copy into d */
11200 while (isDIGIT(*s) || *s == '_') {
11201 /* skip underscores, checking for misplaced ones
11205 if (lastub && s == lastub + 1)
11206 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11207 "Misplaced _ in number");
11211 /* check for end of fixed-length buffer */
11213 Perl_croak(aTHX_ "%s", number_too_long);
11214 /* if we're ok, copy the character */
11219 /* final misplaced underbar check */
11220 if (lastub && s == lastub + 1) {
11221 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
11224 /* read a decimal portion if there is one. avoid
11225 3..5 being interpreted as the number 3. followed
11228 if (*s == '.' && s[1] != '.') {
11233 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11234 "Misplaced _ in number");
11238 /* copy, ignoring underbars, until we run out of digits.
11240 for (; isDIGIT(*s) || *s == '_'; s++) {
11241 /* fixed length buffer check */
11243 Perl_croak(aTHX_ "%s", number_too_long);
11245 if (lastub && s == lastub + 1)
11246 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11247 "Misplaced _ in number");
11253 /* fractional part ending in underbar? */
11254 if (s[-1] == '_') {
11255 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11256 "Misplaced _ in number");
11258 if (*s == '.' && isDIGIT(s[1])) {
11259 /* oops, it's really a v-string, but without the "v" */
11265 /* read exponent part, if present */
11266 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
11270 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
11271 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
11273 /* stray preinitial _ */
11275 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11276 "Misplaced _ in number");
11280 /* allow positive or negative exponent */
11281 if (*s == '+' || *s == '-')
11284 /* stray initial _ */
11286 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11287 "Misplaced _ in number");
11291 /* read digits of exponent */
11292 while (isDIGIT(*s) || *s == '_') {
11295 Perl_croak(aTHX_ "%s", number_too_long);
11299 if (((lastub && s == lastub + 1) ||
11300 (!isDIGIT(s[1]) && s[1] != '_')))
11301 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11302 "Misplaced _ in number");
11310 We try to do an integer conversion first if no characters
11311 indicating "float" have been found.
11316 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11318 if (flags == IS_NUMBER_IN_UV) {
11320 sv = newSViv(uv); /* Prefer IVs over UVs. */
11323 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11324 if (uv <= (UV) IV_MIN)
11325 sv = newSViv(-(IV)uv);
11332 /* terminate the string */
11334 nv = Atof(PL_tokenbuf);
11339 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11340 const char *const key = floatit ? "float" : "integer";
11341 const STRLEN keylen = floatit ? 5 : 7;
11342 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11343 key, keylen, sv, NULL, NULL, 0);
11347 /* if it starts with a v, it could be a v-string */
11350 sv = newSV(5); /* preallocate storage space */
11351 ENTER_with_name("scan_vstring");
11353 s = scan_vstring(s, PL_bufend, sv);
11354 SvREFCNT_inc_simple_void_NN(sv);
11355 LEAVE_with_name("scan_vstring");
11359 /* make the op for the constant and return */
11362 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11364 lvalp->opval = NULL;
11370 S_scan_formline(pTHX_ char *s)
11375 SV * const stuff = newSVpvs("");
11376 bool needargs = FALSE;
11377 bool eofmt = FALSE;
11379 char *tokenstart = s;
11380 SV* savewhite = NULL;
11382 if (PL_madskills) {
11383 savewhite = PL_thiswhite;
11388 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11390 while (!needargs) {
11393 #ifdef PERL_STRICT_CR
11394 while (SPACE_OR_TAB(*t))
11397 while (SPACE_OR_TAB(*t) || *t == '\r')
11400 if (*t == '\n' || t == PL_bufend) {
11405 eol = (char *) memchr(s,'\n',PL_bufend-s);
11409 for (t = s; t < eol; t++) {
11410 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11412 goto enough; /* ~~ must be first line in formline */
11414 if (*t == '@' || *t == '^')
11418 sv_catpvn(stuff, s, eol-s);
11419 #ifndef PERL_STRICT_CR
11420 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11421 char *end = SvPVX(stuff) + SvCUR(stuff);
11424 SvCUR_set(stuff, SvCUR(stuff) - 1);
11432 if ((PL_rsfp || PL_parser->filtered)
11433 && PL_parser->form_lex_state == LEX_NORMAL) {
11436 if (PL_madskills) {
11438 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
11440 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
11443 PL_bufptr = PL_bufend;
11444 COPLINE_INC_WITH_HERELINES;
11445 got_some = lex_next_chunk(0);
11446 CopLINE_dec(PL_curcop);
11449 tokenstart = PL_bufptr;
11457 if (!SvCUR(stuff) || needargs)
11458 PL_lex_state = PL_parser->form_lex_state;
11459 if (SvCUR(stuff)) {
11460 PL_expect = XSTATE;
11462 start_force(PL_curforce);
11463 NEXTVAL_NEXTTOKE.ival = 0;
11464 force_next(FORMLBRACK);
11467 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11469 else if (PL_encoding)
11470 sv_recode_to_utf8(stuff, PL_encoding);
11472 start_force(PL_curforce);
11473 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
11477 SvREFCNT_dec(stuff);
11479 PL_lex_formbrack = 0;
11482 if (PL_madskills) {
11484 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
11486 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11487 PL_thiswhite = savewhite;
11494 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11497 const I32 oldsavestack_ix = PL_savestack_ix;
11498 CV* const outsidecv = PL_compcv;
11500 SAVEI32(PL_subline);
11501 save_item(PL_subname);
11502 SAVESPTR(PL_compcv);
11504 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11505 CvFLAGS(PL_compcv) |= flags;
11507 PL_subline = CopLINE(PL_curcop);
11508 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11509 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11510 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11511 if (outsidecv && CvPADLIST(outsidecv))
11512 CvPADLIST(PL_compcv)->xpadl_outid =
11513 PadlistNAMES(CvPADLIST(outsidecv));
11515 return oldsavestack_ix;
11519 S_yywarn(pTHX_ const char *const s, U32 flags)
11523 PERL_ARGS_ASSERT_YYWARN;
11525 PL_in_eval |= EVAL_WARNONLY;
11526 yyerror_pv(s, flags);
11527 PL_in_eval &= ~EVAL_WARNONLY;
11532 Perl_yyerror(pTHX_ const char *const s)
11534 PERL_ARGS_ASSERT_YYERROR;
11535 return yyerror_pvn(s, strlen(s), 0);
11539 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11541 PERL_ARGS_ASSERT_YYERROR_PV;
11542 return yyerror_pvn(s, strlen(s), flags);
11546 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11549 const char *context = NULL;
11552 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11553 int yychar = PL_parser->yychar;
11555 PERL_ARGS_ASSERT_YYERROR_PVN;
11557 if (!yychar || (yychar == ';' && !PL_rsfp))
11558 sv_catpvs(where_sv, "at EOF");
11559 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11560 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11561 PL_oldbufptr != PL_bufptr) {
11564 The code below is removed for NetWare because it abends/crashes on NetWare
11565 when the script has error such as not having the closing quotes like:
11566 if ($var eq "value)
11567 Checking of white spaces is anyway done in NetWare code.
11570 while (isSPACE(*PL_oldoldbufptr))
11573 context = PL_oldoldbufptr;
11574 contlen = PL_bufptr - PL_oldoldbufptr;
11576 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11577 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
11580 The code below is removed for NetWare because it abends/crashes on NetWare
11581 when the script has error such as not having the closing quotes like:
11582 if ($var eq "value)
11583 Checking of white spaces is anyway done in NetWare code.
11586 while (isSPACE(*PL_oldbufptr))
11589 context = PL_oldbufptr;
11590 contlen = PL_bufptr - PL_oldbufptr;
11592 else if (yychar > 255)
11593 sv_catpvs(where_sv, "next token ???");
11594 else if (yychar == -2) { /* YYEMPTY */
11595 if (PL_lex_state == LEX_NORMAL ||
11596 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
11597 sv_catpvs(where_sv, "at end of line");
11598 else if (PL_lex_inpat)
11599 sv_catpvs(where_sv, "within pattern");
11601 sv_catpvs(where_sv, "within string");
11604 sv_catpvs(where_sv, "next char ");
11606 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11607 else if (isPRINT_LC(yychar)) {
11608 const char string = yychar;
11609 sv_catpvn(where_sv, &string, 1);
11612 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11614 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11615 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
11616 OutCopFILE(PL_curcop),
11617 (IV)(PL_parser->preambling == NOLINE
11618 ? CopLINE(PL_curcop)
11619 : PL_parser->preambling));
11621 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11622 UTF8fARG(UTF, contlen, context));
11624 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
11625 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
11626 Perl_sv_catpvf(aTHX_ msg,
11627 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
11628 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11631 if (PL_in_eval & EVAL_WARNONLY) {
11632 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
11636 if (PL_error_count >= 10) {
11638 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
11639 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
11640 SVfARG(errsv), OutCopFILE(PL_curcop));
11642 Perl_croak(aTHX_ "%s has too many errors.\n",
11643 OutCopFILE(PL_curcop));
11646 PL_in_my_stash = NULL;
11651 S_swallow_bom(pTHX_ U8 *s)
11654 const STRLEN slen = SvCUR(PL_linestr);
11656 PERL_ARGS_ASSERT_SWALLOW_BOM;
11660 if (s[1] == 0xFE) {
11661 /* UTF-16 little-endian? (or UTF-32LE?) */
11662 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11663 /* diag_listed_as: Unsupported script encoding %s */
11664 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11665 #ifndef PERL_NO_UTF16_FILTER
11666 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11668 if (PL_bufend > (char*)s) {
11669 s = add_utf16_textfilter(s, TRUE);
11672 /* diag_listed_as: Unsupported script encoding %s */
11673 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11678 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11679 #ifndef PERL_NO_UTF16_FILTER
11680 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11682 if (PL_bufend > (char *)s) {
11683 s = add_utf16_textfilter(s, FALSE);
11686 /* diag_listed_as: Unsupported script encoding %s */
11687 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11691 case BOM_UTF8_FIRST_BYTE: {
11692 const STRLEN len = sizeof(BOM_UTF8_TAIL) - 1; /* Exclude trailing NUL */
11693 if (slen > len && memEQ(s+1, BOM_UTF8_TAIL, len)) {
11694 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11695 s += len + 1; /* UTF-8 */
11702 if (s[2] == 0xFE && s[3] == 0xFF) {
11703 /* UTF-32 big-endian */
11704 /* diag_listed_as: Unsupported script encoding %s */
11705 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11708 else if (s[2] == 0 && s[3] != 0) {
11711 * are a good indicator of UTF-16BE. */
11712 #ifndef PERL_NO_UTF16_FILTER
11713 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11714 s = add_utf16_textfilter(s, FALSE);
11716 /* diag_listed_as: Unsupported script encoding %s */
11717 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11723 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11726 * are a good indicator of UTF-16LE. */
11727 #ifndef PERL_NO_UTF16_FILTER
11728 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11729 s = add_utf16_textfilter(s, TRUE);
11731 /* diag_listed_as: Unsupported script encoding %s */
11732 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11740 #ifndef PERL_NO_UTF16_FILTER
11742 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11745 SV *const filter = FILTER_DATA(idx);
11746 /* We re-use this each time round, throwing the contents away before we
11748 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11749 SV *const utf8_buffer = filter;
11750 IV status = IoPAGE(filter);
11751 const bool reverse = cBOOL(IoLINES(filter));
11754 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11756 /* As we're automatically added, at the lowest level, and hence only called
11757 from this file, we can be sure that we're not called in block mode. Hence
11758 don't bother writing code to deal with block mode. */
11760 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11763 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11765 DEBUG_P(PerlIO_printf(Perl_debug_log,
11766 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11767 FPTR2DPTR(void *, S_utf16_textfilter),
11768 reverse ? 'l' : 'b', idx, maxlen, status,
11769 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11776 /* First, look in our buffer of existing UTF-8 data: */
11777 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11781 } else if (status == 0) {
11783 IoPAGE(filter) = 0;
11784 nl = SvEND(utf8_buffer);
11787 STRLEN got = nl - SvPVX(utf8_buffer);
11788 /* Did we have anything to append? */
11790 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11791 /* Everything else in this code works just fine if SVp_POK isn't
11792 set. This, however, needs it, and we need it to work, else
11793 we loop infinitely because the buffer is never consumed. */
11794 sv_chop(utf8_buffer, nl);
11798 /* OK, not a complete line there, so need to read some more UTF-16.
11799 Read an extra octect if the buffer currently has an odd number. */
11803 if (SvCUR(utf16_buffer) >= 2) {
11804 /* Location of the high octet of the last complete code point.
11805 Gosh, UTF-16 is a pain. All the benefits of variable length,
11806 *coupled* with all the benefits of partial reads and
11808 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11809 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11811 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11815 /* We have the first half of a surrogate. Read more. */
11816 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11819 status = FILTER_READ(idx + 1, utf16_buffer,
11820 160 + (SvCUR(utf16_buffer) & 1));
11821 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
11822 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11825 IoPAGE(filter) = status;
11830 chars = SvCUR(utf16_buffer) >> 1;
11831 have = SvCUR(utf8_buffer);
11832 SvGROW(utf8_buffer, have + chars * 3 + 1);
11835 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11836 (U8*)SvPVX_const(utf8_buffer) + have,
11837 chars * 2, &newlen);
11839 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
11840 (U8*)SvPVX_const(utf8_buffer) + have,
11841 chars * 2, &newlen);
11843 SvCUR_set(utf8_buffer, have + newlen);
11846 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11847 it's private to us, and utf16_to_utf8{,reversed} take a
11848 (pointer,length) pair, rather than a NUL-terminated string. */
11849 if(SvCUR(utf16_buffer) & 1) {
11850 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11851 SvCUR_set(utf16_buffer, 1);
11853 SvCUR_set(utf16_buffer, 0);
11856 DEBUG_P(PerlIO_printf(Perl_debug_log,
11857 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11859 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11860 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
11865 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11867 SV *filter = filter_add(S_utf16_textfilter, NULL);
11869 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11871 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
11872 sv_setpvs(filter, "");
11873 IoLINES(filter) = reversed;
11874 IoPAGE(filter) = 1; /* Not EOF */
11876 /* Sadly, we have to return a valid pointer, come what may, so we have to
11877 ignore any error return from this. */
11878 SvCUR_set(PL_linestr, 0);
11879 if (FILTER_READ(0, PL_linestr, 0)) {
11880 SvUTF8_on(PL_linestr);
11882 SvUTF8_on(PL_linestr);
11884 PL_bufend = SvEND(PL_linestr);
11885 return (U8*)SvPVX(PL_linestr);
11890 Returns a pointer to the next character after the parsed
11891 vstring, as well as updating the passed in sv.
11893 Function must be called like
11895 sv = sv_2mortal(newSV(5));
11896 s = scan_vstring(s,e,sv);
11898 where s and e are the start and end of the string.
11899 The sv should already be large enough to store the vstring
11900 passed in, for performance reasons.
11902 This function may croak if fatal warnings are enabled in the
11903 calling scope, hence the sv_2mortal in the example (to prevent
11904 a leak). Make sure to do SvREFCNT_inc afterwards if you use
11910 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
11913 const char *pos = s;
11914 const char *start = s;
11916 PERL_ARGS_ASSERT_SCAN_VSTRING;
11918 if (*pos == 'v') pos++; /* get past 'v' */
11919 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11921 if ( *pos != '.') {
11922 /* this may not be a v-string if followed by => */
11923 const char *next = pos;
11924 while (next < e && isSPACE(*next))
11926 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
11927 /* return string not v-string */
11928 sv_setpvn(sv,(char *)s,pos-s);
11929 return (char *)pos;
11933 if (!isALPHA(*pos)) {
11934 U8 tmpbuf[UTF8_MAXBYTES+1];
11937 s++; /* get past 'v' */
11942 /* this is atoi() that tolerates underscores */
11945 const char *end = pos;
11947 while (--end >= s) {
11949 const UV orev = rev;
11950 rev += (*end - '0') * mult;
11953 /* diag_listed_as: Integer overflow in %s number */
11954 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11955 "Integer overflow in decimal number");
11959 if (rev > 0x7FFFFFFF)
11960 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11962 /* Append native character for the rev point */
11963 tmpend = uvchr_to_utf8(tmpbuf, rev);
11964 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11965 if (!UVCHR_IS_INVARIANT(rev))
11967 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
11973 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
11977 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11984 Perl_keyword_plugin_standard(pTHX_
11985 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11987 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11988 PERL_UNUSED_CONTEXT;
11989 PERL_UNUSED_ARG(keyword_ptr);
11990 PERL_UNUSED_ARG(keyword_len);
11991 PERL_UNUSED_ARG(op_ptr);
11992 return KEYWORD_PLUGIN_DECLINE;
11995 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
11997 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
11999 SAVEI32(PL_lex_brackets);
12000 if (PL_lex_brackets > 100)
12001 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12002 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12003 SAVEI32(PL_lex_allbrackets);
12004 PL_lex_allbrackets = 0;
12005 SAVEI8(PL_lex_fakeeof);
12006 PL_lex_fakeeof = (U8)fakeeof;
12007 if(yyparse(gramtype) && !PL_parser->error_count)
12008 qerror(Perl_mess(aTHX_ "Parse error"));
12011 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12013 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12017 SAVEVPTR(PL_eval_root);
12018 PL_eval_root = NULL;
12019 parse_recdescent(gramtype, fakeeof);
12025 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12027 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12030 if (flags & ~PARSE_OPTIONAL)
12031 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12032 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12033 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12034 if (!PL_parser->error_count)
12035 qerror(Perl_mess(aTHX_ "Parse error"));
12036 exprop = newOP(OP_NULL, 0);
12042 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12044 Parse a Perl arithmetic expression. This may contain operators of precedence
12045 down to the bit shift operators. The expression must be followed (and thus
12046 terminated) either by a comparison or lower-precedence operator or by
12047 something that would normally terminate an expression such as semicolon.
12048 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12049 otherwise it is mandatory. It is up to the caller to ensure that the
12050 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12051 the source of the code to be parsed and the lexical context for the
12054 The op tree representing the expression is returned. If an optional
12055 expression is absent, a null pointer is returned, otherwise the pointer
12058 If an error occurs in parsing or compilation, in most cases a valid op
12059 tree is returned anyway. The error is reflected in the parser state,
12060 normally resulting in a single exception at the top level of parsing
12061 which covers all the compilation errors that occurred. Some compilation
12062 errors, however, will throw an exception immediately.
12068 Perl_parse_arithexpr(pTHX_ U32 flags)
12070 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12074 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12076 Parse a Perl term expression. This may contain operators of precedence
12077 down to the assignment operators. The expression must be followed (and thus
12078 terminated) either by a comma or lower-precedence operator or by
12079 something that would normally terminate an expression such as semicolon.
12080 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12081 otherwise it is mandatory. It is up to the caller to ensure that the
12082 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12083 the source of the code to be parsed and the lexical context for the
12086 The op tree representing the expression is returned. If an optional
12087 expression is absent, a null pointer is returned, otherwise the pointer
12090 If an error occurs in parsing or compilation, in most cases a valid op
12091 tree is returned anyway. The error is reflected in the parser state,
12092 normally resulting in a single exception at the top level of parsing
12093 which covers all the compilation errors that occurred. Some compilation
12094 errors, however, will throw an exception immediately.
12100 Perl_parse_termexpr(pTHX_ U32 flags)
12102 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12106 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12108 Parse a Perl list expression. This may contain operators of precedence
12109 down to the comma operator. The expression must be followed (and thus
12110 terminated) either by a low-precedence logic operator such as C<or> or by
12111 something that would normally terminate an expression such as semicolon.
12112 If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12113 otherwise it is mandatory. It is up to the caller to ensure that the
12114 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12115 the source of the code to be parsed and the lexical context for the
12118 The op tree representing the expression is returned. If an optional
12119 expression is absent, a null pointer is returned, otherwise the pointer
12122 If an error occurs in parsing or compilation, in most cases a valid op
12123 tree is returned anyway. The error is reflected in the parser state,
12124 normally resulting in a single exception at the top level of parsing
12125 which covers all the compilation errors that occurred. Some compilation
12126 errors, however, will throw an exception immediately.
12132 Perl_parse_listexpr(pTHX_ U32 flags)
12134 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12138 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12140 Parse a single complete Perl expression. This allows the full
12141 expression grammar, including the lowest-precedence operators such
12142 as C<or>. The expression must be followed (and thus terminated) by a
12143 token that an expression would normally be terminated by: end-of-file,
12144 closing bracketing punctuation, semicolon, or one of the keywords that
12145 signals a postfix expression-statement modifier. If I<flags> includes
12146 C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12147 mandatory. It is up to the caller to ensure that the dynamic parser
12148 state (L</PL_parser> et al) is correctly set to reflect the source of
12149 the code to be parsed and the lexical context for the expression.
12151 The op tree representing the expression is returned. If an optional
12152 expression is absent, a null pointer is returned, otherwise the pointer
12155 If an error occurs in parsing or compilation, in most cases a valid op
12156 tree is returned anyway. The error is reflected in the parser state,
12157 normally resulting in a single exception at the top level of parsing
12158 which covers all the compilation errors that occurred. Some compilation
12159 errors, however, will throw an exception immediately.
12165 Perl_parse_fullexpr(pTHX_ U32 flags)
12167 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12171 =for apidoc Amx|OP *|parse_block|U32 flags
12173 Parse a single complete Perl code block. This consists of an opening
12174 brace, a sequence of statements, and a closing brace. The block
12175 constitutes a lexical scope, so C<my> variables and various compile-time
12176 effects can be contained within it. It is up to the caller to ensure
12177 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12178 reflect the source of the code to be parsed and the lexical context for
12181 The op tree representing the code block is returned. This is always a
12182 real op, never a null pointer. It will normally be a C<lineseq> list,
12183 including C<nextstate> or equivalent ops. No ops to construct any kind
12184 of runtime scope are included by virtue of it being a block.
12186 If an error occurs in parsing or compilation, in most cases a valid op
12187 tree (most likely null) is returned anyway. The error is reflected in
12188 the parser state, normally resulting in a single exception at the top
12189 level of parsing which covers all the compilation errors that occurred.
12190 Some compilation errors, however, will throw an exception immediately.
12192 The I<flags> parameter is reserved for future use, and must always
12199 Perl_parse_block(pTHX_ U32 flags)
12202 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12203 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12207 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12209 Parse a single unadorned Perl statement. This may be a normal imperative
12210 statement or a declaration that has compile-time effect. It does not
12211 include any label or other affixture. It is up to the caller to ensure
12212 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12213 reflect the source of the code to be parsed and the lexical context for
12216 The op tree representing the statement is returned. This may be a
12217 null pointer if the statement is null, for example if it was actually
12218 a subroutine definition (which has compile-time side effects). If not
12219 null, it will be ops directly implementing the statement, suitable to
12220 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12221 equivalent op (except for those embedded in a scope contained entirely
12222 within the statement).
12224 If an error occurs in parsing or compilation, in most cases a valid op
12225 tree (most likely null) is returned anyway. The error is reflected in
12226 the parser state, normally resulting in a single exception at the top
12227 level of parsing which covers all the compilation errors that occurred.
12228 Some compilation errors, however, will throw an exception immediately.
12230 The I<flags> parameter is reserved for future use, and must always
12237 Perl_parse_barestmt(pTHX_ U32 flags)
12240 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12241 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12245 =for apidoc Amx|SV *|parse_label|U32 flags
12247 Parse a single label, possibly optional, of the type that may prefix a
12248 Perl statement. It is up to the caller to ensure that the dynamic parser
12249 state (L</PL_parser> et al) is correctly set to reflect the source of
12250 the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12251 label is optional, otherwise it is mandatory.
12253 The name of the label is returned in the form of a fresh scalar. If an
12254 optional label is absent, a null pointer is returned.
12256 If an error occurs in parsing, which can only occur if the label is
12257 mandatory, a valid label is returned anyway. The error is reflected in
12258 the parser state, normally resulting in a single exception at the top
12259 level of parsing which covers all the compilation errors that occurred.
12265 Perl_parse_label(pTHX_ U32 flags)
12267 if (flags & ~PARSE_OPTIONAL)
12268 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12269 if (PL_lex_state == LEX_KNOWNEXT) {
12270 PL_parser->yychar = yylex();
12271 if (PL_parser->yychar == LABEL) {
12272 char * const lpv = pl_yylval.pval;
12273 STRLEN llen = strlen(lpv);
12274 PL_parser->yychar = YYEMPTY;
12275 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12282 STRLEN wlen, bufptr_pos;
12285 if (!isIDFIRST_lazy_if(s, UTF))
12287 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12288 if (word_takes_any_delimeter(s, wlen))
12290 bufptr_pos = s - SvPVX(PL_linestr);
12292 lex_read_space(LEX_KEEP_PREVIOUS);
12294 s = SvPVX(PL_linestr) + bufptr_pos;
12295 if (t[0] == ':' && t[1] != ':') {
12296 PL_oldoldbufptr = PL_oldbufptr;
12299 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12303 if (flags & PARSE_OPTIONAL) {
12306 qerror(Perl_mess(aTHX_ "Parse error"));
12307 return newSVpvs("x");
12314 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12316 Parse a single complete Perl statement. This may be a normal imperative
12317 statement or a declaration that has compile-time effect, and may include
12318 optional labels. It is up to the caller to ensure that the dynamic
12319 parser state (L</PL_parser> et al) is correctly set to reflect the source
12320 of the code to be parsed and the lexical context for the statement.
12322 The op tree representing the statement is returned. This may be a
12323 null pointer if the statement is null, for example if it was actually
12324 a subroutine definition (which has compile-time side effects). If not
12325 null, it will be the result of a L</newSTATEOP> call, normally including
12326 a C<nextstate> or equivalent op.
12328 If an error occurs in parsing or compilation, in most cases a valid op
12329 tree (most likely null) is returned anyway. The error is reflected in
12330 the parser state, normally resulting in a single exception at the top
12331 level of parsing which covers all the compilation errors that occurred.
12332 Some compilation errors, however, will throw an exception immediately.
12334 The I<flags> parameter is reserved for future use, and must always
12341 Perl_parse_fullstmt(pTHX_ U32 flags)
12344 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12345 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12349 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12351 Parse a sequence of zero or more Perl statements. These may be normal
12352 imperative statements, including optional labels, or declarations
12353 that have compile-time effect, or any mixture thereof. The statement
12354 sequence ends when a closing brace or end-of-file is encountered in a
12355 place where a new statement could have validly started. It is up to
12356 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12357 is correctly set to reflect the source of the code to be parsed and the
12358 lexical context for the statements.
12360 The op tree representing the statement sequence is returned. This may
12361 be a null pointer if the statements were all null, for example if there
12362 were no statements or if there were only subroutine definitions (which
12363 have compile-time side effects). If not null, it will be a C<lineseq>
12364 list, normally including C<nextstate> or equivalent ops.
12366 If an error occurs in parsing or compilation, in most cases a valid op
12367 tree is returned anyway. The error is reflected in the parser state,
12368 normally resulting in a single exception at the top level of parsing
12369 which covers all the compilation errors that occurred. Some compilation
12370 errors, however, will throw an exception immediately.
12372 The I<flags> parameter is reserved for future use, and must always
12379 Perl_parse_stmtseq(pTHX_ U32 flags)
12384 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12385 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12386 c = lex_peek_unichar(0);
12387 if (c != -1 && c != /*{*/'}')
12388 qerror(Perl_mess(aTHX_ "Parse error"));
12394 * c-indentation-style: bsd
12395 * c-basic-offset: 4
12396 * indent-tabs-mode: nil
12399 * ex: set ts=8 sts=4 sw=4 et: