3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmnU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "invlist_inline.h"
43 #define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
96 static const char* const ident_too_long = "Identifier too long";
97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155 other token values are 258 or higher (see perly.h), so -1 should be
158 #define YYL_RETRY (-1)
161 static const char* const lex_state_names[] = {
176 #include "keywords.h"
178 /* CLINE is a macro that ensures PL_copline has a sane value */
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
183 * Convenience functions to return different tokens and prime the
184 * lexer for the next token. They all take an argument.
186 * TOKEN : generic token (used for '(', DOLSHARP, etc)
187 * OPERATOR : generic operator
188 * AOPERATOR : assignment operator
189 * PREBLOCK : beginning the block after an if, while, foreach, ...
190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191 * PREREF : *EXPR where EXPR is not a simple identifier
192 * TERM : expression term
193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
194 * LOOPX : loop exiting command (goto, last, dump, etc)
195 * FTST : file test operator
196 * FUN0 : zero-argument function
197 * FUN0OP : zero-argument function, with its op created in this file
198 * FUN1 : not used, except for not, which isn't a UNIOP
199 * BOop : bitwise or or xor
201 * BCop : bitwise complement
202 * SHop : shift operator
203 * PWop : power operator
204 * PMop : pattern-matching operator
205 * Aop : addition-level operator
206 * AopNOASSIGN : addition-level operator that is never part of .=
207 * Mop : multiplication-level operator
208 * ChEop : chaining equality-testing operator
209 * NCEop : non-chaining comparison operator at equality precedence
210 * ChRop : chaining relational operator <= != gt
211 * NCRop : non-chaining relational operator isa
213 * Also see LOP and lop() below.
216 #ifdef DEBUGGING /* Serve -DT. */
217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
219 # define REPORT(retval) (retval)
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
253 /* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
258 #define UNI3(f,x,have_x) { \
259 pl_yylval.ival = f; \
260 if (have_x) PL_expect = x; \
262 PL_last_uni = PL_oldbufptr; \
263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
265 return REPORT( (int)FUNC1 ); \
267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
269 #define UNI(f) UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272 if (optional) PL_last_uni = PL_oldbufptr; \
276 #define UNIBRACK(f) UNI3(f,0,0)
278 /* grandfather return to old style */
281 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
282 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
283 pl_yylval.ival = (f); \
289 #define COPLINE_INC_WITH_HERELINES \
291 CopLINE_inc(PL_curcop); \
292 if (PL_parser->herelines) \
293 CopLINE(PL_curcop) += PL_parser->herelines, \
294 PL_parser->herelines = 0; \
296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
297 * is no sublex_push to follow. */
298 #define COPLINE_SET_FROM_MULTI_END \
300 CopLINE_set(PL_curcop, PL_multi_end); \
301 if (PL_multi_end != PL_multi_start) \
302 PL_parser->herelines = 0; \
306 /* A file-local structure for passing around information about subroutines and
307 * related definable words */
317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
321 /* how to interpret the pl_yylval associated with the token */
325 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
330 #define DEBUG_TOKEN(Type, Name) \
331 { Name, TOKENTYPE_##Type, #Name }
333 static struct debug_tokens {
335 enum token_type type;
337 } const debug_tokens[] =
339 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
340 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
341 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
342 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
343 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
344 { ARROW, TOKENTYPE_NONE, "ARROW" },
345 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
346 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
347 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
348 { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" },
349 { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" },
350 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
351 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
352 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
353 { DO, TOKENTYPE_NONE, "DO" },
354 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
355 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
356 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
357 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
358 { ELSE, TOKENTYPE_NONE, "ELSE" },
359 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
360 { FOR, TOKENTYPE_IVAL, "FOR" },
361 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
362 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
363 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
364 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
365 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
366 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
367 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
368 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
369 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
370 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
371 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
372 { IF, TOKENTYPE_IVAL, "IF" },
373 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
374 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
375 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
376 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
377 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
378 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
379 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
380 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
381 { MY, TOKENTYPE_IVAL, "MY" },
382 { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" },
383 { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" },
384 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
385 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
386 { OROP, TOKENTYPE_IVAL, "OROP" },
387 { OROR, TOKENTYPE_NONE, "OROR" },
388 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
389 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
390 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
391 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
392 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
393 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
394 DEBUG_TOKEN (IVAL, PERLY_COLON),
395 DEBUG_TOKEN (IVAL, PERLY_COMMA),
396 DEBUG_TOKEN (IVAL, PERLY_DOT),
397 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
398 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
399 DEBUG_TOKEN (IVAL, PERLY_MINUS),
400 DEBUG_TOKEN (IVAL, PERLY_PLUS),
401 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
402 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
403 DEBUG_TOKEN (IVAL, PERLY_SNAIL),
404 DEBUG_TOKEN (IVAL, PERLY_TILDE),
405 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
406 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
407 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
408 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
409 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
410 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
411 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
412 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
413 { PREINC, TOKENTYPE_NONE, "PREINC" },
414 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
415 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
416 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
417 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
418 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
419 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
420 { SUB, TOKENTYPE_NONE, "SUB" },
421 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
422 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
423 { THING, TOKENTYPE_OPVAL, "THING" },
424 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
425 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
426 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
427 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
428 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
429 { USE, TOKENTYPE_IVAL, "USE" },
430 { WHEN, TOKENTYPE_IVAL, "WHEN" },
431 { WHILE, TOKENTYPE_IVAL, "WHILE" },
432 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
433 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
434 { 0, TOKENTYPE_NONE, NULL }
439 /* dump the returned token in rv, plus any optional arg in pl_yylval */
442 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
444 PERL_ARGS_ASSERT_TOKEREPORT;
447 const char *name = NULL;
448 enum token_type type = TOKENTYPE_NONE;
449 const struct debug_tokens *p;
450 SV* const report = newSVpvs("<== ");
452 for (p = debug_tokens; p->token; p++) {
453 if (p->token == (int)rv) {
460 Perl_sv_catpv(aTHX_ report, name);
461 else if (isGRAPH(rv))
463 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
465 sv_catpvs(report, " (pending identifier)");
468 sv_catpvs(report, "EOF");
470 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
475 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
477 case TOKENTYPE_OPNUM:
478 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
479 PL_op_name[lvalp->ival]);
482 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
484 case TOKENTYPE_OPVAL:
486 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
487 PL_op_name[lvalp->opval->op_type]);
488 if (lvalp->opval->op_type == OP_CONST) {
489 Perl_sv_catpvf(aTHX_ report, " %s",
490 SvPEEK(cSVOPx_sv(lvalp->opval)));
495 sv_catpvs(report, "(opval=null)");
498 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
504 /* print the buffer with suitable escapes */
507 S_printbuf(pTHX_ const char *const fmt, const char *const s)
509 SV* const tmp = newSVpvs("");
511 PERL_ARGS_ASSERT_PRINTBUF;
513 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
514 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
515 GCC_DIAG_RESTORE_STMT;
524 * This subroutine looks for an '=' next to the operator that has just been
525 * parsed and turns it into an ASSIGNOP if it finds one.
529 S_ao(pTHX_ int toketype)
531 if (*PL_bufptr == '=') {
533 if (toketype == ANDAND)
534 pl_yylval.ival = OP_ANDASSIGN;
535 else if (toketype == OROR)
536 pl_yylval.ival = OP_ORASSIGN;
537 else if (toketype == DORDOR)
538 pl_yylval.ival = OP_DORASSIGN;
541 return REPORT(toketype);
546 * When Perl expects an operator and finds something else, no_op
547 * prints the warning. It always prints "<something> found where
548 * operator expected. It prints "Missing semicolon on previous line?"
549 * if the surprise occurs at the start of the line. "do you need to
550 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
551 * where the compiler doesn't know if foo is a method call or a function.
552 * It prints "Missing operator before end of line" if there's nothing
553 * after the missing operator, or "... before <...>" if there is something
554 * after the missing operator.
556 * PL_bufptr is expected to point to the start of the thing that was found,
557 * and s after the next token or partial token.
561 S_no_op(pTHX_ const char *const what, char *s)
563 char * const oldbp = PL_bufptr;
564 const bool is_first = (PL_oldbufptr == PL_linestart);
566 PERL_ARGS_ASSERT_NO_OP;
572 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
573 if (ckWARN_d(WARN_SYNTAX)) {
575 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
576 "\t(Missing semicolon on previous line?)\n");
577 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
582 for (t = PL_oldoldbufptr;
583 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
584 t += UTF ? UTF8SKIP(t) : 1)
588 if (t < PL_bufptr && isSPACE(*t))
589 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
590 "\t(Do you need to predeclare %" UTF8f "?)\n",
591 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
595 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
596 "\t(Missing operator before %" UTF8f "?)\n",
597 UTF8fARG(UTF, s - oldbp, oldbp));
605 * Complain about missing quote/regexp/heredoc terminator.
606 * If it's called with NULL then it cauterizes the line buffer.
607 * If we're in a delimited string and the delimiter is a control
608 * character, it's reformatted into a two-char sequence like ^C.
613 S_missingterm(pTHX_ char *s, STRLEN len)
615 char tmpbuf[UTF8_MAXBYTES + 1];
620 char * const nl = (char *) my_memrchr(s, '\n', len);
627 else if (PL_multi_close < 32) {
629 tmpbuf[1] = (char)toCTRL(PL_multi_close);
635 if (LIKELY(PL_multi_close < 256)) {
636 *tmpbuf = (char)PL_multi_close;
641 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
648 q = memchr(s, '"', len) ? '\'' : '"';
649 sv = sv_2mortal(newSVpvn(s, len));
652 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
653 " anywhere before EOF", q, SVfARG(sv), q);
659 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
660 * utf16-to-utf8-reversed.
663 #ifdef PERL_CR_FILTER
667 const char *s = SvPVX_const(sv);
668 const char * const e = s + SvCUR(sv);
670 PERL_ARGS_ASSERT_STRIP_RETURN;
672 /* outer loop optimized to do nothing if there are no CR-LFs */
674 if (*s++ == '\r' && *s == '\n') {
675 /* hit a CR-LF, need to copy the rest */
679 if (*s == '\r' && s[1] == '\n')
690 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
692 const I32 count = FILTER_READ(idx+1, sv, maxlen);
693 if (count > 0 && !maxlen)
700 =for apidoc lex_start
702 Creates and initialises a new lexer/parser state object, supplying
703 a context in which to lex and parse from a new source of Perl code.
704 A pointer to the new state object is placed in L</PL_parser>. An entry
705 is made on the save stack so that upon unwinding, the new state object
706 will be destroyed and the former value of L</PL_parser> will be restored.
707 Nothing else need be done to clean up the parsing context.
709 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
710 non-null, provides a string (in SV form) containing code to be parsed.
711 A copy of the string is made, so subsequent modification of C<line>
712 does not affect parsing. C<rsfp>, if non-null, provides an input stream
713 from which code will be read to be parsed. If both are non-null, the
714 code in C<line> comes first and must consist of complete lines of input,
715 and C<rsfp> supplies the remainder of the source.
717 The C<flags> parameter is reserved for future use. Currently it is only
718 used by perl internally, so extensions should always pass zero.
723 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
724 can share filters with the current parser.
725 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
726 caller, hence isn't owned by the parser, so shouldn't be closed on parser
727 destruction. This is used to handle the case of defaulting to reading the
728 script from the standard input because no filename was given on the command
729 line (without getting confused by situation where STDIN has been closed, so
730 the script handle is opened on fd 0) */
733 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
735 const char *s = NULL;
736 yy_parser *parser, *oparser;
738 if (flags && flags & ~LEX_START_FLAGS)
739 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
741 /* create and initialise a parser */
743 Newxz(parser, 1, yy_parser);
744 parser->old_parser = oparser = PL_parser;
747 parser->stack = NULL;
748 parser->stack_max1 = NULL;
751 /* on scope exit, free this parser and restore any outer one */
753 parser->saved_curcop = PL_curcop;
755 /* initialise lexer state */
757 parser->nexttoke = 0;
758 parser->error_count = oparser ? oparser->error_count : 0;
759 parser->copline = parser->preambling = NOLINE;
760 parser->lex_state = LEX_NORMAL;
761 parser->expect = XSTATE;
763 parser->recheck_utf8_validity = TRUE;
764 parser->rsfp_filters =
765 !(flags & LEX_START_SAME_FILTER) || !oparser
767 : MUTABLE_AV(SvREFCNT_inc(
768 oparser->rsfp_filters
769 ? oparser->rsfp_filters
770 : (oparser->rsfp_filters = newAV())
773 Newx(parser->lex_brackstack, 120, char);
774 Newx(parser->lex_casestack, 12, char);
775 *parser->lex_casestack = '\0';
776 Newxz(parser->lex_shared, 1, LEXSHARED);
780 const U8* first_bad_char_loc;
782 s = SvPV_const(line, len);
785 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
787 &first_bad_char_loc)))
789 _force_out_malformed_utf8_message(first_bad_char_loc,
790 (U8 *) s + SvCUR(line),
792 1 /* 1 means die */ );
793 NOT_REACHED; /* NOTREACHED */
796 parser->linestr = flags & LEX_START_COPIED
797 ? SvREFCNT_inc_simple_NN(line)
798 : newSVpvn_flags(s, len, SvUTF8(line));
800 sv_catpvs(parser->linestr, "\n;");
802 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
805 parser->oldoldbufptr =
808 parser->linestart = SvPVX(parser->linestr);
809 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
810 parser->last_lop = parser->last_uni = NULL;
812 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
813 |LEX_DONT_CLOSE_RSFP));
814 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
815 |LEX_DONT_CLOSE_RSFP));
817 parser->in_pod = parser->filtered = 0;
821 /* delete a parser object */
824 Perl_parser_free(pTHX_ const yy_parser *parser)
826 PERL_ARGS_ASSERT_PARSER_FREE;
828 PL_curcop = parser->saved_curcop;
829 SvREFCNT_dec(parser->linestr);
831 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
832 PerlIO_clearerr(parser->rsfp);
833 else if (parser->rsfp && (!parser->old_parser
834 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
835 PerlIO_close(parser->rsfp);
836 SvREFCNT_dec(parser->rsfp_filters);
837 SvREFCNT_dec(parser->lex_stuff);
838 SvREFCNT_dec(parser->lex_sub_repl);
840 Safefree(parser->lex_brackstack);
841 Safefree(parser->lex_casestack);
842 Safefree(parser->lex_shared);
843 PL_parser = parser->old_parser;
848 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
850 I32 nexttoke = parser->nexttoke;
851 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
853 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
854 && parser->nextval[nexttoke].opval
855 && parser->nextval[nexttoke].opval->op_slabbed
856 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
857 op_free(parser->nextval[nexttoke].opval);
858 parser->nextval[nexttoke].opval = NULL;
865 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
867 Buffer scalar containing the chunk currently under consideration of the
868 text currently being lexed. This is always a plain string scalar (for
869 which C<SvPOK> is true). It is not intended to be used as a scalar by
870 normal scalar means; instead refer to the buffer directly by the pointer
871 variables described below.
873 The lexer maintains various C<char*> pointers to things in the
874 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
875 reallocated, all of these pointers must be updated. Don't attempt to
876 do this manually, but rather use L</lex_grow_linestr> if you need to
877 reallocate the buffer.
879 The content of the text chunk in the buffer is commonly exactly one
880 complete line of input, up to and including a newline terminator,
881 but there are situations where it is otherwise. The octets of the
882 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
883 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
884 flag on this scalar, which may disagree with it.
886 For direct examination of the buffer, the variable
887 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
888 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
889 of these pointers is usually preferable to examination of the scalar
890 through normal scalar means.
892 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
894 Direct pointer to the end of the chunk of text currently being lexed, the
895 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
896 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
897 always located at the end of the buffer, and does not count as part of
898 the buffer's contents.
900 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
902 Points to the current position of lexing inside the lexer buffer.
903 Characters around this point may be freely examined, within
904 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
905 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
906 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
908 Lexing code (whether in the Perl core or not) moves this pointer past
909 the characters that it consumes. It is also expected to perform some
910 bookkeeping whenever a newline character is consumed. This movement
911 can be more conveniently performed by the function L</lex_read_to>,
912 which handles newlines appropriately.
914 Interpretation of the buffer's octets can be abstracted out by
915 using the slightly higher-level functions L</lex_peek_unichar> and
916 L</lex_read_unichar>.
918 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
920 Points to the start of the current line inside the lexer buffer.
921 This is useful for indicating at which column an error occurred, and
922 not much else. This must be updated by any lexing code that consumes
923 a newline; the function L</lex_read_to> handles this detail.
929 =for apidoc lex_bufutf8
931 Indicates whether the octets in the lexer buffer
932 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
933 of Unicode characters. If not, they should be interpreted as Latin-1
934 characters. This is analogous to the C<SvUTF8> flag for scalars.
936 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
937 contains valid UTF-8. Lexing code must be robust in the face of invalid
940 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
941 is significant, but not the whole story regarding the input character
942 encoding. Normally, when a file is being read, the scalar contains octets
943 and its C<SvUTF8> flag is off, but the octets should be interpreted as
944 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
945 however, the scalar may have the C<SvUTF8> flag on, and in this case its
946 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
947 is in effect. This logic may change in the future; use this function
948 instead of implementing the logic yourself.
954 Perl_lex_bufutf8(pTHX)
960 =for apidoc lex_grow_linestr
962 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
963 at least C<len> octets (including terminating C<NUL>). Returns a
964 pointer to the reallocated buffer. This is necessary before making
965 any direct modification of the buffer that would increase its length.
966 L</lex_stuff_pvn> provides a more convenient way to insert text into
969 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
970 this function updates all of the lexer's variables that point directly
977 Perl_lex_grow_linestr(pTHX_ STRLEN len)
981 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
982 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
985 linestr = PL_parser->linestr;
986 buf = SvPVX(linestr);
987 if (len <= SvLEN(linestr))
990 /* Is the lex_shared linestr SV the same as the current linestr SV?
991 * Only in this case does re_eval_start need adjusting, since it
992 * points within lex_shared->ls_linestr's buffer */
993 current = ( !PL_parser->lex_shared->ls_linestr
994 || linestr == PL_parser->lex_shared->ls_linestr);
996 bufend_pos = PL_parser->bufend - buf;
997 bufptr_pos = PL_parser->bufptr - buf;
998 oldbufptr_pos = PL_parser->oldbufptr - buf;
999 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1000 linestart_pos = PL_parser->linestart - buf;
1001 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1002 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1003 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1004 PL_parser->lex_shared->re_eval_start - buf : 0;
1006 buf = sv_grow(linestr, len);
1008 PL_parser->bufend = buf + bufend_pos;
1009 PL_parser->bufptr = buf + bufptr_pos;
1010 PL_parser->oldbufptr = buf + oldbufptr_pos;
1011 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1012 PL_parser->linestart = buf + linestart_pos;
1013 if (PL_parser->last_uni)
1014 PL_parser->last_uni = buf + last_uni_pos;
1015 if (PL_parser->last_lop)
1016 PL_parser->last_lop = buf + last_lop_pos;
1017 if (current && PL_parser->lex_shared->re_eval_start)
1018 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
1023 =for apidoc lex_stuff_pvn
1025 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1026 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1027 reallocating the buffer if necessary. This means that lexing code that
1028 runs later will see the characters as if they had appeared in the input.
1029 It is not recommended to do this as part of normal parsing, and most
1030 uses of this facility run the risk of the inserted characters being
1031 interpreted in an unintended manner.
1033 The string to be inserted is represented by C<len> octets starting
1034 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1035 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1036 The characters are recoded for the lexer buffer, according to how the
1037 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1038 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1039 function is more convenient.
1041 =for apidoc Amnh||LEX_STUFF_UTF8
1047 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1050 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1051 if (flags & ~(LEX_STUFF_UTF8))
1052 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1054 if (flags & LEX_STUFF_UTF8) {
1057 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1059 const char *p, *e = pv+len;;
1062 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1063 bufptr = PL_parser->bufptr;
1064 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1065 SvCUR_set(PL_parser->linestr,
1066 SvCUR(PL_parser->linestr) + len+highhalf);
1067 PL_parser->bufend += len+highhalf;
1068 for (p = pv; p != e; p++) {
1069 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1073 if (flags & LEX_STUFF_UTF8) {
1074 STRLEN highhalf = 0;
1075 const char *p, *e = pv+len;
1076 for (p = pv; p != e; p++) {
1078 if (UTF8_IS_ABOVE_LATIN1(c)) {
1079 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1080 "non-Latin-1 character into Latin-1 input");
1081 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1084 } else assert(UTF8_IS_INVARIANT(c));
1088 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1089 bufptr = PL_parser->bufptr;
1090 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1091 SvCUR_set(PL_parser->linestr,
1092 SvCUR(PL_parser->linestr) + len-highhalf);
1093 PL_parser->bufend += len-highhalf;
1096 if (UTF8_IS_INVARIANT(*p)) {
1102 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1108 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1109 bufptr = PL_parser->bufptr;
1110 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1111 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1112 PL_parser->bufend += len;
1113 Copy(pv, bufptr, len, char);
1119 =for apidoc lex_stuff_pv
1121 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1122 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1123 reallocating the buffer if necessary. This means that lexing code that
1124 runs later will see the characters as if they had appeared in the input.
1125 It is not recommended to do this as part of normal parsing, and most
1126 uses of this facility run the risk of the inserted characters being
1127 interpreted in an unintended manner.
1129 The string to be inserted is represented by octets starting at C<pv>
1130 and continuing to the first nul. These octets are interpreted as either
1131 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1132 in C<flags>. The characters are recoded for the lexer buffer, according
1133 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1134 If it is not convenient to nul-terminate a string to be inserted, the
1135 L</lex_stuff_pvn> function is more appropriate.
1141 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1143 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1144 lex_stuff_pvn(pv, strlen(pv), flags);
1148 =for apidoc lex_stuff_sv
1150 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1151 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1152 reallocating the buffer if necessary. This means that lexing code that
1153 runs later will see the characters as if they had appeared in the input.
1154 It is not recommended to do this as part of normal parsing, and most
1155 uses of this facility run the risk of the inserted characters being
1156 interpreted in an unintended manner.
1158 The string to be inserted is the string value of C<sv>. The characters
1159 are recoded for the lexer buffer, according to how the buffer is currently
1160 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1161 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1162 need to construct a scalar.
1168 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1172 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1174 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1176 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1180 =for apidoc lex_unstuff
1182 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1183 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1184 This hides the discarded text from any lexing code that runs later,
1185 as if the text had never appeared.
1187 This is not the normal way to consume lexed text. For that, use
1194 Perl_lex_unstuff(pTHX_ char *ptr)
1198 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1199 buf = PL_parser->bufptr;
1201 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1204 bufend = PL_parser->bufend;
1206 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1207 unstuff_len = ptr - buf;
1208 Move(ptr, buf, bufend+1-ptr, char);
1209 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1210 PL_parser->bufend = bufend - unstuff_len;
1214 =for apidoc lex_read_to
1216 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1217 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1218 performing the correct bookkeeping whenever a newline character is passed.
1219 This is the normal way to consume lexed text.
1221 Interpretation of the buffer's octets can be abstracted out by
1222 using the slightly higher-level functions L</lex_peek_unichar> and
1223 L</lex_read_unichar>.
1229 Perl_lex_read_to(pTHX_ char *ptr)
1232 PERL_ARGS_ASSERT_LEX_READ_TO;
1233 s = PL_parser->bufptr;
1234 if (ptr < s || ptr > PL_parser->bufend)
1235 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1236 for (; s != ptr; s++)
1238 COPLINE_INC_WITH_HERELINES;
1239 PL_parser->linestart = s+1;
1241 PL_parser->bufptr = ptr;
1245 =for apidoc lex_discard_to
1247 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1248 up to C<ptr>. The remaining content of the buffer will be moved, and
1249 all pointers into the buffer updated appropriately. C<ptr> must not
1250 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1251 it is not permitted to discard text that has yet to be lexed.
1253 Normally it is not necessarily to do this directly, because it suffices to
1254 use the implicit discarding behaviour of L</lex_next_chunk> and things
1255 based on it. However, if a token stretches across multiple lines,
1256 and the lexing code has kept multiple lines of text in the buffer for
1257 that purpose, then after completion of the token it would be wise to
1258 explicitly discard the now-unneeded earlier lines, to avoid future
1259 multi-line tokens growing the buffer without bound.
1265 Perl_lex_discard_to(pTHX_ char *ptr)
1269 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1270 buf = SvPVX(PL_parser->linestr);
1272 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1275 if (ptr > PL_parser->bufptr)
1276 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1277 discard_len = ptr - buf;
1278 if (PL_parser->oldbufptr < ptr)
1279 PL_parser->oldbufptr = ptr;
1280 if (PL_parser->oldoldbufptr < ptr)
1281 PL_parser->oldoldbufptr = ptr;
1282 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1283 PL_parser->last_uni = NULL;
1284 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1285 PL_parser->last_lop = NULL;
1286 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1287 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1288 PL_parser->bufend -= discard_len;
1289 PL_parser->bufptr -= discard_len;
1290 PL_parser->oldbufptr -= discard_len;
1291 PL_parser->oldoldbufptr -= discard_len;
1292 if (PL_parser->last_uni)
1293 PL_parser->last_uni -= discard_len;
1294 if (PL_parser->last_lop)
1295 PL_parser->last_lop -= discard_len;
1299 Perl_notify_parser_that_changed_to_utf8(pTHX)
1301 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1302 * off to on. At compile time, this has the effect of entering a 'use
1303 * utf8' section. This means that any input was not previously checked for
1304 * UTF-8 (because it was off), but now we do need to check it, or our
1305 * assumptions about the input being sane could be wrong, and we could
1306 * segfault. This routine just sets a flag so that the next time we look
1307 * at the input we do the well-formed UTF-8 check. If we aren't in the
1308 * proper phase, there may not be a parser object, but if there is, setting
1309 * the flag is harmless */
1312 PL_parser->recheck_utf8_validity = TRUE;
1317 =for apidoc lex_next_chunk
1319 Reads in the next chunk of text to be lexed, appending it to
1320 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1321 looked to the end of the current chunk and wants to know more. It is
1322 usual, but not necessary, for lexing to have consumed the entirety of
1323 the current chunk at this time.
1325 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1326 chunk (i.e., the current chunk has been entirely consumed), normally the
1327 current chunk will be discarded at the same time that the new chunk is
1328 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1329 will not be discarded. If the current chunk has not been entirely
1330 consumed, then it will not be discarded regardless of the flag.
1332 Returns true if some new text was added to the buffer, or false if the
1333 buffer has reached the end of the input text.
1335 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1340 #define LEX_FAKE_EOF 0x80000000
1341 #define LEX_NO_TERM 0x40000000 /* here-doc */
1344 Perl_lex_next_chunk(pTHX_ U32 flags)
1348 STRLEN old_bufend_pos, new_bufend_pos;
1349 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1350 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1351 bool got_some_for_debugger = 0;
1354 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1355 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1356 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1358 linestr = PL_parser->linestr;
1359 buf = SvPVX(linestr);
1360 if (!(flags & LEX_KEEP_PREVIOUS)
1361 && PL_parser->bufptr == PL_parser->bufend)
1363 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1365 if (PL_parser->last_uni != PL_parser->bufend)
1366 PL_parser->last_uni = NULL;
1367 if (PL_parser->last_lop != PL_parser->bufend)
1368 PL_parser->last_lop = NULL;
1369 last_uni_pos = last_lop_pos = 0;
1371 SvCUR_set(linestr, 0);
1373 old_bufend_pos = PL_parser->bufend - buf;
1374 bufptr_pos = PL_parser->bufptr - buf;
1375 oldbufptr_pos = PL_parser->oldbufptr - buf;
1376 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1377 linestart_pos = PL_parser->linestart - buf;
1378 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1379 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1381 if (flags & LEX_FAKE_EOF) {
1383 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1385 } else if (filter_gets(linestr, old_bufend_pos)) {
1387 got_some_for_debugger = 1;
1388 } else if (flags & LEX_NO_TERM) {
1391 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1394 /* End of real input. Close filehandle (unless it was STDIN),
1395 * then add implicit termination.
1397 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1398 PerlIO_clearerr(PL_parser->rsfp);
1399 else if (PL_parser->rsfp)
1400 (void)PerlIO_close(PL_parser->rsfp);
1401 PL_parser->rsfp = NULL;
1402 PL_parser->in_pod = PL_parser->filtered = 0;
1403 if (!PL_in_eval && PL_minus_p) {
1405 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1406 PL_minus_n = PL_minus_p = 0;
1407 } else if (!PL_in_eval && PL_minus_n) {
1408 sv_catpvs(linestr, /*{*/";}");
1411 sv_catpvs(linestr, ";");
1414 buf = SvPVX(linestr);
1415 new_bufend_pos = SvCUR(linestr);
1416 PL_parser->bufend = buf + new_bufend_pos;
1417 PL_parser->bufptr = buf + bufptr_pos;
1420 const U8* first_bad_char_loc;
1421 if (UNLIKELY(! is_utf8_string_loc(
1422 (U8 *) PL_parser->bufptr,
1423 PL_parser->bufend - PL_parser->bufptr,
1424 &first_bad_char_loc)))
1426 _force_out_malformed_utf8_message(first_bad_char_loc,
1427 (U8 *) PL_parser->bufend,
1429 1 /* 1 means die */ );
1430 NOT_REACHED; /* NOTREACHED */
1434 PL_parser->oldbufptr = buf + oldbufptr_pos;
1435 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1436 PL_parser->linestart = buf + linestart_pos;
1437 if (PL_parser->last_uni)
1438 PL_parser->last_uni = buf + last_uni_pos;
1439 if (PL_parser->last_lop)
1440 PL_parser->last_lop = buf + last_lop_pos;
1441 if (PL_parser->preambling != NOLINE) {
1442 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1443 PL_parser->preambling = NOLINE;
1445 if ( got_some_for_debugger
1446 && PERLDB_LINE_OR_SAVESRC
1447 && PL_curstash != PL_debstash)
1449 /* debugger active and we're not compiling the debugger code,
1450 * so store the line into the debugger's array of lines
1452 update_debugger_info(NULL, buf+old_bufend_pos,
1453 new_bufend_pos-old_bufend_pos);
1459 =for apidoc lex_peek_unichar
1461 Looks ahead one (Unicode) character in the text currently being lexed.
1462 Returns the codepoint (unsigned integer value) of the next character,
1463 or -1 if lexing has reached the end of the input text. To consume the
1464 peeked character, use L</lex_read_unichar>.
1466 If the next character is in (or extends into) the next chunk of input
1467 text, the next chunk will be read in. Normally the current chunk will be
1468 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1469 bit set, then the current chunk will not be discarded.
1471 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1472 is encountered, an exception is generated.
1478 Perl_lex_peek_unichar(pTHX_ U32 flags)
1481 if (flags & ~(LEX_KEEP_PREVIOUS))
1482 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1483 s = PL_parser->bufptr;
1484 bufend = PL_parser->bufend;
1490 if (!lex_next_chunk(flags))
1492 s = PL_parser->bufptr;
1493 bufend = PL_parser->bufend;
1496 if (UTF8_IS_INVARIANT(head))
1498 if (UTF8_IS_START(head)) {
1499 len = UTF8SKIP(&head);
1500 while ((STRLEN)(bufend-s) < len) {
1501 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1503 s = PL_parser->bufptr;
1504 bufend = PL_parser->bufend;
1507 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1508 if (retlen == (STRLEN)-1) {
1509 _force_out_malformed_utf8_message((U8 *) s,
1512 1 /* 1 means die */ );
1513 NOT_REACHED; /* NOTREACHED */
1518 if (!lex_next_chunk(flags))
1520 s = PL_parser->bufptr;
1527 =for apidoc lex_read_unichar
1529 Reads the next (Unicode) character in the text currently being lexed.
1530 Returns the codepoint (unsigned integer value) of the character read,
1531 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1532 if lexing has reached the end of the input text. To non-destructively
1533 examine the next character, use L</lex_peek_unichar> instead.
1535 If the next character is in (or extends into) the next chunk of input
1536 text, the next chunk will be read in. Normally the current chunk will be
1537 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1538 bit set, then the current chunk will not be discarded.
1540 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1541 is encountered, an exception is generated.
1547 Perl_lex_read_unichar(pTHX_ U32 flags)
1550 if (flags & ~(LEX_KEEP_PREVIOUS))
1551 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1552 c = lex_peek_unichar(flags);
1555 COPLINE_INC_WITH_HERELINES;
1557 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1559 ++(PL_parser->bufptr);
1565 =for apidoc lex_read_space
1567 Reads optional spaces, in Perl style, in the text currently being
1568 lexed. The spaces may include ordinary whitespace characters and
1569 Perl-style comments. C<#line> directives are processed if encountered.
1570 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1571 at a non-space character (or the end of the input text).
1573 If spaces extend into the next chunk of input text, the next chunk will
1574 be read in. Normally the current chunk will be discarded at the same
1575 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1576 chunk will not be discarded.
1581 #define LEX_NO_INCLINE 0x40000000
1582 #define LEX_NO_NEXT_CHUNK 0x80000000
1585 Perl_lex_read_space(pTHX_ U32 flags)
1588 const bool can_incline = !(flags & LEX_NO_INCLINE);
1589 bool need_incline = 0;
1590 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1591 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1592 s = PL_parser->bufptr;
1593 bufend = PL_parser->bufend;
1599 } while (!(c == '\n' || (c == 0 && s == bufend)));
1600 } else if (c == '\n') {
1603 PL_parser->linestart = s;
1609 } else if (isSPACE(c)) {
1611 } else if (c == 0 && s == bufend) {
1614 if (flags & LEX_NO_NEXT_CHUNK)
1616 PL_parser->bufptr = s;
1617 l = CopLINE(PL_curcop);
1618 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1619 got_more = lex_next_chunk(flags);
1620 CopLINE_set(PL_curcop, l);
1621 s = PL_parser->bufptr;
1622 bufend = PL_parser->bufend;
1625 if (can_incline && need_incline && PL_parser->rsfp) {
1635 PL_parser->bufptr = s;
1640 =for apidoc validate_proto
1642 This function performs syntax checking on a prototype, C<proto>.
1643 If C<warn> is true, any illegal characters or mismatched brackets
1644 will trigger illegalproto warnings, declaring that they were
1645 detected in the prototype for C<name>.
1647 The return value is C<true> if this is a valid prototype, and
1648 C<false> if it is not, regardless of whether C<warn> was C<true> or
1651 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1658 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1660 STRLEN len, origlen;
1662 bool bad_proto = FALSE;
1663 bool in_brackets = FALSE;
1664 bool after_slash = FALSE;
1665 char greedy_proto = ' ';
1666 bool proto_after_greedy_proto = FALSE;
1667 bool must_be_last = FALSE;
1668 bool underscore = FALSE;
1669 bool bad_proto_after_underscore = FALSE;
1671 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1676 p = SvPV(proto, len);
1678 for (; len--; p++) {
1681 proto_after_greedy_proto = TRUE;
1683 if (!memCHRs(";@%", *p))
1684 bad_proto_after_underscore = TRUE;
1687 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1694 in_brackets = FALSE;
1695 else if ((*p == '@' || *p == '%')
1699 must_be_last = TRUE;
1708 after_slash = FALSE;
1713 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1716 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1717 origlen, UNI_DISPLAY_ISPRINT)
1718 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1720 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1721 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1722 sv_catpvs(name2, "::");
1723 sv_catsv(name2, (SV *)name);
1727 if (proto_after_greedy_proto)
1728 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1729 "Prototype after '%c' for %" SVf " : %s",
1730 greedy_proto, SVfARG(name), p);
1732 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1733 "Missing ']' in prototype for %" SVf " : %s",
1736 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1737 "Illegal character in prototype for %" SVf " : %s",
1739 if (bad_proto_after_underscore)
1740 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1741 "Illegal character after '_' in prototype for %" SVf " : %s",
1745 return (! (proto_after_greedy_proto || bad_proto) );
1750 * This subroutine has nothing to do with tilting, whether at windmills
1751 * or pinball tables. Its name is short for "increment line". It
1752 * increments the current line number in CopLINE(PL_curcop) and checks
1753 * to see whether the line starts with a comment of the form
1754 * # line 500 "foo.pm"
1755 * If so, it sets the current line number and file to the values in the comment.
1759 S_incline(pTHX_ const char *s, const char *end)
1767 PERL_ARGS_ASSERT_INCLINE;
1771 COPLINE_INC_WITH_HERELINES;
1772 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1773 && s+1 == PL_bufend && *s == ';') {
1774 /* fake newline in string eval */
1775 CopLINE_dec(PL_curcop);
1780 while (SPACE_OR_TAB(*s))
1782 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1783 s += sizeof("line") - 1;
1786 if (SPACE_OR_TAB(*s))
1790 while (SPACE_OR_TAB(*s))
1798 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1800 while (SPACE_OR_TAB(*s))
1802 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1808 while (*t && !isSPACE(*t))
1812 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1814 if (*e != '\n' && *e != '\0')
1815 return; /* false alarm */
1817 if (!grok_atoUV(n, &uv, &e))
1819 line_num = ((line_t)uv) - 1;
1822 const STRLEN len = t - s;
1824 if (!PL_rsfp && !PL_parser->filtered) {
1825 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1826 * to *{"::_<newfilename"} */
1827 /* However, the long form of evals is only turned on by the
1828 debugger - usually they're "(eval %lu)" */
1829 GV * const cfgv = CopFILEGV(PL_curcop);
1832 STRLEN tmplen2 = len;
1836 if (tmplen2 + 2 <= sizeof smallbuf)
1839 Newx(tmpbuf2, tmplen2 + 2, char);
1844 memcpy(tmpbuf2 + 2, s, tmplen2);
1847 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1849 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1850 /* adjust ${"::_<newfilename"} to store the new file name */
1851 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1852 /* The line number may differ. If that is the case,
1853 alias the saved lines that are in the array.
1854 Otherwise alias the whole array. */
1855 if (CopLINE(PL_curcop) == line_num) {
1856 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1857 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1859 else if (GvAV(cfgv)) {
1860 AV * const av = GvAV(cfgv);
1861 const line_t start = CopLINE(PL_curcop)+1;
1862 SSize_t items = AvFILLp(av) - start;
1864 AV * const av2 = GvAVn(gv2);
1865 SV **svp = AvARRAY(av) + start;
1866 Size_t l = line_num+1;
1867 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1868 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1873 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1876 CopFILE_free(PL_curcop);
1877 CopFILE_setn(PL_curcop, s, len);
1879 CopLINE_set(PL_curcop, line_num);
1883 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1885 AV *av = CopFILEAVx(PL_curcop);
1888 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1890 sv = *av_fetch(av, 0, 1);
1891 SvUPGRADE(sv, SVt_PVMG);
1893 if (!SvPOK(sv)) SvPVCLEAR(sv);
1895 sv_catsv(sv, orig_sv);
1897 sv_catpvn(sv, buf, len);
1902 if (PL_parser->preambling == NOLINE)
1903 av_store(av, CopLINE(PL_curcop), sv);
1909 * Called to gobble the appropriate amount and type of whitespace.
1910 * Skips comments as well.
1911 * Returns the next character after the whitespace that is skipped.
1914 * Same thing, but look ahead without incrementing line numbers or
1915 * adjusting PL_linestart.
1918 #define skipspace(s) skipspace_flags(s, 0)
1919 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1922 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1924 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1925 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1926 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1929 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1931 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1932 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1933 LEX_NO_NEXT_CHUNK : 0));
1935 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1936 if (PL_linestart > PL_bufptr)
1937 PL_bufptr = PL_linestart;
1945 * Check the unary operators to ensure there's no ambiguity in how they're
1946 * used. An ambiguous piece of code would be:
1948 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1949 * the +5 is its argument.
1957 if (PL_oldoldbufptr != PL_last_uni)
1959 while (isSPACE(*PL_last_uni))
1962 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1963 s += UTF ? UTF8SKIP(s) : 1;
1964 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1967 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1968 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1969 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1973 * LOP : macro to build a list operator. Its behaviour has been replaced
1974 * with a subroutine, S_lop() for which LOP is just another name.
1977 #define LOP(f,x) return lop(f,x,s)
1981 * Build a list operator (or something that might be one). The rules:
1982 * - if we have a next token, then it's a list operator (no parens) for
1983 * which the next token has already been parsed; e.g.,
1986 * - if the next thing is an opening paren, then it's a function
1987 * - else it's a list operator
1991 S_lop(pTHX_ I32 f, U8 x, char *s)
1993 PERL_ARGS_ASSERT_LOP;
1998 PL_last_lop = PL_oldbufptr;
1999 PL_last_lop_op = (OPCODE)f;
2004 return REPORT(FUNC);
2007 return REPORT(FUNC);
2010 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2011 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2012 return REPORT(LSTOP);
2018 * When the lexer realizes it knows the next token (for instance,
2019 * it is reordering tokens for the parser) then it can call S_force_next
2020 * to know what token to return the next time the lexer is called. Caller
2021 * will need to set PL_nextval[] and possibly PL_expect to ensure
2022 * the lexer handles the token correctly.
2026 S_force_next(pTHX_ I32 type)
2030 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2031 tokereport(type, &NEXTVAL_NEXTTOKE);
2034 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2035 PL_nexttype[PL_nexttoke] = type;
2042 * This subroutine handles postfix deref syntax after the arrow has already
2043 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2044 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2045 * only the first, leaving yylex to find the next.
2049 S_postderef(pTHX_ int const funny, char const next)
2051 assert(funny == DOLSHARP
2052 || memCHRs("$@%&*", funny)
2053 || funny == PERLY_SNAIL
2054 || funny == PERLY_AMPERSAND
2057 PL_expect = XOPERATOR;
2058 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2059 assert(PERLY_SNAIL == funny || '$' == funny || DOLSHARP == funny);
2060 PL_lex_state = LEX_INTERPEND;
2061 if (PERLY_SNAIL == funny)
2062 force_next(POSTJOIN);
2068 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2069 && !PL_lex_brackets)
2071 PL_expect = XOPERATOR;
2080 int yyc = PL_parser->yychar;
2081 if (yyc != YYEMPTY) {
2083 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2084 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2085 PL_lex_allbrackets--;
2087 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2088 } else if (yyc == '('/*)*/) {
2089 PL_lex_allbrackets--;
2094 PL_parser->yychar = YYEMPTY;
2099 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2101 SV * const sv = newSVpvn_utf8(start, len,
2105 && is_utf8_non_invariant_string((const U8*)start, len));
2111 * When the lexer knows the next thing is a word (for instance, it has
2112 * just seen -> and it knows that the next char is a word char, then
2113 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2117 * char *start : buffer position (must be within PL_linestr)
2118 * int token : PL_next* will be this type of bare word
2119 * (e.g., METHOD,BAREWORD)
2120 * int check_keyword : if true, Perl checks to make sure the word isn't
2121 * a keyword (do this if the word is a label, e.g. goto FOO)
2122 * int allow_pack : if true, : characters will also be allowed (require,
2123 * use, etc. do this)
2127 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2132 PERL_ARGS_ASSERT_FORCE_WORD;
2134 start = skipspace(start);
2136 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2137 || (allow_pack && *s == ':' && s[1] == ':') )
2139 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2140 if (check_keyword) {
2141 char *s2 = PL_tokenbuf;
2143 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2144 s2 += sizeof("CORE::") - 1;
2145 len2 -= sizeof("CORE::") - 1;
2147 if (keyword(s2, len2, 0))
2150 if (token == METHOD) {
2155 PL_expect = XOPERATOR;
2158 NEXTVAL_NEXTTOKE.opval
2159 = newSVOP(OP_CONST,0,
2160 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2161 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2169 * Called when the lexer wants $foo *foo &foo etc, but the program
2170 * text only contains the "foo" portion. The first argument is a pointer
2171 * to the "foo", and the second argument is the type symbol to prefix.
2172 * Forces the next token to be a "BAREWORD".
2173 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2177 S_force_ident(pTHX_ const char *s, int kind)
2179 PERL_ARGS_ASSERT_FORCE_IDENT;
2182 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2183 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2184 UTF ? SVf_UTF8 : 0));
2185 NEXTVAL_NEXTTOKE.opval = o;
2186 force_next(BAREWORD);
2188 o->op_private = OPpCONST_ENTERED;
2189 /* XXX see note in pp_entereval() for why we forgo typo
2190 warnings if the symbol must be introduced in an eval.
2192 gv_fetchpvn_flags(s, len,
2193 (PL_in_eval ? GV_ADDMULTI
2194 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2195 kind == '$' ? SVt_PV :
2196 kind == PERLY_SNAIL ? SVt_PVAV :
2197 kind == '%' ? SVt_PVHV :
2205 S_force_ident_maybe_lex(pTHX_ char pit)
2207 NEXTVAL_NEXTTOKE.ival = pit;
2212 Perl_str_to_version(pTHX_ SV *sv)
2217 const char *start = SvPV_const(sv,len);
2218 const char * const end = start + len;
2219 const bool utf = cBOOL(SvUTF8(sv));
2221 PERL_ARGS_ASSERT_STR_TO_VERSION;
2223 while (start < end) {
2227 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2232 retval += ((NV)n)/nshift;
2241 * Forces the next token to be a version number.
2242 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2243 * and if "guessing" is TRUE, then no new token is created (and the caller
2244 * must use an alternative parsing method).
2248 S_force_version(pTHX_ char *s, int guessing)
2253 PERL_ARGS_ASSERT_FORCE_VERSION;
2261 while (isDIGIT(*d) || *d == '_' || *d == '.')
2263 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2265 s = scan_num(s, &pl_yylval);
2266 version = pl_yylval.opval;
2267 ver = cSVOPx(version)->op_sv;
2268 if (SvPOK(ver) && !SvNIOK(ver)) {
2269 SvUPGRADE(ver, SVt_PVNV);
2270 SvNV_set(ver, str_to_version(ver));
2271 SvNOK_on(ver); /* hint that it is a version */
2274 else if (guessing) {
2279 /* NOTE: The parser sees the package name and the VERSION swapped */
2280 NEXTVAL_NEXTTOKE.opval = version;
2281 force_next(BAREWORD);
2287 * S_force_strict_version
2288 * Forces the next token to be a version number using strict syntax rules.
2292 S_force_strict_version(pTHX_ char *s)
2295 const char *errstr = NULL;
2297 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2299 while (isSPACE(*s)) /* leading whitespace */
2302 if (is_STRICT_VERSION(s,&errstr)) {
2304 s = (char *)scan_version(s, ver, 0);
2305 version = newSVOP(OP_CONST, 0, ver);
2307 else if ((*s != ';' && *s != '{' && *s != '}' )
2308 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2312 yyerror(errstr); /* version required */
2316 /* NOTE: The parser sees the package name and the VERSION swapped */
2317 NEXTVAL_NEXTTOKE.opval = version;
2318 force_next(BAREWORD);
2325 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2326 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2327 * unchanged, and a new SV containing the modified input is returned.
2331 S_tokeq(pTHX_ SV *sv)
2338 PERL_ARGS_ASSERT_TOKEQ;
2342 assert (!SvIsCOW(sv));
2343 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2347 /* This is relying on the SV being "well formed" with a trailing '\0' */
2348 while (s < send && !(*s == '\\' && s[1] == '\\'))
2353 if ( PL_hints & HINT_NEW_STRING ) {
2354 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2355 SVs_TEMP | SvUTF8(sv));
2359 if (s + 1 < send && (s[1] == '\\'))
2360 s++; /* all that, just for this */
2365 SvCUR_set(sv, d - SvPVX_const(sv));
2367 if ( PL_hints & HINT_NEW_STRING )
2368 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2373 * Now come three functions related to double-quote context,
2374 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2375 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2376 * interact with PL_lex_state, and create fake ( ... ) argument lists
2377 * to handle functions and concatenation.
2381 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2386 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2388 * Pattern matching will set PL_lex_op to the pattern-matching op to
2389 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2391 * OP_CONST is easy--just make the new op and return.
2393 * Everything else becomes a FUNC.
2395 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2396 * had an OP_CONST. This just sets us up for a
2397 * call to S_sublex_push().
2401 S_sublex_start(pTHX)
2403 const I32 op_type = pl_yylval.ival;
2405 if (op_type == OP_NULL) {
2406 pl_yylval.opval = PL_lex_op;
2410 if (op_type == OP_CONST) {
2411 SV *sv = PL_lex_stuff;
2412 PL_lex_stuff = NULL;
2415 if (SvTYPE(sv) == SVt_PVIV) {
2416 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2418 const char * const p = SvPV_const(sv, len);
2419 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2423 pl_yylval.opval = newSVOP(op_type, 0, sv);
2427 PL_parser->lex_super_state = PL_lex_state;
2428 PL_parser->lex_sub_inwhat = (U16)op_type;
2429 PL_parser->lex_sub_op = PL_lex_op;
2430 PL_parser->sub_no_recover = FALSE;
2431 PL_parser->sub_error_count = PL_error_count;
2432 PL_lex_state = LEX_INTERPPUSH;
2436 pl_yylval.opval = PL_lex_op;
2446 * Create a new scope to save the lexing state. The scope will be
2447 * ended in S_sublex_done. Returns a '(', starting the function arguments
2448 * to the uc, lc, etc. found before.
2449 * Sets PL_lex_state to LEX_INTERPCONCAT.
2456 const bool is_heredoc = PL_multi_close == '<';
2459 PL_lex_state = PL_parser->lex_super_state;
2460 SAVEI8(PL_lex_dojoin);
2461 SAVEI32(PL_lex_brackets);
2462 SAVEI32(PL_lex_allbrackets);
2463 SAVEI32(PL_lex_formbrack);
2464 SAVEI8(PL_lex_fakeeof);
2465 SAVEI32(PL_lex_casemods);
2466 SAVEI32(PL_lex_starts);
2467 SAVEI8(PL_lex_state);
2468 SAVESPTR(PL_lex_repl);
2469 SAVEVPTR(PL_lex_inpat);
2470 SAVEI16(PL_lex_inwhat);
2473 SAVECOPLINE(PL_curcop);
2474 SAVEI32(PL_multi_end);
2475 SAVEI32(PL_parser->herelines);
2476 PL_parser->herelines = 0;
2478 SAVEIV(PL_multi_close);
2479 SAVEPPTR(PL_bufptr);
2480 SAVEPPTR(PL_bufend);
2481 SAVEPPTR(PL_oldbufptr);
2482 SAVEPPTR(PL_oldoldbufptr);
2483 SAVEPPTR(PL_last_lop);
2484 SAVEPPTR(PL_last_uni);
2485 SAVEPPTR(PL_linestart);
2486 SAVESPTR(PL_linestr);
2487 SAVEGENERICPV(PL_lex_brackstack);
2488 SAVEGENERICPV(PL_lex_casestack);
2489 SAVEGENERICPV(PL_parser->lex_shared);
2490 SAVEBOOL(PL_parser->lex_re_reparsing);
2491 SAVEI32(PL_copline);
2493 /* The here-doc parser needs to be able to peek into outer lexing
2494 scopes to find the body of the here-doc. So we put PL_linestr and
2495 PL_bufptr into lex_shared, to ‘share’ those values.
2497 PL_parser->lex_shared->ls_linestr = PL_linestr;
2498 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2500 PL_linestr = PL_lex_stuff;
2501 PL_lex_repl = PL_parser->lex_sub_repl;
2502 PL_lex_stuff = NULL;
2503 PL_parser->lex_sub_repl = NULL;
2505 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2506 set for an inner quote-like operator and then an error causes scope-
2507 popping. We must not have a PL_lex_stuff value left dangling, as
2508 that breaks assumptions elsewhere. See bug #123617. */
2509 SAVEGENERICSV(PL_lex_stuff);
2510 SAVEGENERICSV(PL_parser->lex_sub_repl);
2512 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2513 = SvPVX(PL_linestr);
2514 PL_bufend += SvCUR(PL_linestr);
2515 PL_last_lop = PL_last_uni = NULL;
2516 SAVEFREESV(PL_linestr);
2517 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2519 PL_lex_dojoin = FALSE;
2520 PL_lex_brackets = PL_lex_formbrack = 0;
2521 PL_lex_allbrackets = 0;
2522 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2523 Newx(PL_lex_brackstack, 120, char);
2524 Newx(PL_lex_casestack, 12, char);
2525 PL_lex_casemods = 0;
2526 *PL_lex_casestack = '\0';
2528 PL_lex_state = LEX_INTERPCONCAT;
2530 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2531 PL_copline = NOLINE;
2533 Newxz(shared, 1, LEXSHARED);
2534 shared->ls_prev = PL_parser->lex_shared;
2535 PL_parser->lex_shared = shared;
2537 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2538 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2539 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2540 PL_lex_inpat = PL_parser->lex_sub_op;
2542 PL_lex_inpat = NULL;
2544 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2545 PL_in_eval &= ~EVAL_RE_REPARSING;
2552 * Restores lexer state after a S_sublex_push.
2558 if (!PL_lex_starts++) {
2559 SV * const sv = newSVpvs("");
2560 if (SvUTF8(PL_linestr))
2562 PL_expect = XOPERATOR;
2563 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2567 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2568 PL_lex_state = LEX_INTERPCASEMOD;
2572 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2573 assert(PL_lex_inwhat != OP_TRANSR);
2575 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2576 PL_linestr = PL_lex_repl;
2578 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2579 PL_bufend += SvCUR(PL_linestr);
2580 PL_last_lop = PL_last_uni = NULL;
2581 PL_lex_dojoin = FALSE;
2582 PL_lex_brackets = 0;
2583 PL_lex_allbrackets = 0;
2584 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2585 PL_lex_casemods = 0;
2586 *PL_lex_casestack = '\0';
2588 if (SvEVALED(PL_lex_repl)) {
2589 PL_lex_state = LEX_INTERPNORMAL;
2591 /* we don't clear PL_lex_repl here, so that we can check later
2592 whether this is an evalled subst; that means we rely on the
2593 logic to ensure sublex_done() is called again only via the
2594 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2597 PL_lex_state = LEX_INTERPCONCAT;
2600 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2601 CopLINE(PL_curcop) +=
2602 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2603 + PL_parser->herelines;
2604 PL_parser->herelines = 0;
2609 const line_t l = CopLINE(PL_curcop);
2611 if (PL_parser->sub_error_count != PL_error_count) {
2612 if (PL_parser->sub_no_recover) {
2617 if (PL_multi_close == '<')
2618 PL_parser->herelines += l - PL_multi_end;
2619 PL_bufend = SvPVX(PL_linestr);
2620 PL_bufend += SvCUR(PL_linestr);
2621 PL_expect = XOPERATOR;
2627 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2628 const STRLEN context_len, const char ** error_msg)
2630 /* Load the official _charnames module if not already there. The
2631 * parameters are just to give info for any error messages generated:
2632 * char_name a name to look up which is the reason for loading this
2633 * context 'char_name' in the context in the input in which it appears
2634 * context_len how many bytes 'context' occupies
2635 * error_msg *error_msg will be set to any error
2637 * Returns the ^H table if success; otherwise NULL */
2644 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2646 /* This loop is executed 1 1/2 times. On the first time through, if it
2647 * isn't already loaded, try loading it, and iterate just once to see if it
2649 for (i = 0; i < 2; i++) {
2650 table = GvHV(PL_hintgv); /* ^H */
2653 && (PL_hints & HINT_LOCALIZE_HH)
2654 && (cvp = hv_fetchs(table, "charnames", FALSE))
2657 return table; /* Quit if already loaded */
2661 Perl_load_module(aTHX_
2663 newSVpvs("_charnames"),
2665 /* version parameter; no need to specify it, as if we get too early
2666 * a version, will fail anyway, not being able to find 'charnames'
2675 /* Here, it failed; new_constant will give appropriate error messages */
2677 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2678 context, context_len, error_msg);
2685 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2687 /* This justs wraps get_and_check_backslash_N_name() to output any error
2688 * message it returns. */
2690 const char * error_msg = NULL;
2693 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2695 /* charnames doesn't work well if there have been errors found */
2696 if (PL_error_count > 0) {
2700 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2703 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2710 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2711 const char* const e,
2713 const char ** error_msg)
2715 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2716 * interior, hence to the "}". Finds what the name resolves to, returning
2717 * an SV* containing it; NULL if no valid one found.
2719 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2720 * doesn't have to be. */
2730 /* Points to the beginning of the \N{... so that any messages include the
2731 * context of what's failing*/
2732 const char* context = s - 3;
2733 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2736 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2739 assert(s > (char *) 3);
2741 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2743 if (!SvCUR(char_name)) {
2744 SvREFCNT_dec_NN(char_name);
2745 /* diag_listed_as: Unknown charname '%s' */
2746 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2750 /* Autoload the charnames module */
2752 table = load_charnames(char_name, context, context_len, error_msg);
2753 if (table == NULL) {
2758 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2759 context, context_len, error_msg);
2761 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2767 /* See if the charnames handler is the Perl core's, and if so, we can skip
2768 * the validation needed for a user-supplied one, as Perl's does its own
2770 cvp = hv_fetchs(table, "charnames", FALSE);
2771 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2772 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2774 const char * const name = HvNAME(stash);
2775 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2780 /* Here, it isn't Perl's charname handler. We can't rely on a
2781 * user-supplied handler to validate the input name. For non-ut8 input,
2782 * look to see that the first character is legal. Then loop through the
2783 * rest checking that each is a continuation */
2785 /* This code makes the reasonable assumption that the only Latin1-range
2786 * characters that begin a character name alias are alphabetic, otherwise
2787 * would have to create a isCHARNAME_BEGIN macro */
2790 if (! isALPHAU(*s)) {
2795 if (! isCHARNAME_CONT(*s)) {
2798 if (*s == ' ' && *(s-1) == ' ') {
2805 /* Similarly for utf8. For invariants can check directly; for other
2806 * Latin1, can calculate their code point and check; otherwise use an
2808 if (UTF8_IS_INVARIANT(*s)) {
2809 if (! isALPHAU(*s)) {
2813 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2814 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2820 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2821 utf8_to_uvchr_buf((U8 *) s,
2831 if (UTF8_IS_INVARIANT(*s)) {
2832 if (! isCHARNAME_CONT(*s)) {
2835 if (*s == ' ' && *(s-1) == ' ') {
2840 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2841 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2848 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2849 utf8_to_uvchr_buf((U8 *) s,
2859 if (*(s-1) == ' ') {
2860 /* diag_listed_as: charnames alias definitions may not contain
2861 trailing white-space; marked by <-- HERE in %s
2863 *error_msg = Perl_form(aTHX_
2864 "charnames alias definitions may not contain trailing "
2865 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2866 (int)(s - context + 1), context,
2867 (int)(e - s + 1), s + 1);
2871 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2872 const U8* first_bad_char_loc;
2874 const char* const str = SvPV_const(res, len);
2875 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2876 &first_bad_char_loc)))
2878 _force_out_malformed_utf8_message(first_bad_char_loc,
2879 (U8 *) PL_parser->bufend,
2881 0 /* 0 means don't die */ );
2882 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2883 immediately after '%s' */
2884 *error_msg = Perl_form(aTHX_
2885 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2886 (int) context_len, context,
2887 (int) ((char *) first_bad_char_loc - str), str);
2896 /* The final %.*s makes sure that should the trailing NUL be missing
2897 * that this print won't run off the end of the string */
2898 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2900 *error_msg = Perl_form(aTHX_
2901 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2902 (int)(s - context + 1), context,
2903 (int)(e - s + 1), s + 1);
2908 /* diag_listed_as: charnames alias definitions may not contain a
2909 sequence of multiple spaces; marked by <-- HERE
2911 *error_msg = Perl_form(aTHX_
2912 "charnames alias definitions may not contain a sequence of "
2913 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2914 (int)(s - context + 1), context,
2915 (int)(e - s + 1), s + 1);
2922 Extracts the next constant part of a pattern, double-quoted string,
2923 or transliteration. This is terrifying code.
2925 For example, in parsing the double-quoted string "ab\x63$d", it would
2926 stop at the '$' and return an OP_CONST containing 'abc'.
2928 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2929 processing a pattern (PL_lex_inpat is true), a transliteration
2930 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2932 Returns a pointer to the character scanned up to. If this is
2933 advanced from the start pointer supplied (i.e. if anything was
2934 successfully parsed), will leave an OP_CONST for the substring scanned
2935 in pl_yylval. Caller must intuit reason for not parsing further
2936 by looking at the next characters herself.
2940 \N{FOO} => \N{U+hex_for_character_FOO}
2941 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2944 all other \-char, including \N and \N{ apart from \N{ABC}
2947 @ and $ where it appears to be a var, but not for $ as tail anchor
2951 In transliterations:
2952 characters are VERY literal, except for - not at the start or end
2953 of the string, which indicates a range. However some backslash sequences
2954 are recognized: \r, \n, and the like
2955 \007 \o{}, \x{}, \N{}
2956 If all elements in the transliteration are below 256,
2957 scan_const expands the range to the full set of intermediate
2958 characters. If the range is in utf8, the hyphen is replaced with
2959 a certain range mark which will be handled by pmtrans() in op.c.
2961 In double-quoted strings:
2963 all those recognized in transliterations
2964 deprecated backrefs: \1 (in substitution replacements)
2965 case and quoting: \U \Q \E
2968 scan_const does *not* construct ops to handle interpolated strings.
2969 It stops processing as soon as it finds an embedded $ or @ variable
2970 and leaves it to the caller to work out what's going on.
2972 embedded arrays (whether in pattern or not) could be:
2973 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2975 $ in double-quoted strings must be the symbol of an embedded scalar.
2977 $ in pattern could be $foo or could be tail anchor. Assumption:
2978 it's a tail anchor if $ is the last thing in the string, or if it's
2979 followed by one of "()| \r\n\t"
2981 \1 (backreferences) are turned into $1 in substitutions
2983 The structure of the code is
2984 while (there's a character to process) {
2985 handle transliteration ranges
2986 skip regexp comments /(?#comment)/ and codes /(?{code})/
2987 skip #-initiated comments in //x patterns
2988 check for embedded arrays
2989 check for embedded scalars
2991 deprecate \1 in substitution replacements
2992 handle string-changing backslashes \l \U \Q \E, etc.
2993 switch (what was escaped) {
2994 handle \- in a transliteration (becomes a literal -)
2995 if a pattern and not \N{, go treat as regular character
2996 handle \132 (octal characters)
2997 handle \x15 and \x{1234} (hex characters)
2998 handle \N{name} (named characters, also \N{3,5} in a pattern)
2999 handle \cV (control characters)
3000 handle printf-style backslashes (\f, \r, \n, etc)
3003 } (end if backslash)
3004 handle regular character
3005 } (end while character to read)
3010 S_scan_const(pTHX_ char *start)
3012 char *send = PL_bufend; /* end of the constant */
3013 SV *sv = newSV(send - start); /* sv for the constant. See note below
3015 char *s = start; /* start of the constant */
3016 char *d = SvPVX(sv); /* destination for copies */
3017 bool dorange = FALSE; /* are we in a translit range? */
3018 bool didrange = FALSE; /* did we just finish a range? */
3019 bool in_charclass = FALSE; /* within /[...]/ */
3020 bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
3021 UTF8? But, this can show as true
3022 when the source isn't utf8, as for
3023 example when it is entirely composed
3025 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3026 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3027 number of characters found so far
3028 that will expand (into 2 bytes)
3029 should we have to convert to
3031 SV *res; /* result from charnames */
3032 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3033 high-end character is temporarily placed */
3035 /* Does something require special handling in tr/// ? This avoids extra
3036 * work in a less likely case. As such, khw didn't feel it was worth
3037 * adding any branches to the more mainline code to handle this, which
3038 * means that this doesn't get set in some circumstances when things like
3039 * \x{100} get expanded out. As a result there needs to be extra testing
3040 * done in the tr code */
3041 bool has_above_latin1 = FALSE;
3043 /* Note on sizing: The scanned constant is placed into sv, which is
3044 * initialized by newSV() assuming one byte of output for every byte of
3045 * input. This routine expects newSV() to allocate an extra byte for a
3046 * trailing NUL, which this routine will append if it gets to the end of
3047 * the input. There may be more bytes of input than output (eg., \N{LATIN
3048 * CAPITAL LETTER A}), or more output than input if the constant ends up
3049 * recoded to utf8, but each time a construct is found that might increase
3050 * the needed size, SvGROW() is called. Its size parameter each time is
3051 * based on the best guess estimate at the time, namely the length used so
3052 * far, plus the length the current construct will occupy, plus room for
3053 * the trailing NUL, plus one byte for every input byte still unscanned */
3055 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3058 int backslash_N = 0; /* ? was the character from \N{} */
3059 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3060 platform-specific like \x65 */
3063 PERL_ARGS_ASSERT_SCAN_CONST;
3065 assert(PL_lex_inwhat != OP_TRANSR);
3067 /* Protect sv from errors and fatal warnings. */
3068 ENTER_with_name("scan_const");
3071 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3072 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3074 assert(*send == '\0');
3077 || dorange /* Handle tr/// range at right edge of input */
3080 /* get transliterations out of the way (they're most literal) */
3081 if (PL_lex_inwhat == OP_TRANS) {
3083 /* But there isn't any special handling necessary unless there is a
3084 * range, so for most cases we just drop down and handle the value
3085 * as any other. There are two exceptions.
3087 * 1. A hyphen indicates that we are actually going to have a
3088 * range. In this case, skip the '-', set a flag, then drop
3089 * down to handle what should be the end range value.
3090 * 2. After we've handled that value, the next time through, that
3091 * flag is set and we fix up the range.
3093 * Ranges entirely within Latin1 are expanded out entirely, in
3094 * order to make the transliteration a simple table look-up.
3095 * Ranges that extend above Latin1 have to be done differently, so
3096 * there is no advantage to expanding them here, so they are
3097 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3098 * a byte that can't occur in legal UTF-8, and hence can signify a
3099 * hyphen without any possible ambiguity. On EBCDIC machines, if
3100 * the range is expressed as Unicode, the Latin1 portion is
3101 * expanded out even if the range extends above Latin1. This is
3102 * because each code point in it has to be processed here
3103 * individually to get its native translation */
3107 /* Here, we don't think we're in a range. If the new character
3108 * is not a hyphen; or if it is a hyphen, but it's too close to
3109 * either edge to indicate a range, or if we haven't output any
3110 * characters yet then it's a regular character. */
3111 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3114 /* A regular character. Process like any other, but first
3115 * clear any flags */
3119 non_portable_endpoint = 0;
3122 /* The tests here for being above Latin1 and similar ones
3123 * in the following 'else' suffice to find all such
3124 * occurences in the constant, except those added by a
3125 * backslash escape sequence, like \x{100}. Mostly, those
3126 * set 'has_above_latin1' as appropriate */
3127 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3128 has_above_latin1 = TRUE;
3131 /* Drops down to generic code to process current byte */
3133 else { /* Is a '-' in the context where it means a range */
3134 if (didrange) { /* Something like y/A-C-Z// */
3135 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3141 s++; /* Skip past the hyphen */
3143 /* d now points to where the end-range character will be
3144 * placed. Drop down to get that character. We'll finish
3145 * processing the range the next time through the loop */
3147 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3148 has_above_latin1 = TRUE;
3151 /* Drops down to generic code to process current byte */
3153 } /* End of not a range */
3155 /* Here we have parsed a range. Now must handle it. At this
3157 * 'sv' is a SV* that contains the output string we are
3158 * constructing. The final two characters in that string
3159 * are the range start and range end, in order.
3160 * 'd' points to just beyond the range end in the 'sv' string,
3161 * where we would next place something
3166 IV range_max; /* last character in range */
3168 Size_t offset_to_min = 0;
3171 bool convert_unicode;
3172 IV real_range_max = 0;
3174 /* Get the code point values of the range ends. */
3175 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3176 offset_to_max = max_ptr - SvPVX_const(sv);
3178 /* We know the utf8 is valid, because we just constructed
3179 * it ourselves in previous loop iterations */
3180 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3181 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3182 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3184 /* This compensates for not all code setting
3185 * 'has_above_latin1', so that we don't skip stuff that
3186 * should be executed */
3187 if (range_max > 255) {
3188 has_above_latin1 = TRUE;
3192 min_ptr = max_ptr - 1;
3193 range_min = * (U8*) min_ptr;
3194 range_max = * (U8*) max_ptr;
3197 /* If the range is just a single code point, like tr/a-a/.../,
3198 * that code point is already in the output, twice. We can
3199 * just back up over the second instance and avoid all the rest
3200 * of the work. But if it is a variant character, it's been
3201 * counted twice, so decrement. (This unlikely scenario is
3202 * special cased, like the one for a range of 2 code points
3203 * below, only because the main-line code below needs a range
3204 * of 3 or more to work without special casing. Might as well
3205 * get it out of the way now.) */
3206 if (UNLIKELY(range_max == range_min)) {
3208 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3209 utf8_variant_count--;
3215 /* On EBCDIC platforms, we may have to deal with portable
3216 * ranges. These happen if at least one range endpoint is a
3217 * Unicode value (\N{...}), or if the range is a subset of
3218 * [A-Z] or [a-z], and both ends are literal characters,
3219 * like 'A', and not like \x{C1} */
3221 cBOOL(backslash_N) /* \N{} forces Unicode,
3222 hence portable range */
3223 || ( ! non_portable_endpoint
3224 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3225 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3226 if (convert_unicode) {
3228 /* Special handling is needed for these portable ranges.
3229 * They are defined to be in Unicode terms, which includes
3230 * all the Unicode code points between the end points.
3231 * Convert to Unicode to get the Unicode range. Later we
3232 * will convert each code point in the range back to
3234 range_min = NATIVE_TO_UNI(range_min);
3235 range_max = NATIVE_TO_UNI(range_max);
3239 if (range_min > range_max) {
3241 if (convert_unicode) {
3242 /* Need to convert back to native for meaningful
3243 * messages for this platform */
3244 range_min = UNI_TO_NATIVE(range_min);
3245 range_max = UNI_TO_NATIVE(range_max);
3248 /* Use the characters themselves for the error message if
3249 * ASCII printables; otherwise some visible representation
3251 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3253 "Invalid range \"%c-%c\" in transliteration operator",
3254 (char)range_min, (char)range_max);
3257 else if (convert_unicode) {
3258 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3260 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3261 UVXf "}\" in transliteration operator",
3262 range_min, range_max);
3266 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3268 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3269 " in transliteration operator",
3270 range_min, range_max);
3274 /* If the range is exactly two code points long, they are
3275 * already both in the output */
3276 if (UNLIKELY(range_min + 1 == range_max)) {
3280 /* Here the range contains at least 3 code points */
3284 /* If everything in the transliteration is below 256, we
3285 * can avoid special handling later. A translation table
3286 * for each of those bytes is created by op.c. So we
3287 * expand out all ranges to their constituent code points.
3288 * But if we've encountered something above 255, the
3289 * expanding won't help, so skip doing that. But if it's
3290 * EBCDIC, we may have to look at each character below 256
3291 * if we have to convert to/from Unicode values */
3292 if ( has_above_latin1
3294 && (range_min > 255 || ! convert_unicode)
3297 const STRLEN off = d - SvPVX(sv);
3298 const STRLEN extra = 1 + (send - s) + 1;
3301 /* Move the high character one byte to the right; then
3302 * insert between it and the range begin, an illegal
3303 * byte which serves to indicate this is a range (using
3304 * a '-' would be ambiguous). */
3306 if (off + extra > SvLEN(sv)) {
3307 d = off + SvGROW(sv, off + extra);
3308 max_ptr = d - off + offset_to_max;
3312 while (e-- > max_ptr) {
3315 *(e + 1) = (char) RANGE_INDICATOR;
3319 /* Here, we're going to expand out the range. For EBCDIC
3320 * the range can extend above 255 (not so in ASCII), so
3321 * for EBCDIC, split it into the parts above and below
3324 if (range_max > 255) {
3325 real_range_max = range_max;
3331 /* Here we need to expand out the string to contain each
3332 * character in the range. Grow the output to handle this.
3333 * For non-UTF8, we need a byte for each code point in the
3334 * range, minus the three that we've already allocated for: the
3335 * hyphen, the min, and the max. For UTF-8, we need this
3336 * plus an extra byte for each code point that occupies two
3337 * bytes (is variant) when in UTF-8 (except we've already
3338 * allocated for the end points, including if they are
3339 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3340 * platforms, it's easy to calculate a precise number. To
3341 * start, we count the variants in the range, which we need
3342 * elsewhere in this function anyway. (For the case where it
3343 * isn't easy to calculate, 'extras' has been initialized to 0,
3344 * and the calculation is done in a loop further down.) */
3346 if (convert_unicode)
3349 /* This is executed unconditionally on ASCII, and for
3350 * Unicode ranges on EBCDIC. Under these conditions, all
3351 * code points above a certain value are variant; and none
3352 * under that value are. We just need to find out how much
3353 * of the range is above that value. We don't count the
3354 * end points here, as they will already have been counted
3355 * as they were parsed. */
3356 if (range_min >= UTF_CONTINUATION_MARK) {
3358 /* The whole range is made up of variants */
3359 extras = (range_max - 1) - (range_min + 1) + 1;
3361 else if (range_max >= UTF_CONTINUATION_MARK) {
3363 /* Only the higher portion of the range is variants */
3364 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3367 utf8_variant_count += extras;
3370 /* The base growth is the number of code points in the range,
3371 * not including the endpoints, which have already been sized
3372 * for (and output). We don't subtract for the hyphen, as it
3373 * has been parsed but not output, and the SvGROW below is
3374 * based only on what's been output plus what's left to parse.
3376 grow = (range_max - 1) - (range_min + 1) + 1;
3380 /* In some cases in EBCDIC, we haven't yet calculated a
3381 * precise amount needed for the UTF-8 variants. Just
3382 * assume the worst case, that everything will expand by a
3384 if (! convert_unicode) {
3390 /* Otherwise we know exactly how many variants there
3391 * are in the range. */
3396 /* Grow, but position the output to overwrite the range min end
3397 * point, because in some cases we overwrite that */
3398 SvCUR_set(sv, d - SvPVX_const(sv));
3399 offset_to_min = min_ptr - SvPVX_const(sv);
3401 /* See Note on sizing above. */
3402 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3405 + 1 /* Trailing NUL */ );
3407 /* Now, we can expand out the range. */
3409 if (convert_unicode) {
3412 /* Recall that the min and max are now in Unicode terms, so
3413 * we have to convert each character to its native
3416 for (i = range_min; i <= range_max; i++) {
3417 append_utf8_from_native_byte(
3418 LATIN1_TO_NATIVE((U8) i),
3423 for (i = range_min; i <= range_max; i++) {
3424 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3430 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3432 /* Here, no conversions are necessary, which means that the
3433 * first character in the range is already in 'd' and
3434 * valid, so we can skip overwriting it */
3438 for (i = range_min + 1; i <= range_max; i++) {
3439 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3445 assert(range_min + 1 <= range_max);
3446 for (i = range_min + 1; i < range_max; i++) {
3448 /* In this case on EBCDIC, we haven't calculated
3449 * the variants. Do it here, as we go along */
3450 if (! UVCHR_IS_INVARIANT(i)) {
3451 utf8_variant_count++;
3457 /* The range_max is done outside the loop so as to
3458 * avoid having to special case not incrementing
3459 * 'utf8_variant_count' on EBCDIC (it's already been
3460 * counted when originally parsed) */
3461 *d++ = (char) range_max;
3466 /* If the original range extended above 255, add in that
3468 if (real_range_max) {
3469 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3470 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3471 if (real_range_max > 0x100) {
3472 if (real_range_max > 0x101) {
3473 *d++ = (char) RANGE_INDICATOR;
3475 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3481 /* mark the range as done, and continue */
3485 non_portable_endpoint = 0;
3489 } /* End of is a range */
3490 } /* End of transliteration. Joins main code after these else's */
3491 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3494 while (s1 >= start && *s1-- == '\\')
3497 in_charclass = TRUE;
3499 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3502 while (s1 >= start && *s1-- == '\\')
3505 in_charclass = FALSE;
3507 /* skip for regexp comments /(?#comment)/, except for the last
3508 * char, which will be done separately. Stop on (?{..}) and
3510 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3513 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3515 while (s + len < send && *s != ')') {
3516 Copy(s, d, len, U8);
3519 len = UTF8_SAFE_SKIP(s, send);
3522 else while (s+1 < send && *s != ')') {
3526 else if (!PL_lex_casemods
3527 && ( s[2] == '{' /* This should match regcomp.c */
3528 || (s[2] == '?' && s[3] == '{')))
3533 /* likewise skip #-initiated comments in //x patterns */
3537 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3539 while (s < send && *s != '\n')
3542 /* no further processing of single-quoted regex */
3543 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3544 goto default_action;
3546 /* check for embedded arrays
3547 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3549 else if (*s == '@' && s[1]) {
3551 ? isIDFIRST_utf8_safe(s+1, send)
3552 : isWORDCHAR_A(s[1]))
3556 if (memCHRs(":'{$", s[1]))
3558 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3559 break; /* in regexp, neither @+ nor @- are interpolated */
3561 /* check for embedded scalars. only stop if we're sure it's a
3563 else if (*s == '$') {
3564 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3566 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3568 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3569 "Possible unintended interpolation of $\\ in regex");
3571 break; /* in regexp, $ might be tail anchor */
3575 /* End of else if chain - OP_TRANS rejoin rest */
3577 if (UNLIKELY(s >= send)) {
3583 if (*s == '\\' && s+1 < send) {
3584 char* e; /* Can be used for ending '}', etc. */
3588 /* warn on \1 - \9 in substitution replacements, but note that \11
3589 * is an octal; and \19 is \1 followed by '9' */
3590 if (PL_lex_inwhat == OP_SUBST
3596 /* diag_listed_as: \%d better written as $%d */
3597 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3602 /* string-change backslash escapes */
3603 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3607 /* In a pattern, process \N, but skip any other backslash escapes.
3608 * This is because we don't want to translate an escape sequence
3609 * into a meta symbol and have the regex compiler use the meta
3610 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3611 * in spite of this, we do have to process \N here while the proper
3612 * charnames handler is in scope. See bugs #56444 and #62056.
3614 * There is a complication because \N in a pattern may also stand
3615 * for 'match a non-nl', and not mean a charname, in which case its
3616 * processing should be deferred to the regex compiler. To be a
3617 * charname it must be followed immediately by a '{', and not look
3618 * like \N followed by a curly quantifier, i.e., not something like
3619 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3621 else if (PL_lex_inpat
3624 || regcurly(s + 1)))
3627 goto default_action;
3633 if ((isALPHANUMERIC(*s)))
3634 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3635 "Unrecognized escape \\%c passed through",
3637 /* default action is to copy the quoted character */
3638 goto default_action;
3641 /* eg. \132 indicates the octal constant 0132 */
3642 case '0': case '1': case '2': case '3':
3643 case '4': case '5': case '6': case '7':
3645 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3646 | PERL_SCAN_NOTIFY_ILLDIGIT;
3648 uv = grok_oct(s, &len, &flags, NULL);
3650 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3652 && isDIGIT(*s) /* like \08, \178 */
3653 && ckWARN(WARN_MISC))
3655 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3656 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3659 goto NUM_ESCAPE_INSERT;
3661 /* eg. \o{24} indicates the octal constant \024 */
3666 if (! grok_bslash_o(&s, send,
3669 FALSE, /* Not strict */
3670 FALSE, /* No illegal cp's */
3674 uv = 0; /* drop through to ensure range ends are set */
3676 goto NUM_ESCAPE_INSERT;
3679 /* eg. \x24 indicates the hex constant 0x24 */
3684 if (! grok_bslash_x(&s, send,
3687 FALSE, /* Not strict */
3688 FALSE, /* No illegal cp's */
3692 uv = 0; /* drop through to ensure range ends are set */
3697 /* Insert oct or hex escaped character. */
3699 /* Here uv is the ordinal of the next character being added */
3700 if (UVCHR_IS_INVARIANT(uv)) {
3704 if (!d_is_utf8 && uv > 255) {
3706 /* Here, 'uv' won't fit unless we convert to UTF-8.
3707 * If we've only seen invariants so far, all we have to
3708 * do is turn on the flag */
3709 if (utf8_variant_count == 0) {
3713 SvCUR_set(sv, d - SvPVX_const(sv));
3717 sv_utf8_upgrade_flags_grow(
3719 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3721 /* Since we're having to grow here,
3722 * make sure we have enough room for
3723 * this escape and a NUL, so the
3724 * code immediately below won't have
3725 * to actually grow again */
3727 + (STRLEN)(send - s) + 1);
3728 d = SvPVX(sv) + SvCUR(sv);
3731 has_above_latin1 = TRUE;
3737 utf8_variant_count++;
3740 /* Usually, there will already be enough room in 'sv'
3741 * since such escapes are likely longer than any UTF-8
3742 * sequence they can end up as. This isn't the case on
3743 * EBCDIC where \x{40000000} contains 12 bytes, and the
3744 * UTF-8 for it contains 14. And, we have to allow for
3745 * a trailing NUL. It probably can't happen on ASCII
3746 * platforms, but be safe. See Note on sizing above. */
3747 const STRLEN needed = d - SvPVX(sv)
3751 if (UNLIKELY(needed > SvLEN(sv))) {
3752 SvCUR_set(sv, d - SvPVX_const(sv));
3753 d = SvCUR(sv) + SvGROW(sv, needed);
3756 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3757 (ckWARN(WARN_PORTABLE))
3758 ? UNICODE_WARN_PERL_EXTENDED
3763 non_portable_endpoint++;
3768 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3769 * named character, like \N{LATIN SMALL LETTER A}, or a named
3770 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3771 * GRAVE} (except y/// can't handle the latter, croaking). For
3772 * convenience all three forms are referred to as "named
3773 * characters" below.
3775 * For patterns, \N also can mean to match a non-newline. Code
3776 * before this 'switch' statement should already have handled
3777 * this situation, and hence this code only has to deal with
3778 * the named character cases.
3780 * For non-patterns, the named characters are converted to
3781 * their string equivalents. In patterns, named characters are
3782 * not converted to their ultimate forms for the same reasons
3783 * that other escapes aren't (mainly that the ultimate
3784 * character could be considered a meta-symbol by the regex
3785 * compiler). Instead, they are converted to the \N{U+...}
3786 * form to get the value from the charnames that is in effect
3787 * right now, while preserving the fact that it was a named
3788 * character, so that the regex compiler knows this.
3790 * The structure of this section of code (besides checking for
3791 * errors and upgrading to utf8) is:
3792 * If the named character is of the form \N{U+...}, pass it
3793 * through if a pattern; otherwise convert the code point
3795 * Otherwise must be some \N{NAME}: convert to
3796 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3798 * Transliteration is an exception. The conversion to utf8 is
3799 * only done if the code point requires it to be representable.
3801 * Here, 's' points to the 'N'; the test below is guaranteed to
3802 * succeed if we are being called on a pattern, as we already
3803 * know from a test above that the next character is a '{'. A
3804 * non-pattern \N must mean 'named character', which requires
3808 yyerror("Missing braces on \\N{}");
3814 /* If there is no matching '}', it is an error. */
3815 if (! (e = (char *) memchr(s, '}', send - s))) {
3816 if (! PL_lex_inpat) {
3817 yyerror("Missing right brace on \\N{}");
3819 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3821 yyquit(); /* Have exhausted the input. */
3824 /* Here it looks like a named character */
3826 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3827 s += 2; /* Skip to next char after the 'U+' */
3830 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3831 /* Check the syntax. */
3834 if (!isXDIGIT(*s)) {
3837 "Invalid hexadecimal number in \\N{U+...}"
3846 else if ((*s == '.' || *s == '_')
3852 /* Pass everything through unchanged.
3853 * +1 is for the '}' */
3854 Copy(orig_s, d, e - orig_s + 1, char);
3855 d += e - orig_s + 1;
3857 else { /* Not a pattern: convert the hex to string */
3858 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3859 | PERL_SCAN_SILENT_ILLDIGIT
3860 | PERL_SCAN_SILENT_OVERFLOW
3861 | PERL_SCAN_DISALLOW_PREFIX;
3864 uv = grok_hex(s, &len, &flags, NULL);
3865 if (len == 0 || (len != (STRLEN)(e - s)))
3868 if ( uv > MAX_LEGAL_CP
3869 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3871 yyerror(form_cp_too_large_msg(16, s, len, 0));
3872 uv = 0; /* drop through to ensure range ends are
3876 /* For non-tr///, if the destination is not in utf8,
3877 * unconditionally recode it to be so. This is
3878 * because \N{} implies Unicode semantics, and scalars
3879 * have to be in utf8 to guarantee those semantics.
3880 * tr/// doesn't care about Unicode rules, so no need
3881 * there to upgrade to UTF-8 for small enough code
3883 if (! d_is_utf8 && ( uv > 0xFF
3884 || PL_lex_inwhat != OP_TRANS))
3886 /* See Note on sizing above. */
3887 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3889 SvCUR_set(sv, d - SvPVX_const(sv));
3893 if (utf8_variant_count == 0) {
3895 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3898 sv_utf8_upgrade_flags_grow(
3900 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3902 d = SvPVX(sv) + SvCUR(sv);
3906 has_above_latin1 = TRUE;
3909 /* Add the (Unicode) code point to the output. */
3910 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3911 *d++ = (char) LATIN1_TO_NATIVE(uv);
3914 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
3915 (ckWARN(WARN_PORTABLE))
3916 ? UNICODE_WARN_PERL_EXTENDED
3921 else /* Here is \N{NAME} but not \N{U+...}. */
3922 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3923 { /* Failed. We should die eventually, but for now use a NUL
3927 else { /* Successfully evaluated the name */
3929 const char *str = SvPV_const(res, len);
3932 if (! len) { /* The name resolved to an empty string */
3933 const char empty_N[] = "\\N{_}";
3934 Copy(empty_N, d, sizeof(empty_N) - 1, char);
3935 d += sizeof(empty_N) - 1;
3938 /* In order to not lose information for the regex
3939 * compiler, pass the result in the specially made
3940 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3941 * the code points in hex of each character
3942 * returned by charnames */
3944 const char *str_end = str + len;
3945 const STRLEN off = d - SvPVX_const(sv);
3947 if (! SvUTF8(res)) {
3948 /* For the non-UTF-8 case, we can determine the
3949 * exact length needed without having to parse
3950 * through the string. Each character takes up
3951 * 2 hex digits plus either a trailing dot or
3953 const char initial_text[] = "\\N{U+";
3954 const STRLEN initial_len = sizeof(initial_text)
3956 d = off + SvGROW(sv, off
3959 /* +1 for trailing NUL */
3962 + (STRLEN)(send - e));
3963 Copy(initial_text, d, initial_len, char);
3965 while (str < str_end) {
3968 my_snprintf(hex_string,
3972 /* The regex compiler is
3973 * expecting Unicode, not
3975 NATIVE_TO_LATIN1(*str));
3976 PERL_MY_SNPRINTF_POST_GUARD(len,
3977 sizeof(hex_string));
3978 Copy(hex_string, d, 3, char);
3982 d--; /* Below, we will overwrite the final
3983 dot with a right brace */
3986 STRLEN char_length; /* cur char's byte length */
3988 /* and the number of bytes after this is
3989 * translated into hex digits */
3990 STRLEN output_length;
3992 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3993 * for max('U+', '.'); and 1 for NUL */
3994 char hex_string[2 * UTF8_MAXBYTES + 5];
3996 /* Get the first character of the result. */
3997 U32 uv = utf8n_to_uvchr((U8 *) str,
4001 /* Convert first code point to Unicode hex,
4002 * including the boiler plate before it. */
4004 my_snprintf(hex_string, sizeof(hex_string),
4006 (unsigned int) NATIVE_TO_UNI(uv));
4008 /* Make sure there is enough space to hold it */
4009 d = off + SvGROW(sv, off
4011 + (STRLEN)(send - e)
4012 + 2); /* '}' + NUL */
4014 Copy(hex_string, d, output_length, char);
4017 /* For each subsequent character, append dot and
4018 * its Unicode code point in hex */
4019 while ((str += char_length) < str_end) {
4020 const STRLEN off = d - SvPVX_const(sv);
4021 U32 uv = utf8n_to_uvchr((U8 *) str,
4026 my_snprintf(hex_string,
4029 (unsigned int) NATIVE_TO_UNI(uv));
4031 d = off + SvGROW(sv, off
4033 + (STRLEN)(send - e)
4034 + 2); /* '}' + NUL */
4035 Copy(hex_string, d, output_length, char);
4040 *d++ = '}'; /* Done. Add the trailing brace */
4043 else { /* Here, not in a pattern. Convert the name to a
4046 if (PL_lex_inwhat == OP_TRANS) {
4047 str = SvPV_const(res, len);
4048 if (len > ((SvUTF8(res))
4052 yyerror(Perl_form(aTHX_
4053 "%.*s must not be a named sequence"
4054 " in transliteration operator",
4055 /* +1 to include the "}" */
4056 (int) (e + 1 - start), start));
4058 goto end_backslash_N;
4061 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4062 has_above_latin1 = TRUE;
4066 else if (! SvUTF8(res)) {
4067 /* Make sure \N{} return is UTF-8. This is because
4068 * \N{} implies Unicode semantics, and scalars have
4069 * to be in utf8 to guarantee those semantics; but
4070 * not needed in tr/// */
4071 sv_utf8_upgrade_flags(res, 0);
4072 str = SvPV_const(res, len);
4075 /* Upgrade destination to be utf8 if this new
4077 if (! d_is_utf8 && SvUTF8(res)) {
4078 /* See Note on sizing above. */
4079 const STRLEN extra = len + (send - s) + 1;
4081 SvCUR_set(sv, d - SvPVX_const(sv));
4085 if (utf8_variant_count == 0) {
4087 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4090 sv_utf8_upgrade_flags_grow(sv,
4091 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4093 d = SvPVX(sv) + SvCUR(sv);
4096 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
4098 /* See Note on sizing above. (NOTE: SvCUR() is not
4099 * set correctly here). */
4100 const STRLEN extra = len + (send - e) + 1;
4101 const STRLEN off = d - SvPVX_const(sv);
4102 d = off + SvGROW(sv, off + extra);
4104 Copy(str, d, len, char);
4110 } /* End \N{NAME} */
4114 backslash_N++; /* \N{} is defined to be Unicode */
4116 s = e + 1; /* Point to just after the '}' */
4119 /* \c is a control character */
4123 const char * message;
4125 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4127 yyquit(); /* Have always immediately croaked on
4133 yyerror("Missing control char name in \\c");
4134 yyquit(); /* Are at end of input, no sense continuing */
4137 non_portable_endpoint++;
4141 /* printf-style backslashes, formfeeds, newlines, etc */
4167 } /* end if (backslash) */
4170 /* Just copy the input to the output, though we may have to convert
4173 * If the input has the same representation in UTF-8 as not, it will be
4174 * a single byte, and we don't care about UTF8ness; just copy the byte */
4175 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4178 else if (! s_is_utf8 && ! d_is_utf8) {
4179 /* If neither source nor output is UTF-8, is also a single byte,
4180 * just copy it; but this byte counts should we later have to
4181 * convert to UTF-8 */
4183 utf8_variant_count++;
4185 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4186 const STRLEN len = UTF8SKIP(s);
4188 /* We expect the source to have already been checked for
4190 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4192 Copy(s, d, len, U8);
4196 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4197 STRLEN need = send - s + 1; /* See Note on sizing above. */
4199 SvCUR_set(sv, d - SvPVX_const(sv));
4203 if (utf8_variant_count == 0) {
4205 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4208 sv_utf8_upgrade_flags_grow(sv,
4209 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4211 d = SvPVX(sv) + SvCUR(sv);
4214 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4216 else { /* UTF8ness matters: convert this non-UTF8 source char to
4217 UTF-8 for output. It will occupy 2 bytes, but don't include
4218 the input byte since we haven't incremented 's' yet. See
4219 Note on sizing above. */
4220 const STRLEN off = d - SvPVX(sv);
4221 const STRLEN extra = 2 + (send - s - 1) + 1;
4222 if (off + extra > SvLEN(sv)) {
4223 d = off + SvGROW(sv, off + extra);
4225 *d++ = UTF8_EIGHT_BIT_HI(*s);
4226 *d++ = UTF8_EIGHT_BIT_LO(*s);
4229 } /* while loop to process each character */
4232 const STRLEN off = d - SvPVX(sv);
4234 /* See if room for the terminating NUL */
4235 if (UNLIKELY(off >= SvLEN(sv))) {
4239 if (off > SvLEN(sv))
4241 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4242 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4244 /* Whew! Here we don't have room for the terminating NUL, but
4245 * everything else so far has fit. It's not too late to grow
4246 * to fit the NUL and continue on. But it is a bug, as the code
4247 * above was supposed to have made room for this, so under
4248 * DEBUGGING builds, we panic anyway. */
4249 d = off + SvGROW(sv, off + 1);
4253 /* terminate the string and set up the sv */
4255 SvCUR_set(sv, d - SvPVX_const(sv));
4262 /* shrink the sv if we allocated more than we used */
4263 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4264 SvPV_shrink_to_cur(sv);
4267 /* return the substring (via pl_yylval) only if we parsed anything */
4270 for (; s2 < s; s2++) {
4272 COPLINE_INC_WITH_HERELINES;
4274 SvREFCNT_inc_simple_void_NN(sv);
4275 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4276 && ! PL_parser->lex_re_reparsing)
4278 const char *const key = PL_lex_inpat ? "qr" : "q";
4279 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4283 if (PL_lex_inwhat == OP_TRANS) {
4286 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4289 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4297 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4298 type, typelen, NULL);
4300 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4302 LEAVE_with_name("scan_const");
4307 * Returns TRUE if there's more to the expression (e.g., a subscript),
4310 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4312 * ->[ and ->{ return TRUE
4313 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4314 * { and [ outside a pattern are always subscripts, so return TRUE
4315 * if we're outside a pattern and it's not { or [, then return FALSE
4316 * if we're in a pattern and the first char is a {
4317 * {4,5} (any digits around the comma) returns FALSE
4318 * if we're in a pattern and the first char is a [
4320 * [SOMETHING] has a funky algorithm to decide whether it's a
4321 * character class or not. It has to deal with things like
4322 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4323 * anything else returns TRUE
4326 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4329 S_intuit_more(pTHX_ char *s, char *e)
4331 PERL_ARGS_ASSERT_INTUIT_MORE;
4333 if (PL_lex_brackets)
4335 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4337 if (*s == '-' && s[1] == '>'
4338 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4339 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4340 ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4342 if (*s != '{' && *s != '[')
4344 PL_parser->sub_no_recover = TRUE;
4348 /* In a pattern, so maybe we have {n,m}. */
4356 /* On the other hand, maybe we have a character class */
4359 if (*s == ']' || *s == '^')
4362 /* this is terrifying, and it works */
4365 const char * const send = (char *) memchr(s, ']', e - s);
4366 unsigned char un_char, last_un_char;
4367 char tmpbuf[sizeof PL_tokenbuf * 4];
4369 if (!send) /* has to be an expression */
4371 weight = 2; /* let's weigh the evidence */
4375 else if (isDIGIT(*s)) {
4377 if (isDIGIT(s[1]) && s[2] == ']')
4383 Zero(seen,256,char);
4385 for (; s < send; s++) {
4386 last_un_char = un_char;
4387 un_char = (unsigned char)*s;
4392 weight -= seen[un_char] * 10;
4393 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4395 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4396 len = (int)strlen(tmpbuf);
4397 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4398 UTF ? SVf_UTF8 : 0, SVt_PV))
4405 && memCHRs("[#!%*<>()-=",s[1]))
4407 if (/*{*/ memCHRs("])} =",s[2]))
4416 if (memCHRs("wds]",s[1]))
4418 else if (seen[(U8)'\''] || seen[(U8)'"'])
4420 else if (memCHRs("rnftbxcav",s[1]))
4422 else if (isDIGIT(s[1])) {
4424 while (s[1] && isDIGIT(s[1]))
4434 if (memCHRs("aA01! ",last_un_char))
4436 if (memCHRs("zZ79~",s[1]))
4438 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4439 weight -= 5; /* cope with negative subscript */
4442 if (!isWORDCHAR(last_un_char)
4443 && !(last_un_char == '$' || last_un_char == '@'
4444 || last_un_char == '&')
4445 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4449 if (keyword(d, s - d, 0))
4452 if (un_char == last_un_char + 1)
4454 weight -= seen[un_char];
4459 if (weight >= 0) /* probably a character class */
4469 * Does all the checking to disambiguate
4471 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4472 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4474 * First argument is the stuff after the first token, e.g. "bar".
4476 * Not a method if foo is a filehandle.
4477 * Not a method if foo is a subroutine prototyped to take a filehandle.
4478 * Not a method if it's really "Foo $bar"
4479 * Method if it's "foo $bar"
4480 * Not a method if it's really "print foo $bar"
4481 * Method if it's really "foo package::" (interpreted as package->foo)
4482 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4483 * Not a method if bar is a filehandle or package, but is quoted with
4488 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4490 char *s = start + (*start == '$');
4491 char tmpbuf[sizeof PL_tokenbuf];
4494 /* Mustn't actually add anything to a symbol table.
4495 But also don't want to "initialise" any placeholder
4496 constants that might already be there into full
4497 blown PVGVs with attached PVCV. */
4499 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4501 PERL_ARGS_ASSERT_INTUIT_METHOD;
4503 if (!FEATURE_INDIRECT_IS_ENABLED)
4506 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4508 if (cv && SvPOK(cv)) {
4509 const char *proto = CvPROTO(cv);
4511 while (*proto && (isSPACE(*proto) || *proto == ';'))
4518 if (*start == '$') {
4519 SSize_t start_off = start - SvPVX(PL_linestr);
4520 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4521 || isUPPER(*PL_tokenbuf))
4523 /* this could be $# */
4526 PL_bufptr = SvPVX(PL_linestr) + start_off;
4528 return *s == '(' ? FUNCMETH : METHOD;
4531 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4532 /* start is the beginning of the possible filehandle/object,
4533 * and s is the end of it
4534 * tmpbuf is a copy of it (but with single quotes as double colons)
4537 if (!keyword(tmpbuf, len, 0)) {
4538 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4543 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4544 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4546 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4547 && (!isGV(indirgv) || GvCVu(indirgv)))
4549 /* filehandle or package name makes it a method */
4550 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4552 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4553 return 0; /* no assumptions -- "=>" quotes bareword */
4555 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4556 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4557 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4559 force_next(BAREWORD);
4561 return *s == '(' ? FUNCMETH : METHOD;
4567 /* Encoded script support. filter_add() effectively inserts a
4568 * 'pre-processing' function into the current source input stream.
4569 * Note that the filter function only applies to the current source file
4570 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4572 * The datasv parameter (which may be NULL) can be used to pass
4573 * private data to this instance of the filter. The filter function
4574 * can recover the SV using the FILTER_DATA macro and use it to
4575 * store private buffers and state information.
4577 * The supplied datasv parameter is upgraded to a PVIO type
4578 * and the IoDIRP/IoANY field is used to store the function pointer,
4579 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4580 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4581 * private use must be set using malloc'd pointers.
4585 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4593 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4594 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4596 if (!PL_rsfp_filters)
4597 PL_rsfp_filters = newAV();
4600 SvUPGRADE(datasv, SVt_PVIO);
4601 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4602 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4603 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4604 FPTR2DPTR(void *, IoANY(datasv)),
4605 SvPV_nolen(datasv)));
4606 av_unshift(PL_rsfp_filters, 1);
4607 av_store(PL_rsfp_filters, 0, datasv) ;
4609 !PL_parser->filtered
4610 && PL_parser->lex_flags & LEX_EVALBYTES
4611 && PL_bufptr < PL_bufend
4613 const char *s = PL_bufptr;
4614 while (s < PL_bufend) {
4616 SV *linestr = PL_parser->linestr;
4617 char *buf = SvPVX(linestr);
4618 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4619 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4620 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4621 STRLEN const linestart_pos = PL_parser->linestart - buf;
4622 STRLEN const last_uni_pos =
4623 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4624 STRLEN const last_lop_pos =
4625 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4626 av_push(PL_rsfp_filters, linestr);
4627 PL_parser->linestr =
4628 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4629 buf = SvPVX(PL_parser->linestr);
4630 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4631 PL_parser->bufptr = buf + bufptr_pos;
4632 PL_parser->oldbufptr = buf + oldbufptr_pos;
4633 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4634 PL_parser->linestart = buf + linestart_pos;
4635 if (PL_parser->last_uni)
4636 PL_parser->last_uni = buf + last_uni_pos;
4637 if (PL_parser->last_lop)
4638 PL_parser->last_lop = buf + last_lop_pos;
4639 SvLEN_set(linestr, SvCUR(linestr));
4640 SvCUR_set(linestr, s - SvPVX(linestr));
4641 PL_parser->filtered = 1;
4651 /* Delete most recently added instance of this filter function. */
4653 Perl_filter_del(pTHX_ filter_t funcp)
4657 PERL_ARGS_ASSERT_FILTER_DEL;
4660 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4661 FPTR2DPTR(void*, funcp)));
4663 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4665 /* if filter is on top of stack (usual case) just pop it off */
4666 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4667 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4668 sv_free(av_pop(PL_rsfp_filters));
4672 /* we need to search for the correct entry and clear it */
4673 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4677 /* Invoke the idxth filter function for the current rsfp. */
4678 /* maxlen 0 = read one text line */
4680 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4685 /* This API is bad. It should have been using unsigned int for maxlen.
4686 Not sure if we want to change the API, but if not we should sanity
4687 check the value here. */
4688 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4690 PERL_ARGS_ASSERT_FILTER_READ;
4692 if (!PL_parser || !PL_rsfp_filters)
4694 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4695 /* Provide a default input filter to make life easy. */
4696 /* Note that we append to the line. This is handy. */
4697 DEBUG_P(PerlIO_printf(Perl_debug_log,
4698 "filter_read %d: from rsfp\n", idx));
4699 if (correct_length) {
4702 const int old_len = SvCUR(buf_sv);
4704 /* ensure buf_sv is large enough */
4705 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4706 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4707 correct_length)) <= 0) {
4708 if (PerlIO_error(PL_rsfp))
4709 return -1; /* error */
4711 return 0 ; /* end of file */
4713 SvCUR_set(buf_sv, old_len + len) ;
4714 SvPVX(buf_sv)[old_len + len] = '\0';
4717 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4718 if (PerlIO_error(PL_rsfp))
4719 return -1; /* error */
4721 return 0 ; /* end of file */
4724 return SvCUR(buf_sv);
4726 /* Skip this filter slot if filter has been deleted */
4727 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4728 DEBUG_P(PerlIO_printf(Perl_debug_log,
4729 "filter_read %d: skipped (filter deleted)\n",
4731 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4733 if (SvTYPE(datasv) != SVt_PVIO) {
4734 if (correct_length) {
4736 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4737 if (!remainder) return 0; /* eof */
4738 if (correct_length > remainder) correct_length = remainder;
4739 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4740 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4743 const char *s = SvEND(datasv);
4744 const char *send = SvPVX(datasv) + SvLEN(datasv);
4752 if (s == send) return 0; /* eof */
4753 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4754 SvCUR_set(datasv, s-SvPVX(datasv));
4756 return SvCUR(buf_sv);
4758 /* Get function pointer hidden within datasv */
4759 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4760 DEBUG_P(PerlIO_printf(Perl_debug_log,
4761 "filter_read %d: via function %p (%s)\n",
4762 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4763 /* Call function. The function is expected to */
4764 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4765 /* Return: <0:error, =0:eof, >0:not eof */
4767 save_scalar(PL_errgv);
4768 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4774 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4776 PERL_ARGS_ASSERT_FILTER_GETS;
4778 #ifdef PERL_CR_FILTER
4779 if (!PL_rsfp_filters) {
4780 filter_add(S_cr_textfilter,NULL);
4783 if (PL_rsfp_filters) {
4785 SvCUR_set(sv, 0); /* start with empty line */
4786 if (FILTER_READ(0, sv, 0) > 0)
4787 return ( SvPVX(sv) ) ;
4792 return (sv_gets(sv, PL_rsfp, append));
4796 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4800 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4802 if (memEQs(pkgname, len, "__PACKAGE__"))
4806 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4807 && (gv = gv_fetchpvn_flags(pkgname,
4809 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4811 return GvHV(gv); /* Foo:: */
4814 /* use constant CLASS => 'MyClass' */
4815 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4816 if (gv && GvCV(gv)) {
4817 SV * const sv = cv_const_sv(GvCV(gv));
4819 return gv_stashsv(sv, 0);
4822 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4827 S_tokenize_use(pTHX_ int is_use, char *s) {
4828 PERL_ARGS_ASSERT_TOKENIZE_USE;
4830 if (PL_expect != XSTATE)
4831 /* diag_listed_as: "use" not allowed in expression */
4832 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4833 is_use ? "use" : "no"));
4836 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4837 s = force_version(s, TRUE);
4838 if (*s == ';' || *s == '}'
4839 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4840 NEXTVAL_NEXTTOKE.opval = NULL;
4841 force_next(BAREWORD);
4843 else if (*s == 'v') {
4844 s = force_word(s,BAREWORD,FALSE,TRUE);
4845 s = force_version(s, FALSE);
4849 s = force_word(s,BAREWORD,FALSE,TRUE);
4850 s = force_version(s, FALSE);
4852 pl_yylval.ival = is_use;
4856 static const char* const exp_name[] =
4857 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4858 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4859 "SIGVAR", "TERMORDORDOR"
4863 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4865 S_word_takes_any_delimiter(char *p, STRLEN len)
4867 return (len == 1 && memCHRs("msyq", p[0]))
4869 && ((p[0] == 't' && p[1] == 'r')
4870 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4874 S_check_scalar_slice(pTHX_ char *s)
4877 while (SPACE_OR_TAB(*s)) s++;
4878 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4884 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4885 || (*s && memCHRs(" \t$#+-'\"", *s)))
4887 s += UTF ? UTF8SKIP(s) : 1;
4889 if (*s == '}' || *s == ']')
4890 pl_yylval.ival = OPpSLICEWARNING;
4893 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4895 S_lex_token_boundary(pTHX)
4897 PL_oldoldbufptr = PL_oldbufptr;
4898 PL_oldbufptr = PL_bufptr;
4901 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4903 S_vcs_conflict_marker(pTHX_ char *s)
4905 lex_token_boundary();
4907 yyerror("Version control conflict marker");
4908 while (s < PL_bufend && *s != '\n')
4914 yyl_sigvar(pTHX_ char *s)
4916 /* we expect the sigil and optional var name part of a
4917 * signature element here. Since a '$' is not necessarily
4918 * followed by a var name, handle it specially here; the general
4919 * yylex code would otherwise try to interpret whatever follows
4920 * as a var; e.g. ($, ...) would be seen as the var '$,'
4927 PL_bufptr = s; /* for error reporting */
4932 /* spot stuff that looks like an prototype */
4933 if (memCHRs("$:@%&*;\\[]", *s)) {
4934 yyerror("Illegal character following sigil in a subroutine signature");
4937 /* '$#' is banned, while '$ # comment' isn't */
4939 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4943 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4944 char *dest = PL_tokenbuf + 1;
4945 /* read var name, including sigil, into PL_tokenbuf */
4946 PL_tokenbuf[0] = sigil;
4947 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4948 0, cBOOL(UTF), FALSE, FALSE);
4950 assert(PL_tokenbuf[1]); /* we have a variable name */
4958 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4959 * as the ASSIGNOP, and exclude other tokens that start with =
4961 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
4962 /* save now to report with the same context as we did when
4963 * all ASSIGNOPS were accepted */
4967 NEXTVAL_NEXTTOKE.ival = 0;
4968 force_next(ASSIGNOP);
4971 else if (*s == ',' || *s == ')') {
4972 PL_expect = XOPERATOR;
4975 /* make sure the context shows the unexpected character and
4976 * hopefully a bit more */
4978 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4980 PL_bufptr = s; /* for error reporting */
4981 yyerror("Illegal operator following parameter in a subroutine signature");
4985 NEXTVAL_NEXTTOKE.ival = sigil;
4986 force_next('p'); /* force a signature pending identifier */
4993 case ',': /* handle ($a,,$b) */
4998 yyerror("A signature parameter must start with '$', '@' or '%'");
4999 /* very crude error recovery: skip to likely next signature
5001 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5007 case ',': TOKEN (PERLY_COMMA);
5008 case '@': TOKEN (PERLY_SNAIL);
5009 default: TOKEN (sigil);
5014 yyl_dollar(pTHX_ char *s)
5018 if (PL_expect == XPOSTDEREF) {
5021 POSTDEREF(DOLSHARP);
5027 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5028 || memCHRs("{$:+-@", s[2])))
5030 PL_tokenbuf[0] = '@';
5031 s = scan_ident(s + 1, PL_tokenbuf + 1,
5032 sizeof PL_tokenbuf - 1, FALSE);
5033 if (PL_expect == XOPERATOR) {
5035 if (PL_bufptr > s) {
5037 PL_bufptr = PL_oldbufptr;
5039 no_op("Array length", d);
5041 if (!PL_tokenbuf[1])
5043 PL_expect = XOPERATOR;
5044 force_ident_maybe_lex('#');
5048 PL_tokenbuf[0] = '$';
5049 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5050 if (PL_expect == XOPERATOR) {
5052 if (PL_bufptr > s) {
5054 PL_bufptr = PL_oldbufptr;
5058 if (!PL_tokenbuf[1]) {
5060 yyerror("Final $ should be \\$ or $name");
5065 const char tmp = *s;
5066 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5069 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5070 && intuit_more(s, PL_bufend)) {
5072 PL_tokenbuf[0] = '@';
5073 if (ckWARN(WARN_SYNTAX)) {
5076 while ( t < PL_bufend ) {
5078 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5079 /* consumed one or more space chars */
5080 } else if (*t == '$' || *t == '@') {
5081 /* could be more than one '$' like $$ref or @$ref */
5082 do { t++; } while (t < PL_bufend && *t == '$');
5084 /* could be an abigail style identifier like $ foo */
5085 while (t < PL_bufend && *t == ' ') t++;
5087 /* strip off the name of the var */
5088 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5089 t += UTF ? UTF8SKIP(t) : 1;
5090 /* consumed a varname */
5091 } else if (isDIGIT(*t)) {
5092 /* deal with hex constants like 0x11 */
5093 if (t[0] == '0' && t[1] == 'x') {
5095 while (t < PL_bufend && isXDIGIT(*t)) t++;
5097 /* deal with decimal/octal constants like 1 and 0123 */
5098 do { t++; } while (isDIGIT(*t));
5099 if (t<PL_bufend && *t == '.') {
5100 do { t++; } while (isDIGIT(*t));
5103 /* consumed a number */
5105 /* not a var nor a space nor a number */
5109 if (t < PL_bufend && *t++ == ',') {
5110 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5111 while (t < PL_bufend && *t != ']')
5113 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5114 "Multidimensional syntax %" UTF8f " not supported",
5115 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5119 else if (*s == '{') {
5121 PL_tokenbuf[0] = '%';
5122 if ( strEQ(PL_tokenbuf+1, "SIG")
5123 && ckWARN(WARN_SYNTAX)
5124 && (t = (char *) memchr(s, '}', PL_bufend - s))
5125 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5127 char tmpbuf[sizeof PL_tokenbuf];
5130 } while (isSPACE(*t));
5131 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5133 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5138 && get_cvn_flags(tmpbuf, len, UTF
5142 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5143 "You need to quote \"%" UTF8f "\"",
5144 UTF8fARG(UTF, len, tmpbuf));
5151 PL_expect = XOPERATOR;
5152 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5153 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5154 if (!islop || PL_last_lop_op == OP_GREPSTART)
5155 PL_expect = XOPERATOR;
5156 else if (memCHRs("$@\"'`q", *s))
5157 PL_expect = XTERM; /* e.g. print $fh "foo" */
5158 else if ( memCHRs("&*<%", *s)
5159 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5161 PL_expect = XTERM; /* e.g. print $fh &sub */
5163 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5164 char tmpbuf[sizeof PL_tokenbuf];
5167 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5168 if ((t2 = keyword(tmpbuf, len, 0))) {
5169 /* binary operators exclude handle interpretations */
5181 PL_expect = XTERM; /* e.g. print $fh length() */
5186 PL_expect = XTERM; /* e.g. print $fh subr() */
5189 else if (isDIGIT(*s))
5190 PL_expect = XTERM; /* e.g. print $fh 3 */
5191 else if (*s == '.' && isDIGIT(s[1]))
5192 PL_expect = XTERM; /* e.g. print $fh .3 */
5193 else if ((*s == '?' || *s == '-' || *s == '+')
5194 && !isSPACE(s[1]) && s[1] != '=')
5195 PL_expect = XTERM; /* e.g. print $fh -1 */
5196 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5198 PL_expect = XTERM; /* e.g. print $fh /.../
5199 XXX except DORDOR operator
5201 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5203 PL_expect = XTERM; /* print $fh <<"EOF" */
5206 force_ident_maybe_lex('$');
5211 yyl_sub(pTHX_ char *s, const int key)
5213 char * const tmpbuf = PL_tokenbuf + 1;
5214 bool have_name, have_proto;
5216 SV *format_name = NULL;
5217 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5219 SSize_t off = s-SvPVX(PL_linestr);
5222 s = skipspace(s); /* can move PL_linestr */
5224 d = SvPVX(PL_linestr)+off;
5226 SAVEBOOL(PL_parser->sig_seen);
5227 PL_parser->sig_seen = FALSE;
5229 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5231 || (*s == ':' && s[1] == ':'))
5234 PL_expect = XATTRBLOCK;
5235 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5237 if (key == KEY_format)
5238 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5240 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5242 PL_tokenbuf, len + 1, 0
5244 sv_setpvn(PL_subname, tmpbuf, len);
5246 sv_setsv(PL_subname,PL_curstname);
5247 sv_catpvs(PL_subname,"::");
5248 sv_catpvn(PL_subname,tmpbuf,len);
5250 if (SvUTF8(PL_linestr))
5251 SvUTF8_on(PL_subname);
5257 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5259 /* diag_listed_as: Missing name in "%s sub" */
5261 "Missing name in \"%s\"", PL_bufptr);
5263 PL_expect = XATTRTERM;
5264 sv_setpvs(PL_subname,"?");
5268 if (key == KEY_format) {
5270 NEXTVAL_NEXTTOKE.opval
5271 = newSVOP(OP_CONST,0, format_name);
5272 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5273 force_next(BAREWORD);
5278 /* Look for a prototype */
5279 if (*s == '(' && !is_sigsub) {
5280 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5282 Perl_croak(aTHX_ "Prototype not terminated");
5283 COPLINE_SET_FROM_MULTI_END;
5284 (void)validate_proto(PL_subname, PL_lex_stuff,
5285 ckWARN(WARN_ILLEGALPROTO), 0);
5293 if ( !(*s == ':' && s[1] != ':')
5294 && (*s != '{' && *s != '(') && key != KEY_format)
5296 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5297 key == KEY_DESTROY || key == KEY_BEGIN ||
5298 key == KEY_UNITCHECK || key == KEY_CHECK ||
5299 key == KEY_INIT || key == KEY_END ||
5300 key == KEY_my || key == KEY_state ||
5303 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5304 else if (*s != ';' && *s != '}')
5305 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5309 NEXTVAL_NEXTTOKE.opval =
5310 newSVOP(OP_CONST, 0, PL_lex_stuff);
5311 PL_lex_stuff = NULL;
5316 sv_setpvs(PL_subname, "__ANON__");
5318 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5324 force_ident_maybe_lex('&');
5332 yyl_interpcasemod(pTHX_ char *s)
5335 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5337 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5338 PL_bufptr, PL_bufend, *PL_bufptr);
5341 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5343 if (PL_lex_casemods) {
5344 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5345 PL_lex_casestack[PL_lex_casemods] = '\0';
5347 if (PL_bufptr != PL_bufend
5348 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5349 || oldmod == 'F')) {
5351 PL_lex_state = LEX_INTERPCONCAT;
5353 PL_lex_allbrackets--;
5356 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5357 /* Got an unpaired \E */
5358 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5359 "Useless use of \\E");
5361 if (PL_bufptr != PL_bufend)
5363 PL_lex_state = LEX_INTERPCONCAT;
5368 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5371 if (s[1] == '\\' && s[2] == 'E') {
5373 PL_lex_state = LEX_INTERPCONCAT;
5378 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5379 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5381 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5383 if ((*s == 'L' || *s == 'U' || *s == 'F')
5384 && (strpbrk(PL_lex_casestack, "LUF")))
5386 PL_lex_casestack[--PL_lex_casemods] = '\0';
5387 PL_lex_allbrackets--;
5390 if (PL_lex_casemods > 10)
5391 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5392 PL_lex_casestack[PL_lex_casemods++] = *s;
5393 PL_lex_casestack[PL_lex_casemods] = '\0';
5394 PL_lex_state = LEX_INTERPCONCAT;
5395 NEXTVAL_NEXTTOKE.ival = 0;
5396 force_next((2<<24)|'(');
5398 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5400 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5402 NEXTVAL_NEXTTOKE.ival = OP_LC;
5404 NEXTVAL_NEXTTOKE.ival = OP_UC;
5406 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5408 NEXTVAL_NEXTTOKE.ival = OP_FC;
5410 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5414 if (PL_lex_starts) {
5417 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5418 if (PL_lex_casemods == 1 && PL_lex_inpat)
5421 AopNOASSIGN(OP_CONCAT);
5429 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5430 GV **pgv, GV ***pgvp)
5432 GV *ogv = NULL; /* override (winner) */
5433 GV *hgv = NULL; /* hidden (loser) */
5436 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5438 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5439 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5441 && (cv = GvCVu(gv)))
5443 if (GvIMPORTED_CV(gv))
5445 else if (! CvMETHOD(cv))
5449 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5451 && (isGV_with_GP(gv)
5452 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5453 : SvPCS_IMPORTED(gv)
5454 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5464 *orig_keyword = key;
5465 return 0; /* overridden by import or by GLOBAL */
5467 else if (gv && !*pgvp
5468 && -key==KEY_lock /* XXX generalizable kludge */
5471 return 0; /* any sub overrides "weak" keyword */
5473 else { /* no override */
5475 if (key == KEY_dump) {
5476 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5480 if (hgv && key != KEY_x) /* never ambiguous */
5481 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5482 "Ambiguous call resolved as CORE::%s(), "
5483 "qualify as such or use &",
5490 yyl_qw(pTHX_ char *s, STRLEN len)
5494 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5496 missingterm(NULL, 0);
5498 COPLINE_SET_FROM_MULTI_END;
5499 PL_expect = XOPERATOR;
5500 if (SvCUR(PL_lex_stuff)) {
5501 int warned_comma = !ckWARN(WARN_QW);
5502 int warned_comment = warned_comma;
5503 char *d = SvPV_force(PL_lex_stuff, len);
5505 for (; isSPACE(*d) && len; --len, ++d)
5510 if (!warned_comma || !warned_comment) {
5511 for (; !isSPACE(*d) && len; --len, ++d) {
5512 if (!warned_comma && *d == ',') {
5513 Perl_warner(aTHX_ packWARN(WARN_QW),
5514 "Possible attempt to separate words with commas");
5517 else if (!warned_comment && *d == '#') {
5518 Perl_warner(aTHX_ packWARN(WARN_QW),
5519 "Possible attempt to put comments in qw() list");
5525 for (; !isSPACE(*d) && len; --len, ++d)
5528 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5529 words = op_append_elem(OP_LIST, words,
5530 newSVOP(OP_CONST, 0, tokeq(sv)));
5535 words = newNULLLIST();
5536 SvREFCNT_dec_NN(PL_lex_stuff);
5537 PL_lex_stuff = NULL;
5538 PL_expect = XOPERATOR;
5539 pl_yylval.opval = sawparens(words);
5544 yyl_hyphen(pTHX_ char *s)
5546 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5554 while (s < PL_bufend && SPACE_OR_TAB(*s))
5557 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5558 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5559 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5560 OPERATOR(PERLY_MINUS); /* unary minus */
5563 case 'r': ftst = OP_FTEREAD; break;
5564 case 'w': ftst = OP_FTEWRITE; break;
5565 case 'x': ftst = OP_FTEEXEC; break;
5566 case 'o': ftst = OP_FTEOWNED; break;
5567 case 'R': ftst = OP_FTRREAD; break;
5568 case 'W': ftst = OP_FTRWRITE; break;
5569 case 'X': ftst = OP_FTREXEC; break;
5570 case 'O': ftst = OP_FTROWNED; break;
5571 case 'e': ftst = OP_FTIS; break;
5572 case 'z': ftst = OP_FTZERO; break;
5573 case 's': ftst = OP_FTSIZE; break;
5574 case 'f': ftst = OP_FTFILE; break;
5575 case 'd': ftst = OP_FTDIR; break;
5576 case 'l': ftst = OP_FTLINK; break;
5577 case 'p': ftst = OP_FTPIPE; break;
5578 case 'S': ftst = OP_FTSOCK; break;
5579 case 'u': ftst = OP_FTSUID; break;
5580 case 'g': ftst = OP_FTSGID; break;
5581 case 'k': ftst = OP_FTSVTX; break;
5582 case 'b': ftst = OP_FTBLK; break;
5583 case 'c': ftst = OP_FTCHR; break;
5584 case 't': ftst = OP_FTTTY; break;
5585 case 'T': ftst = OP_FTTEXT; break;
5586 case 'B': ftst = OP_FTBINARY; break;
5587 case 'M': case 'A': case 'C':
5588 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5590 case 'M': ftst = OP_FTMTIME; break;
5591 case 'A': ftst = OP_FTATIME; break;
5592 case 'C': ftst = OP_FTCTIME; break;
5600 PL_last_uni = PL_oldbufptr;
5601 PL_last_lop_op = (OPCODE)ftst;
5603 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5608 /* Assume it was a minus followed by a one-letter named
5609 * subroutine call (or a -bareword), then. */
5611 PerlIO_printf(Perl_debug_log,
5612 "### '-%c' looked like a file test but was not\n",
5619 const char tmp = *s++;
5622 if (PL_expect == XOPERATOR)
5627 else if (*s == '>') {
5630 if (((*s == '$' || *s == '&') && s[1] == '*')
5631 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5632 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5633 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5636 PL_expect = XPOSTDEREF;
5639 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5640 s = force_word(s,METHOD,FALSE,TRUE);
5648 if (PL_expect == XOPERATOR) {
5650 && !PL_lex_allbrackets
5651 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5659 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5661 OPERATOR(PERLY_MINUS); /* unary minus */
5667 yyl_plus(pTHX_ char *s)
5669 const char tmp = *s++;
5672 if (PL_expect == XOPERATOR)
5677 if (PL_expect == XOPERATOR) {
5679 && !PL_lex_allbrackets
5680 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5688 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5690 OPERATOR(PERLY_PLUS);
5695 yyl_star(pTHX_ char *s)
5697 if (PL_expect == XPOSTDEREF)
5700 if (PL_expect != XOPERATOR) {
5701 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5702 PL_expect = XOPERATOR;
5703 force_ident(PL_tokenbuf, '*');
5712 if (*s == '=' && !PL_lex_allbrackets
5713 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5722 && !PL_lex_allbrackets
5723 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5733 yyl_percent(pTHX_ char *s)
5735 if (PL_expect == XOPERATOR) {
5737 && !PL_lex_allbrackets
5738 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5745 else if (PL_expect == XPOSTDEREF)
5748 PL_tokenbuf[0] = '%';
5749 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5751 if (!PL_tokenbuf[1]) {
5754 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5755 && intuit_more(s, PL_bufend)) {
5757 PL_tokenbuf[0] = '@';
5759 PL_expect = XOPERATOR;
5760 force_ident_maybe_lex('%');
5765 yyl_caret(pTHX_ char *s)
5768 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5769 if (bof && s[1] == '.')
5771 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5772 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5778 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5782 yyl_colon(pTHX_ char *s)
5786 switch (PL_expect) {
5788 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5790 PL_bufptr = s; /* update in case we back off */
5793 "Use of := for an empty attribute list is not allowed");
5800 PL_expect = XTERMBLOCK;
5802 /* NB: as well as parsing normal attributes, we also end up
5803 * here if there is something looking like attributes
5804 * following a signature (which is illegal, but used to be
5805 * legal in 5.20..5.26). If the latter, we still parse the
5806 * attributes so that error messages(s) are less confusing,
5807 * but ignore them (parser->sig_seen).
5811 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5812 bool sig = PL_parser->sig_seen;
5816 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5817 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5818 if (tmp < 0) tmp = -tmp;
5833 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5835 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5840 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5842 COPLINE_SET_FROM_MULTI_END;
5845 sv_catsv(sv, PL_lex_stuff);
5846 attrs = op_append_elem(OP_LIST, attrs,
5847 newSVOP(OP_CONST, 0, sv));
5848 SvREFCNT_dec_NN(PL_lex_stuff);
5849 PL_lex_stuff = NULL;
5852 /* NOTE: any CV attrs applied here need to be part of
5853 the CVf_BUILTIN_ATTRS define in cv.h! */
5854 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5857 CvLVALUE_on(PL_compcv);
5859 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5862 CvMETHOD_on(PL_compcv);
5864 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5867 Perl_ck_warner_d(aTHX_
5868 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5869 ":const is experimental"
5871 CvANONCONST_on(PL_compcv);
5872 if (!CvANON(PL_compcv))
5873 yyerror(":const is not permitted on named "
5877 /* After we've set the flags, it could be argued that
5878 we don't need to do the attributes.pm-based setting
5879 process, and shouldn't bother appending recognized
5880 flags. To experiment with that, uncomment the
5881 following "else". (Note that's already been
5882 uncommented. That keeps the above-applied built-in
5883 attributes from being intercepted (and possibly
5884 rejected) by a package's attribute routines, but is
5885 justified by the performance win for the common case
5886 of applying only built-in attributes.) */
5888 attrs = op_append_elem(OP_LIST, attrs,
5889 newSVOP(OP_CONST, 0,
5893 if (*s == ':' && s[1] != ':')
5896 break; /* require real whitespace or :'s */
5897 /* XXX losing whitespace on sequential attributes here */
5902 && !(PL_expect == XOPERATOR
5903 ? (*s == '=' || *s == ')')
5904 : (*s == '{' || *s == '(')))
5906 const char q = ((*s == '\'') ? '"' : '\'');
5907 /* If here for an expression, and parsed no attrs, back off. */
5908 if (PL_expect == XOPERATOR && !attrs) {
5912 /* MUST advance bufptr here to avoid bogus "at end of line"
5913 context messages from yyerror().
5916 yyerror( (const char *)
5918 ? Perl_form(aTHX_ "Invalid separator character "
5919 "%c%c%c in attribute list", q, *s, q)
5920 : "Unterminated attribute list" ) );
5923 OPERATOR(PERLY_COLON);
5927 if (PL_parser->sig_seen) {
5928 /* see comment about about sig_seen and parser error
5932 Perl_croak(aTHX_ "Subroutine attributes must come "
5933 "before the signature");
5936 NEXTVAL_NEXTTOKE.opval = attrs;
5942 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5947 PL_lex_allbrackets--;
5948 OPERATOR(PERLY_COLON);
5952 yyl_subproto(pTHX_ char *s, CV *cv)
5954 STRLEN protolen = CvPROTOLEN(cv);
5955 const char *proto = CvPROTO(cv);
5958 proto = S_strip_spaces(aTHX_ proto, &protolen);
5961 if ((optional = *proto == ';')) {
5964 } while (*proto == ';');
5970 *proto == '$' || *proto == '_'
5971 || *proto == '*' || *proto == '+'
5976 *proto == '\\' && proto[1] && proto[2] == '\0'
5979 UNIPROTO(UNIOPSUB,optional);
5982 if (*proto == '\\' && proto[1] == '[') {
5983 const char *p = proto + 2;
5984 while(*p && *p != ']')
5986 if(*p == ']' && !p[1])
5987 UNIPROTO(UNIOPSUB,optional);
5990 if (*proto == '&' && *s == '{') {
5992 sv_setpvs(PL_subname, "__ANON__");
5994 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5995 if (!PL_lex_allbrackets
5996 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5998 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6007 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6010 if (PL_lex_brackets > 100) {
6011 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6014 switch (PL_expect) {
6017 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6018 PL_lex_allbrackets++;
6019 OPERATOR(HASHBRACK);
6021 while (s < PL_bufend && SPACE_OR_TAB(*s))
6024 PL_tokenbuf[0] = '\0';
6025 if (d < PL_bufend && *d == '-') {
6026 PL_tokenbuf[0] = '-';
6028 while (d < PL_bufend && SPACE_OR_TAB(*d))
6031 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6033 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6035 while (d < PL_bufend && SPACE_OR_TAB(*d))
6038 const char minus = (PL_tokenbuf[0] == '-');
6039 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6041 force_next(PERLY_MINUS);
6047 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6048 PL_lex_allbrackets++;
6053 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6054 PL_lex_allbrackets++;
6058 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6059 PL_lex_allbrackets++;
6064 if (PL_oldoldbufptr == PL_last_lop)
6065 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6067 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6068 PL_lex_allbrackets++;
6071 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6073 /* This hack is to get the ${} in the message. */
6075 yyerror("syntax error");
6078 OPERATOR(HASHBRACK);
6080 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6081 /* ${...} or @{...} etc., but not print {...}
6082 * Skip the disambiguation and treat this as a block.
6084 goto block_expectation;
6086 /* This hack serves to disambiguate a pair of curlies
6087 * as being a block or an anon hash. Normally, expectation
6088 * determines that, but in cases where we're not in a
6089 * position to expect anything in particular (like inside
6090 * eval"") we have to resolve the ambiguity. This code
6091 * covers the case where the first term in the curlies is a
6092 * quoted string. Most other cases need to be explicitly
6093 * disambiguated by prepending a "+" before the opening
6094 * curly in order to force resolution as an anon hash.
6096 * XXX should probably propagate the outer expectation
6097 * into eval"" to rely less on this hack, but that could
6098 * potentially break current behavior of eval"".
6102 if (*s == '\'' || *s == '"' || *s == '`') {
6103 /* common case: get past first string, handling escapes */
6104 for (t++; t < PL_bufend && *t != *s;)
6109 else if (*s == 'q') {
6112 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6113 && !isWORDCHAR(*t))))
6115 /* skip q//-like construct */
6117 char open, close, term;
6120 while (t < PL_bufend && isSPACE(*t))
6122 /* check for q => */
6123 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6124 OPERATOR(HASHBRACK);
6128 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6132 for (t++; t < PL_bufend; t++) {
6133 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6135 else if (*t == open)
6139 for (t++; t < PL_bufend; t++) {
6140 if (*t == '\\' && t+1 < PL_bufend)
6142 else if (*t == close && --brackets <= 0)
6144 else if (*t == open)
6151 /* skip plain q word */
6152 while ( t < PL_bufend
6153 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6155 t += UTF ? UTF8SKIP(t) : 1;
6158 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6159 t += UTF ? UTF8SKIP(t) : 1;
6160 while ( t < PL_bufend
6161 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6163 t += UTF ? UTF8SKIP(t) : 1;
6166 while (t < PL_bufend && isSPACE(*t))
6168 /* if comma follows first term, call it an anon hash */
6169 /* XXX it could be a comma expression with loop modifiers */
6170 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6171 || (*t == '=' && t[1] == '>')))
6172 OPERATOR(HASHBRACK);
6173 if (PL_expect == XREF) {
6175 /* If there is an opening brace or 'sub:', treat it
6176 as a term to make ${{...}}{k} and &{sub:attr...}
6177 dwim. Otherwise, treat it as a statement, so
6178 map {no strict; ...} works.
6185 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6198 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6205 pl_yylval.ival = CopLINE(PL_curcop);
6206 PL_copline = NOLINE; /* invalidate current command line number */
6207 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6211 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6213 assert(s != PL_bufend);
6216 if (PL_lex_brackets <= 0)
6217 /* diag_listed_as: Unmatched right %s bracket */
6218 yyerror("Unmatched right curly bracket");
6220 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6222 PL_lex_allbrackets--;
6224 if (PL_lex_state == LEX_INTERPNORMAL) {
6225 if (PL_lex_brackets == 0) {
6226 if (PL_expect & XFAKEBRACK) {
6227 PL_expect &= XENUMMASK;
6228 PL_lex_state = LEX_INTERPEND;
6230 return yylex(); /* ignore fake brackets */
6232 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6233 && SvEVALED(PL_lex_repl))
6234 PL_lex_state = LEX_INTERPEND;
6235 else if (*s == '-' && s[1] == '>')
6236 PL_lex_state = LEX_INTERPENDMAYBE;
6237 else if (*s != '[' && *s != '{')
6238 PL_lex_state = LEX_INTERPEND;
6242 if (PL_expect & XFAKEBRACK) {
6243 PL_expect &= XENUMMASK;
6245 return yylex(); /* ignore fake brackets */
6248 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6249 if (formbrack) LEAVE_with_name("lex_format");
6250 if (formbrack == 2) { /* means . where arguments were expected */
6251 force_next(PERLY_SEMICOLON);
6255 TOKEN(PERLY_SEMICOLON);
6259 yyl_ampersand(pTHX_ char *s)
6261 if (PL_expect == XPOSTDEREF)
6262 POSTDEREF(PERLY_AMPERSAND);
6266 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6267 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6275 if (PL_expect == XOPERATOR) {
6278 if ( PL_bufptr == PL_linestart
6279 && ckWARN(WARN_SEMICOLON)
6280 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6282 CopLINE_dec(PL_curcop);
6283 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6284 CopLINE_inc(PL_curcop);
6287 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6289 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6290 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6296 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6301 PL_tokenbuf[0] = '&';
6302 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6303 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6306 force_ident_maybe_lex('&');
6308 PREREF(PERLY_AMPERSAND);
6310 TERM(PERLY_AMPERSAND);
6314 yyl_verticalbar(pTHX_ char *s)
6321 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6322 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6331 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6334 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6335 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6340 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6344 yyl_bang(pTHX_ char *s)
6346 const char tmp = *s++;
6348 /* was this !=~ where !~ was meant?
6349 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6351 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6352 const char *t = s+1;
6354 while (t < PL_bufend && isSPACE(*t))
6357 if (*t == '/' || *t == '?'
6358 || ((*t == 'm' || *t == 's' || *t == 'y')
6359 && !isWORDCHAR(t[1]))
6360 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6361 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6362 "!=~ should be !~");
6365 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6377 OPERATOR(PERLY_EXCLAMATION_MARK);
6381 yyl_snail(pTHX_ char *s)
6383 if (PL_expect == XPOSTDEREF)
6384 POSTDEREF(PERLY_SNAIL);
6385 PL_tokenbuf[0] = '@';
6386 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6387 if (PL_expect == XOPERATOR) {
6389 if (PL_bufptr > s) {
6391 PL_bufptr = PL_oldbufptr;
6396 if (!PL_tokenbuf[1]) {
6397 PREREF(PERLY_SNAIL);
6399 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6401 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6402 && intuit_more(s, PL_bufend))
6405 PL_tokenbuf[0] = '%';
6407 /* Warn about @ where they meant $. */
6408 if (*s == '[' || *s == '{') {
6409 if (ckWARN(WARN_SYNTAX)) {
6410 S_check_scalar_slice(aTHX_ s);
6414 PL_expect = XOPERATOR;
6415 force_ident_maybe_lex('@');
6420 yyl_slash(pTHX_ char *s)
6422 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6423 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6424 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6429 else if (PL_expect == XOPERATOR) {
6431 if (*s == '=' && !PL_lex_allbrackets
6432 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6440 /* Disable warning on "study /blah/" */
6441 if ( PL_oldoldbufptr == PL_last_uni
6442 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6443 || memNE(PL_last_uni, "study", 5)
6444 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6447 s = scan_pat(s,OP_MATCH);
6448 TERM(sublex_start());
6453 yyl_leftsquare(pTHX_ char *s)
6455 if (PL_lex_brackets > 100)
6456 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6457 PL_lex_brackstack[PL_lex_brackets++] = 0;
6458 PL_lex_allbrackets++;
6460 OPERATOR(PERLY_BRACKET_OPEN);
6464 yyl_rightsquare(pTHX_ char *s)
6466 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6469 if (PL_lex_brackets <= 0)
6470 /* diag_listed_as: Unmatched right %s bracket */
6471 yyerror("Unmatched right square bracket");
6474 PL_lex_allbrackets--;
6475 if (PL_lex_state == LEX_INTERPNORMAL) {
6476 if (PL_lex_brackets == 0) {
6477 if (*s == '-' && s[1] == '>')
6478 PL_lex_state = LEX_INTERPENDMAYBE;
6479 else if (*s != '[' && *s != '{')
6480 PL_lex_state = LEX_INTERPEND;
6483 TERM(PERLY_BRACKET_CLOSE);
6487 yyl_tilde(pTHX_ char *s)
6490 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6491 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6494 Perl_ck_warner_d(aTHX_
6495 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6496 "Smartmatch is experimental");
6497 NCEop(OP_SMARTMATCH);
6500 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6502 BCop(OP_SCOMPLEMENT);
6504 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6508 yyl_leftparen(pTHX_ char *s)
6510 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6511 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6515 PL_lex_allbrackets++;
6520 yyl_rightparen(pTHX_ char *s)
6522 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6525 PL_lex_allbrackets--;
6533 yyl_leftpointy(pTHX_ char *s)
6537 if (PL_expect != XOPERATOR) {
6538 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6540 if (s[1] == '<' && s[2] != '>')
6541 s = scan_heredoc(s);
6543 s = scan_inputsymbol(s);
6544 PL_expect = XOPERATOR;
6545 TOKEN(sublex_start());
6552 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6556 SHop(OP_LEFT_SHIFT);
6561 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6568 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6576 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6585 yyl_rightpointy(pTHX_ char *s)
6587 const char tmp = *s++;
6590 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6594 SHop(OP_RIGHT_SHIFT);
6596 else if (tmp == '=') {
6597 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6605 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6614 yyl_sglquote(pTHX_ char *s)
6616 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6618 missingterm(NULL, 0);
6619 COPLINE_SET_FROM_MULTI_END;
6620 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6621 if (PL_expect == XOPERATOR) {
6624 pl_yylval.ival = OP_CONST;
6625 TERM(sublex_start());
6629 yyl_dblquote(pTHX_ char *s)
6633 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6636 printbuf("### Saw string before %s\n", s);
6638 PerlIO_printf(Perl_debug_log,
6639 "### Saw unterminated string\n");
6641 if (PL_expect == XOPERATOR) {
6645 missingterm(NULL, 0);
6646 pl_yylval.ival = OP_CONST;
6647 /* FIXME. I think that this can be const if char *d is replaced by
6648 more localised variables. */
6649 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6650 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6651 pl_yylval.ival = OP_STRINGIFY;
6655 if (pl_yylval.ival == OP_CONST)
6656 COPLINE_SET_FROM_MULTI_END;
6657 TERM(sublex_start());
6661 yyl_backtick(pTHX_ char *s)
6663 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6666 printbuf("### Saw backtick string before %s\n", s);
6668 PerlIO_printf(Perl_debug_log,
6669 "### Saw unterminated backtick string\n");
6671 if (PL_expect == XOPERATOR)
6672 no_op("Backticks",s);
6674 missingterm(NULL, 0);
6675 pl_yylval.ival = OP_BACKTICK;
6676 TERM(sublex_start());
6680 yyl_backslash(pTHX_ char *s)
6682 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6683 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6685 if (PL_expect == XOPERATOR)
6686 no_op("Backslash",s);
6691 yyl_data_handle(pTHX)
6693 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6696 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6699 gv_init(gv,stash,"DATA",4,0);
6703 GvIOp(gv) = newIO();
6704 IoIFP(GvIOp(gv)) = PL_rsfp;
6706 /* Mark this internal pseudo-handle as clean */
6707 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6708 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6709 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6711 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6713 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6714 /* if the script was opened in binmode, we need to revert
6715 * it to text mode for compatibility; but only iff it has CRs
6716 * XXX this is a questionable hack at best. */
6717 if (PL_bufend-PL_bufptr > 2
6718 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6721 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6722 loc = PerlIO_tell(PL_rsfp);
6723 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6725 if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6727 PerlIO_seek(PL_rsfp, loc, 0);
6732 #ifdef PERLIO_LAYERS
6735 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6742 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6743 __attribute__noreturn__;
6745 PERL_STATIC_NO_RET void
6746 yyl_croak_unrecognised(pTHX_ char *s)
6748 SV *dsv = newSVpvs_flags("", SVs_TEMP);
6754 STRLEN skiplen = UTF8SKIP(s);
6755 STRLEN stravail = PL_bufend - s;
6756 c = sv_uni_display(dsv, newSVpvn_flags(s,
6757 skiplen > stravail ? stravail : skiplen,
6758 SVs_TEMP | SVf_UTF8),
6759 10, UNI_DISPLAY_ISPRINT);
6762 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6765 if (s >= PL_linestart) {
6769 /* somehow (probably due to a parse failure), PL_linestart has advanced
6770 * pass PL_bufptr, get a reasonable beginning of line
6773 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6776 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6777 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6778 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6781 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6782 UTF8fARG(UTF, (s - d), d),
6787 yyl_require(pTHX_ char *s, I32 orig_keyword)
6791 s = force_version(s, FALSE);
6793 else if (*s != 'v' || !isDIGIT(s[1])
6794 || (s = force_version(s, TRUE), *s == 'v'))
6796 *PL_tokenbuf = '\0';
6797 s = force_word(s,BAREWORD,TRUE,TRUE);
6798 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6799 PL_tokenbuf + sizeof(PL_tokenbuf),
6802 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6803 GV_ADD | (UTF ? SVf_UTF8 : 0));
6806 yyerror("<> at require-statement should be quotes");
6809 if (orig_keyword == KEY_require)
6814 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6816 PL_last_uni = PL_oldbufptr;
6817 PL_last_lop_op = OP_REQUIRE;
6819 return REPORT( (int)REQUIRE );
6823 yyl_foreach(pTHX_ char *s)
6825 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6827 pl_yylval.ival = CopLINE(PL_curcop);
6829 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6831 SSize_t s_off = s - SvPVX(PL_linestr);
6834 if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6837 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6842 /* skip optional package name, as in "for my abc $x (..)" */
6843 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6844 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6847 if (*p != '$' && *p != '\\')
6848 Perl_croak(aTHX_ "Missing $ on loop variable");
6850 /* The buffer may have been reallocated, update s */
6851 s = SvPVX(PL_linestr) + s_off;
6857 yyl_do(pTHX_ char *s, I32 orig_keyword)
6866 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6868 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6869 && !keyword(PL_tokenbuf + 1, len, 0)) {
6870 SSize_t off = s-SvPVX(PL_linestr);
6872 s = SvPVX(PL_linestr)+off;
6874 force_ident_maybe_lex('&');
6879 if (orig_keyword == KEY_do)
6887 yyl_my(pTHX_ char *s, I32 my)
6891 yyerror(Perl_form(aTHX_
6892 "Can't redeclare \"%s\" in \"%s\"",
6893 my == KEY_my ? "my" :
6894 my == KEY_state ? "state" : "our",
6895 PL_in_my == KEY_my ? "my" :
6896 PL_in_my == KEY_state ? "state" : "our"));
6900 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6902 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6903 if (memEQs(PL_tokenbuf, len, "sub"))
6904 return yyl_sub(aTHX_ s, my);
6905 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6906 if (!PL_in_my_stash) {
6910 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6911 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6912 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6915 else if (*s == '\\') {
6916 if (!FEATURE_MYREF_IS_ENABLED)
6917 Perl_croak(aTHX_ "The experimental declared_refs "
6918 "feature is not enabled");
6919 Perl_ck_warner_d(aTHX_
6920 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6921 "Declaring references is experimental");
6926 static int yyl_try(pTHX_ char*);
6929 yyl_eol_needs_semicolon(pTHX_ char **ps)
6932 if (PL_lex_state != LEX_NORMAL
6933 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6935 const bool in_comment = *s == '#';
6937 if (*s == '#' && s == PL_linestart && PL_in_eval
6938 && !PL_rsfp && !PL_parser->filtered) {
6939 /* handle eval qq[#line 1 "foo"\n ...] */
6940 CopLINE_dec(PL_curcop);
6941 incline(s, PL_bufend);
6944 while (d < PL_bufend && *d != '\n')
6949 if (in_comment && d == PL_bufend
6950 && PL_lex_state == LEX_INTERPNORMAL
6951 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6952 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6954 incline(s, PL_bufend);
6955 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6956 PL_lex_state = LEX_FORMLINE;
6957 force_next(FORMRBRACK);
6963 while (s < PL_bufend && *s != '\n')
6965 if (s < PL_bufend) {
6968 incline(s, PL_bufend);
6976 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
6984 bof = cBOOL(PL_rsfp);
6987 PL_bufptr = PL_bufend;
6988 COPLINE_INC_WITH_HERELINES;
6989 if (!lex_next_chunk(fake_eof)) {
6990 CopLINE_dec(PL_curcop);
6992 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */
6994 CopLINE_dec(PL_curcop);
6996 /* If it looks like the start of a BOM or raw UTF-16,
6997 * check if it in fact is. */
7000 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7004 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7005 bof = (offset == (Off_t)SvCUR(PL_linestr));
7006 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7007 /* offset may include swallowed CR */
7009 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7012 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7013 s = swallow_bom((U8*)s);
7016 if (PL_parser->in_pod) {
7017 /* Incest with pod. */
7018 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7021 SvPVCLEAR(PL_linestr);
7022 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7023 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7024 PL_last_lop = PL_last_uni = NULL;
7025 PL_parser->in_pod = 0;
7028 if (PL_rsfp || PL_parser->filtered)
7029 incline(s, PL_bufend);
7030 } while (PL_parser->in_pod);
7032 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7033 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7034 PL_last_lop = PL_last_uni = NULL;
7035 if (CopLINE(PL_curcop) == 1) {
7036 while (s < PL_bufend && isSPACE(*s))
7038 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7042 if (*s == '#' && *(s+1) == '!')
7044 #ifdef ALTERNATE_SHEBANG
7046 static char const as[] = ALTERNATE_SHEBANG;
7047 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7048 d = s + (sizeof(as) - 1);
7050 #endif /* ALTERNATE_SHEBANG */
7059 while (*d && !isSPACE(*d))
7063 #ifdef ARG_ZERO_IS_SCRIPT
7064 if (ipathend > ipath) {
7066 * HP-UX (at least) sets argv[0] to the script name,
7067 * which makes $^X incorrect. And Digital UNIX and Linux,
7068 * at least, set argv[0] to the basename of the Perl
7069 * interpreter. So, having found "#!", we'll set it right.
7071 SV* copfilesv = CopFILESV(PL_curcop);
7074 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7076 assert(SvPOK(x) || SvGMAGICAL(x));
7077 if (sv_eq(x, copfilesv)) {
7078 sv_setpvn(x, ipath, ipathend - ipath);
7084 const char *bstart = SvPV_const(copfilesv, blen);
7085 const char * const lstart = SvPV_const(x, llen);
7087 bstart += blen - llen;
7088 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7089 sv_setpvn(x, ipath, ipathend - ipath);
7096 /* Anything to do if no copfilesv? */
7098 TAINT_NOT; /* $^X is always tainted, but that's OK */
7100 #endif /* ARG_ZERO_IS_SCRIPT */
7105 d = instr(s,"perl -");
7107 d = instr(s,"perl");
7109 /* avoid getting into infinite loops when shebang
7110 * line contains "Perl" rather than "perl" */
7112 for (d = ipathend-4; d >= ipath; --d) {
7113 if (isALPHA_FOLD_EQ(*d, 'p')
7114 && !ibcmp(d, "perl", 4))
7124 #ifdef ALTERNATE_SHEBANG
7126 * If the ALTERNATE_SHEBANG on this system starts with a
7127 * character that can be part of a Perl expression, then if
7128 * we see it but not "perl", we're probably looking at the
7129 * start of Perl code, not a request to hand off to some
7130 * other interpreter. Similarly, if "perl" is there, but
7131 * not in the first 'word' of the line, we assume the line
7132 * contains the start of the Perl program.
7134 if (d && *s != '#') {
7135 const char *c = ipath;
7136 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7139 d = NULL; /* "perl" not in first word; ignore */
7141 *s = '#'; /* Don't try to parse shebang line */
7143 #endif /* ALTERNATE_SHEBANG */
7148 && !instr(s,"indir")
7149 && instr(PL_origargv[0],"perl"))
7155 while (s < PL_bufend && isSPACE(*s))
7157 if (s < PL_bufend) {
7158 Newx(newargv,PL_origargc+3,char*);
7160 while (s < PL_bufend && !isSPACE(*s))
7163 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7166 newargv = PL_origargv;
7169 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7171 Perl_croak(aTHX_ "Can't exec %s", ipath);
7174 while (*d && !isSPACE(*d))
7176 while (SPACE_OR_TAB(*d))
7180 const bool switches_done = PL_doswitches;
7181 const U32 oldpdb = PL_perldb;
7182 const bool oldn = PL_minus_n;
7183 const bool oldp = PL_minus_p;
7187 bool baduni = FALSE;
7189 const char *d2 = d1 + 1;
7190 if (parse_unicode_opts((const char **)&d2)
7194 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7195 const char * const m = d1;
7196 while (*d1 && !isSPACE(*d1))
7198 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7201 d1 = moreswitches(d1);
7203 if (PL_doswitches && !switches_done) {
7204 int argc = PL_origargc;
7205 char **argv = PL_origargv;
7208 } while (argc && argv[0][0] == '-' && argv[0][1]);
7209 init_argv_symbols(argc,argv);
7211 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7212 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7213 /* if we have already added "LINE: while (<>) {",
7214 we must not do it again */
7216 SvPVCLEAR(PL_linestr);
7217 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7218 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7219 PL_last_lop = PL_last_uni = NULL;
7220 PL_preambled = FALSE;
7221 if (PERLDB_LINE_OR_SAVESRC)
7222 (void)gv_fetchfile(PL_origfilename);
7230 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7231 PL_lex_state = LEX_FORMLINE;
7232 force_next(FORMRBRACK);
7233 TOKEN(PERLY_SEMICOLON);
7241 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7245 = newSVOP(OP_CONST, 0,
7246 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7247 pl_yylval.opval->op_private = OPpCONST_BARE;
7252 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7254 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7255 && PL_parser->saw_infix_sigil)
7257 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7258 "Operator or semicolon missing before %c%" UTF8f,
7260 UTF8fARG(UTF, strlen(PL_tokenbuf),
7262 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7263 "Ambiguous use of %c resolved as operator %c",
7264 lastchar, lastchar);
7270 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7274 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7275 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7276 if (SvTYPE(sv) == SVt_PVAV)
7277 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7280 pl_yylval.opval->op_private = 0;
7281 pl_yylval.opval->op_folded = 1;
7282 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7287 op_free(pl_yylval.opval);
7289 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7290 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7291 PL_last_lop = PL_oldbufptr;
7292 PL_last_lop_op = OP_ENTERSUB;
7294 /* Is there a prototype? */
7296 int k = yyl_subproto(aTHX_ s, cv);
7301 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7303 force_next(off ? PRIVATEREF : BAREWORD);
7304 if (!PL_lex_allbrackets
7305 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7307 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7313 /* Honour "reserved word" warnings, and enforce strict subs */
7315 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7317 /* after "print" and similar functions (corresponding to
7318 * "F? L" in opcode.pl), whatever wasn't already parsed as
7319 * a filehandle should be subject to "strict subs".
7320 * Likewise for the optional indirect-object argument to system
7321 * or exec, which can't be a bareword */
7322 if ((PL_last_lop_op == OP_PRINT
7323 || PL_last_lop_op == OP_PRTF
7324 || PL_last_lop_op == OP_SAY
7325 || PL_last_lop_op == OP_SYSTEM
7326 || PL_last_lop_op == OP_EXEC)
7327 && (PL_hints & HINT_STRICT_SUBS))
7329 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7332 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7333 char *d = PL_tokenbuf;
7336 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7337 /* PL_warn_reserved is constant */
7338 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7339 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7341 GCC_DIAG_RESTORE_STMT;
7347 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7350 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7352 bool no_op_error = FALSE;
7353 /* Use this var to track whether intuit_method has been
7354 called. intuit_method returns 0 or > 255. */
7357 if (PL_expect == XOPERATOR) {
7358 if (PL_bufptr == PL_linestart) {
7359 CopLINE_dec(PL_curcop);
7360 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7361 CopLINE_inc(PL_curcop);
7364 /* We want to call no_op with s pointing after the
7365 bareword, so defer it. But we want it to come
7366 before the Bad name croak. */
7370 /* Get the rest if it looks like a package qualifier */
7372 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7374 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7377 no_op("Bareword",s);
7378 no_op_error = FALSE;
7381 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7382 UTF8fARG(UTF, len, PL_tokenbuf),
7383 *s == '\'' ? "'" : "::");
7389 no_op("Bareword",s);
7391 /* See if the name is "Foo::",
7392 in which case Foo is a bareword
7393 (and a package name). */
7395 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7396 if (ckWARN(WARN_BAREWORD)
7397 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7398 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7399 "Bareword \"%" UTF8f
7400 "\" refers to nonexistent package",
7401 UTF8fARG(UTF, len, PL_tokenbuf));
7403 PL_tokenbuf[len] = '\0';
7412 /* if we saw a global override before, get the right name */
7415 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7417 SV *sv = newSVpvs("CORE::GLOBAL::");
7423 /* Presume this is going to be a bareword of some sort. */
7425 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7426 pl_yylval.opval->op_private = OPpCONST_BARE;
7428 /* And if "Foo::", then that's what it certainly is. */
7430 return yyl_safe_bareword(aTHX_ s, lastchar);
7433 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7434 const_op->op_private = OPpCONST_BARE;
7435 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7439 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7442 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7445 /* See if it's the indirect object for a list operator. */
7448 && PL_oldoldbufptr < PL_bufptr
7449 && (PL_oldoldbufptr == PL_last_lop
7450 || PL_oldoldbufptr == PL_last_uni)
7451 && /* NO SKIPSPACE BEFORE HERE! */
7453 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7456 bool immediate_paren = *s == '(';
7459 /* (Now we can afford to cross potential line boundary.) */
7462 /* intuit_method() can indirectly call lex_next_chunk(),
7465 s_off = s - SvPVX(PL_linestr);
7466 /* Two barewords in a row may indicate method call. */
7467 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7469 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7471 /* the code at method: doesn't use s */
7474 s = SvPVX(PL_linestr) + s_off;
7476 /* If not a declared subroutine, it's an indirect object. */
7477 /* (But it's an indir obj regardless for sort.) */
7478 /* Also, if "_" follows a filetest operator, it's a bareword */
7481 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7483 && (PL_last_lop_op != OP_MAPSTART
7484 && PL_last_lop_op != OP_GREPSTART))))
7485 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7486 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7490 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7491 yyl_strictwarn_bareword(aTHX_ lastchar);
7492 op_free(c.rv2cv_op);
7493 return yyl_safe_bareword(aTHX_ s, lastchar);
7497 PL_expect = XOPERATOR;
7500 /* Is this a word before a => operator? */
7501 if (*s == '=' && s[1] == '>' && !pkgname) {
7502 op_free(c.rv2cv_op);
7504 if (c.gvp || (c.lex && !c.off)) {
7505 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7506 /* This is our own scalar, created a few lines
7507 above, so this is safe. */
7508 SvREADONLY_off(c.sv);
7509 sv_setpv(c.sv, PL_tokenbuf);
7510 if (UTF && !IN_BYTES
7511 && is_utf8_string((U8*)PL_tokenbuf, len))
7513 SvREADONLY_on(c.sv);
7518 /* If followed by a paren, it's certainly a subroutine. */
7523 while (SPACE_OR_TAB(*d))
7525 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7526 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7528 NEXTVAL_NEXTTOKE.opval =
7529 c.off ? c.rv2cv_op : pl_yylval.opval;
7531 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7532 else op_free(c.rv2cv_op), force_next(BAREWORD);
7534 TOKEN(PERLY_AMPERSAND);
7537 /* If followed by var or block, call it a method (unless sub) */
7539 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7540 op_free(c.rv2cv_op);
7541 PL_last_lop = PL_oldbufptr;
7542 PL_last_lop_op = OP_METHOD;
7543 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7544 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7545 PL_expect = XBLOCKTERM;
7547 return REPORT(METHOD);
7550 /* If followed by a bareword, see if it looks like indir obj. */
7554 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7555 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7558 if (c.lex && !c.off) {
7559 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7560 SvREADONLY_off(c.sv);
7561 sv_setpvn(c.sv, PL_tokenbuf, len);
7562 if (UTF && !IN_BYTES
7563 && is_utf8_string((U8*)PL_tokenbuf, len))
7565 else SvUTF8_off(c.sv);
7567 op_free(c.rv2cv_op);
7568 if (key == METHOD && !PL_lex_allbrackets
7569 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7571 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7576 /* Not a method, so call it a subroutine (if defined) */
7579 /* Check for a constant sub */
7580 c.sv = cv_const_sv_or_av(c.cv);
7581 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7584 /* Call it a bare word */
7586 if (PL_hints & HINT_STRICT_SUBS)
7587 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7589 yyl_strictwarn_bareword(aTHX_ lastchar);
7591 op_free(c.rv2cv_op);
7593 return yyl_safe_bareword(aTHX_ s, lastchar);
7597 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7600 default: /* not a keyword */
7601 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7604 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7608 newSVOP(OP_CONST, 0,
7609 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7612 case KEY___PACKAGE__:
7614 newSVOP(OP_CONST, 0, (PL_curstash
7615 ? newSVhek(HvNAME_HEK(PL_curstash))
7621 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7622 yyl_data_handle(aTHX);
7623 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7626 FUN0OP(CvCLONE(PL_compcv)
7627 ? newOP(OP_RUNCV, 0)
7628 : newPVOP(OP_RUNCV,0,NULL));
7637 if (PL_expect == XSTATE)
7638 return yyl_sub(aTHX_ PL_bufptr, key);
7639 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7648 LOP(OP_ACCEPT,XTERM);
7651 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7656 LOP(OP_ATAN2,XTERM);
7662 LOP(OP_BINMODE,XTERM);
7665 LOP(OP_BLESS,XTERM);
7674 /* We have to disambiguate the two senses of
7675 "continue". If the next token is a '{' then
7676 treat it as the start of a continue block;
7677 otherwise treat it as a control operator.
7687 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7697 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7706 LOP(OP_CRYPT,XTERM);
7709 LOP(OP_CHMOD,XTERM);
7712 LOP(OP_CHOWN,XTERM);
7715 LOP(OP_CONNECT,XTERM);
7730 return yyl_do(aTHX_ s, orig_keyword);
7733 PL_hints |= HINT_BLOCK_SCOPE;
7743 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7744 STR_WITH_LEN("NDBM_File::"),
7745 STR_WITH_LEN("DB_File::"),
7746 STR_WITH_LEN("GDBM_File::"),
7747 STR_WITH_LEN("SDBM_File::"),
7748 STR_WITH_LEN("ODBM_File::"),
7750 LOP(OP_DBMOPEN,XTERM);
7762 pl_yylval.ival = CopLINE(PL_curcop);
7766 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7778 if (*s == '{') { /* block eval */
7779 PL_expect = XTERMBLOCK;
7780 UNIBRACK(OP_ENTERTRY);
7782 else { /* string eval */
7784 UNIBRACK(OP_ENTEREVAL);
7789 UNIBRACK(-OP_ENTEREVAL);
7803 case KEY_endhostent:
7809 case KEY_endservent:
7812 case KEY_endprotoent:
7823 return yyl_foreach(aTHX_ s);
7826 LOP(OP_FORMLINE,XTERM);
7835 LOP(OP_FCNTL,XTERM);
7841 LOP(OP_FLOCK,XTERM);
7844 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7849 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7854 LOP(OP_GREPSTART, XREF);
7871 case KEY_getpriority:
7872 LOP(OP_GETPRIORITY,XTERM);
7874 case KEY_getprotobyname:
7877 case KEY_getprotobynumber:
7878 LOP(OP_GPBYNUMBER,XTERM);
7880 case KEY_getprotoent:
7892 case KEY_getpeername:
7893 UNI(OP_GETPEERNAME);
7895 case KEY_gethostbyname:
7898 case KEY_gethostbyaddr:
7899 LOP(OP_GHBYADDR,XTERM);
7901 case KEY_gethostent:
7904 case KEY_getnetbyname:
7907 case KEY_getnetbyaddr:
7908 LOP(OP_GNBYADDR,XTERM);
7913 case KEY_getservbyname:
7914 LOP(OP_GSBYNAME,XTERM);
7916 case KEY_getservbyport:
7917 LOP(OP_GSBYPORT,XTERM);
7919 case KEY_getservent:
7922 case KEY_getsockname:
7923 UNI(OP_GETSOCKNAME);
7925 case KEY_getsockopt:
7926 LOP(OP_GSOCKOPT,XTERM);
7941 pl_yylval.ival = CopLINE(PL_curcop);
7942 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7943 "given is experimental");
7947 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7953 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7955 pl_yylval.ival = CopLINE(PL_curcop);
7959 LOP(OP_INDEX,XTERM);
7965 LOP(OP_IOCTL,XTERM);
7968 Perl_ck_warner_d(aTHX_
7969 packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
7997 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8002 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8016 LOP(OP_LISTEN,XTERM);
8025 s = scan_pat(s,OP_MATCH);
8026 TERM(sublex_start());
8029 LOP(OP_MAPSTART, XREF);
8032 LOP(OP_MKDIR,XTERM);
8035 LOP(OP_MSGCTL,XTERM);
8038 LOP(OP_MSGGET,XTERM);
8041 LOP(OP_MSGRCV,XTERM);
8044 LOP(OP_MSGSND,XTERM);
8049 return yyl_my(aTHX_ s, key);
8055 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8060 s = tokenize_use(0, s);
8064 if (*s == '(' || (s = skipspace(s), *s == '('))
8067 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8068 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8074 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8076 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8077 for (t=d; isSPACE(*t);)
8079 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8081 && !(t[0] == '=' && t[1] == '>')
8082 && !(t[0] == ':' && t[1] == ':')
8083 && !keyword(s, d-s, 0)
8085 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8086 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8087 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8093 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8095 pl_yylval.ival = OP_OR;
8105 LOP(OP_OPEN_DIR,XTERM);
8108 checkcomma(s,PL_tokenbuf,"filehandle");
8112 checkcomma(s,PL_tokenbuf,"filehandle");
8131 s = force_word(s,BAREWORD,FALSE,TRUE);
8133 s = force_strict_version(s);
8137 LOP(OP_PIPE_OP,XTERM);
8140 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8142 missingterm(NULL, 0);
8143 COPLINE_SET_FROM_MULTI_END;
8144 pl_yylval.ival = OP_CONST;
8145 TERM(sublex_start());
8151 return yyl_qw(aTHX_ s, len);
8154 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8156 missingterm(NULL, 0);
8157 pl_yylval.ival = OP_STRINGIFY;
8158 if (SvIVX(PL_lex_stuff) == '\'')
8159 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8160 TERM(sublex_start());
8163 s = scan_pat(s,OP_QR);
8164 TERM(sublex_start());
8167 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8169 missingterm(NULL, 0);
8170 pl_yylval.ival = OP_BACKTICK;
8171 TERM(sublex_start());
8177 return yyl_require(aTHX_ s, orig_keyword);
8186 LOP(OP_RENAME,XTERM);
8195 LOP(OP_RINDEX,XTERM);
8204 UNIDOR(OP_READLINE);
8207 UNIDOR(OP_BACKTICK);
8216 LOP(OP_REVERSE,XTERM);
8219 UNIDOR(OP_READLINK);
8226 if (pl_yylval.opval)
8227 TERM(sublex_start());
8229 TOKEN(1); /* force error */
8232 checkcomma(s,PL_tokenbuf,"filehandle");
8242 LOP(OP_SELECT,XTERM);
8248 LOP(OP_SEMCTL,XTERM);
8251 LOP(OP_SEMGET,XTERM);
8254 LOP(OP_SEMOP,XTERM);
8260 LOP(OP_SETPGRP,XTERM);
8262 case KEY_setpriority:
8263 LOP(OP_SETPRIORITY,XTERM);
8265 case KEY_sethostent:
8271 case KEY_setservent:
8274 case KEY_setprotoent:
8284 LOP(OP_SEEKDIR,XTERM);
8286 case KEY_setsockopt:
8287 LOP(OP_SSOCKOPT,XTERM);
8293 LOP(OP_SHMCTL,XTERM);
8296 LOP(OP_SHMGET,XTERM);
8299 LOP(OP_SHMREAD,XTERM);
8302 LOP(OP_SHMWRITE,XTERM);
8305 LOP(OP_SHUTDOWN,XTERM);
8314 LOP(OP_SOCKET,XTERM);
8316 case KEY_socketpair:
8317 LOP(OP_SOCKPAIR,XTERM);
8320 checkcomma(s,PL_tokenbuf,"subroutine name");
8323 s = force_word(s,BAREWORD,TRUE,TRUE);
8327 LOP(OP_SPLIT,XTERM);
8330 LOP(OP_SPRINTF,XTERM);
8333 LOP(OP_SPLICE,XTERM);
8348 LOP(OP_SUBSTR,XTERM);
8352 return yyl_sub(aTHX_ s, key);
8355 LOP(OP_SYSTEM,XREF);
8358 LOP(OP_SYMLINK,XTERM);
8361 LOP(OP_SYSCALL,XTERM);
8364 LOP(OP_SYSOPEN,XTERM);
8367 LOP(OP_SYSSEEK,XTERM);
8370 LOP(OP_SYSREAD,XTERM);
8373 LOP(OP_SYSWRITE,XTERM);
8378 TERM(sublex_start());
8399 LOP(OP_TRUNCATE,XTERM);
8411 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8413 pl_yylval.ival = CopLINE(PL_curcop);
8417 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8419 pl_yylval.ival = CopLINE(PL_curcop);
8423 LOP(OP_UNLINK,XTERM);
8429 LOP(OP_UNPACK,XTERM);
8432 LOP(OP_UTIME,XTERM);
8438 LOP(OP_UNSHIFT,XTERM);
8441 s = tokenize_use(1, s);
8451 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8453 pl_yylval.ival = CopLINE(PL_curcop);
8454 Perl_ck_warner_d(aTHX_
8455 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8456 "when is experimental");
8460 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8462 pl_yylval.ival = CopLINE(PL_curcop);
8466 PL_hints |= HINT_BLOCK_SCOPE;
8473 LOP(OP_WAITPID,XTERM);
8479 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8480 * we use the same number on EBCDIC */
8481 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8485 if (PL_expect == XOPERATOR) {
8486 if (*s == '=' && !PL_lex_allbrackets
8487 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8494 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8497 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8499 pl_yylval.ival = OP_XOR;
8505 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8508 I32 orig_keyword = 0;
8512 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8513 if ((*s == ':' && s[1] == ':')
8514 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8516 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8517 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8520 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8521 UTF8fARG(UTF, len, PL_tokenbuf));
8524 else if (key == KEY_require || key == KEY_do
8526 /* that's a way to remember we saw "CORE::" */
8529 /* Known to be a reserved word at this point */
8530 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8534 yyl_keylookup(pTHX_ char *s, GV *gv)
8539 struct code c = no_code;
8540 I32 orig_keyword = 0;
8546 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8548 /* Some keywords can be followed by any delimiter, including ':' */
8549 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8551 /* x::* is just a word, unless x is "CORE" */
8552 if (!anydelim && *s == ':' && s[1] == ':') {
8553 if (memEQs(PL_tokenbuf, len, "CORE"))
8554 return yyl_key_core(aTHX_ s, len, c);
8555 return yyl_just_a_word(aTHX_ s, len, 0, c);
8559 while (d < PL_bufend && isSPACE(*d))
8560 d++; /* no comments skipped here, or s### is misparsed */
8562 /* Is this a word before a => operator? */
8563 if (*d == '=' && d[1] == '>') {
8564 return yyl_fatcomma(aTHX_ s, len);
8567 /* Check for plugged-in keyword */
8571 char *saved_bufptr = PL_bufptr;
8573 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8575 if (result == KEYWORD_PLUGIN_DECLINE) {
8576 /* not a plugged-in keyword */
8577 PL_bufptr = saved_bufptr;
8578 } else if (result == KEYWORD_PLUGIN_STMT) {
8579 pl_yylval.opval = o;
8581 if (!PL_nexttoke) PL_expect = XSTATE;
8582 return REPORT(PLUGSTMT);
8583 } else if (result == KEYWORD_PLUGIN_EXPR) {
8584 pl_yylval.opval = o;
8586 if (!PL_nexttoke) PL_expect = XOPERATOR;
8587 return REPORT(PLUGEXPR);
8589 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8593 /* Is this a label? */
8594 if (!anydelim && PL_expect == XSTATE
8595 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8598 newSVOP(OP_CONST, 0,
8599 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8604 /* Check for lexical sub */
8605 if (PL_expect != XOPERATOR) {
8606 char tmpbuf[sizeof PL_tokenbuf + 1];
8608 Copy(PL_tokenbuf, tmpbuf+1, len, char);
8609 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8610 if (c.off != NOT_IN_PAD) {
8611 assert(c.off); /* we assume this is boolean-true below */
8612 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8613 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
8614 HEK * const stashname = HvNAME_HEK(stash);
8615 c.sv = newSVhek(stashname);
8616 sv_catpvs(c.sv, "::");
8617 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8618 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8619 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8625 return yyl_just_a_word(aTHX_ s, len, 0, c);
8629 c.rv2cv_op = newOP(OP_PADANY, 0);
8630 c.rv2cv_op->op_targ = c.off;
8631 c.cv = find_lexical_cv(c.off);
8634 return yyl_just_a_word(aTHX_ s, len, 0, c);
8639 /* Check for built-in keyword */
8640 key = keyword(PL_tokenbuf, len, 0);
8643 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8645 if (key && key != KEY___DATA__ && key != KEY___END__
8646 && (!anydelim || *s != '#')) {
8647 /* no override, and not s### either; skipspace is safe here
8648 * check for => on following line */
8650 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8651 STRLEN soff = s - SvPVX(PL_linestr);
8653 arrow = *s == '=' && s[1] == '>';
8654 PL_bufptr = SvPVX(PL_linestr) + bufoff;
8655 s = SvPVX(PL_linestr) + soff;
8657 return yyl_fatcomma(aTHX_ s, len);
8660 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8664 yyl_try(pTHX_ char *s)
8673 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8674 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8678 yyl_croak_unrecognised(aTHX_ s);
8682 /* emulate EOF on ^D or ^Z */
8683 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8690 if ((!PL_rsfp || PL_lex_inwhat)
8691 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8695 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8697 yyerror((const char *)
8699 ? "Format not terminated"
8700 : "Missing right curly or square bracket"));
8703 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8707 if (s++ < PL_bufend)
8708 goto retry; /* ignore stray nulls */
8711 if (!PL_in_eval && !PL_preambled) {
8712 PL_preambled = TRUE;
8714 /* Generate a string of Perl code to load the debugger.
8715 * If PERL5DB is set, it will return the contents of that,
8716 * otherwise a compile-time require of perl5db.pl. */
8718 const char * const pdb = PerlEnv_getenv("PERL5DB");
8721 sv_setpv(PL_linestr, pdb);
8722 sv_catpvs(PL_linestr,";");
8724 SETERRNO(0,SS_NORMAL);
8725 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8727 PL_parser->preambling = CopLINE(PL_curcop);
8729 SvPVCLEAR(PL_linestr);
8730 if (PL_preambleav) {
8731 SV **svp = AvARRAY(PL_preambleav);
8732 SV **const end = svp + AvFILLp(PL_preambleav);
8734 sv_catsv(PL_linestr, *svp);
8736 sv_catpvs(PL_linestr, ";");
8738 sv_free(MUTABLE_SV(PL_preambleav));
8739 PL_preambleav = NULL;
8742 sv_catpvs(PL_linestr,
8743 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8744 if (PL_minus_n || PL_minus_p) {
8745 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8747 sv_catpvs(PL_linestr,"chomp;");
8750 if ( ( *PL_splitstr == '/'
8751 || *PL_splitstr == '\''
8752 || *PL_splitstr == '"')
8753 && strchr(PL_splitstr + 1, *PL_splitstr))
8755 /* strchr is ok, because -F pattern can't contain
8757 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8760 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8761 bytes can be used as quoting characters. :-) */
8762 const char *splits = PL_splitstr;
8763 sv_catpvs(PL_linestr, "our @F=split(q\0");
8766 if (*splits == '\\')
8767 sv_catpvn(PL_linestr, splits, 1);
8768 sv_catpvn(PL_linestr, splits, 1);
8769 } while (*splits++);
8770 /* This loop will embed the trailing NUL of
8771 PL_linestr as the last thing it does before
8773 sv_catpvs(PL_linestr, ");");
8777 sv_catpvs(PL_linestr,"our @F=split(' ');");
8780 sv_catpvs(PL_linestr, "\n");
8781 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8782 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8783 PL_last_lop = PL_last_uni = NULL;
8784 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8785 update_debugger_info(PL_linestr, NULL, 0);
8788 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8793 #ifdef PERL_STRICT_CR
8794 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8796 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8798 case ' ': case '\t': case '\f': case '\v':
8804 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8805 if (needs_semicolon)
8806 TOKEN(PERLY_SEMICOLON);
8812 return yyl_hyphen(aTHX_ s);
8815 return yyl_plus(aTHX_ s);
8818 return yyl_star(aTHX_ s);
8821 return yyl_percent(aTHX_ s);
8824 return yyl_caret(aTHX_ s);
8827 return yyl_leftsquare(aTHX_ s);
8830 return yyl_tilde(aTHX_ s);
8833 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8836 OPERATOR(PERLY_COMMA);
8839 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8840 return yyl_colon(aTHX_ s + 1);
8843 return yyl_leftparen(aTHX_ s + 1);
8846 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8851 TOKEN(PERLY_SEMICOLON);
8854 return yyl_rightparen(aTHX_ s);
8857 return yyl_rightsquare(aTHX_ s);
8860 return yyl_leftcurly(aTHX_ s + 1, 0);
8863 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8865 return yyl_rightcurly(aTHX_ s, 0);
8868 return yyl_ampersand(aTHX_ s);
8871 return yyl_verticalbar(aTHX_ s);
8874 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8875 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
8877 s = vcs_conflict_marker(s + 7);
8883 const char tmp = *s++;
8885 if (!PL_lex_allbrackets
8886 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8894 if (!PL_lex_allbrackets
8895 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8900 OPERATOR(PERLY_COMMA);
8904 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8905 && memCHRs("+-*/%.^&|<",tmp))
8906 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8907 "Reversed %c= operator",(int)tmp);
8909 if (PL_expect == XSTATE
8911 && (s == PL_linestart+1 || s[-2] == '\n') )
8913 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8914 || PL_lex_state != LEX_NORMAL)
8919 incline(s, PL_bufend);
8920 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8922 s = (char *) memchr(s,'\n', d - s);
8927 incline(s, PL_bufend);
8935 PL_parser->in_pod = 1;
8939 if (PL_expect == XBLOCK) {
8941 #ifdef PERL_STRICT_CR
8942 while (SPACE_OR_TAB(*t))
8944 while (SPACE_OR_TAB(*t) || *t == '\r')
8947 if (*t == '\n' || *t == '#') {
8948 ENTER_with_name("lex_format");
8949 SAVEI8(PL_parser->form_lex_state);
8950 SAVEI32(PL_lex_formbrack);
8951 PL_parser->form_lex_state = PL_lex_state;
8952 PL_lex_formbrack = PL_lex_brackets + 1;
8953 PL_parser->sub_error_count = PL_error_count;
8954 return yyl_leftcurly(aTHX_ s, 1);
8957 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8965 return yyl_bang(aTHX_ s + 1);
8968 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8969 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
8971 s = vcs_conflict_marker(s + 7);
8974 return yyl_leftpointy(aTHX_ s);
8977 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8978 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
8980 s = vcs_conflict_marker(s + 7);
8983 return yyl_rightpointy(aTHX_ s + 1);
8986 return yyl_dollar(aTHX_ s);
8989 return yyl_snail(aTHX_ s);
8991 case '/': /* may be division, defined-or, or pattern */
8992 return yyl_slash(aTHX_ s);
8994 case '?': /* conditional */
8996 if (!PL_lex_allbrackets
8997 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9002 PL_lex_allbrackets++;
9003 OPERATOR(PERLY_QUESTION_MARK);
9006 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9007 #ifdef PERL_STRICT_CR
9010 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9012 && (s == PL_linestart || s[-1] == '\n') )
9015 /* formbrack==2 means dot seen where arguments expected */
9016 return yyl_rightcurly(aTHX_ s, 2);
9018 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9022 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9025 if (!PL_lex_allbrackets
9026 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9034 pl_yylval.ival = OPf_SPECIAL;
9040 if (*s == '=' && !PL_lex_allbrackets
9041 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9049 case '0': case '1': case '2': case '3': case '4':
9050 case '5': case '6': case '7': case '8': case '9':
9051 s = scan_num(s, &pl_yylval);
9052 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9053 if (PL_expect == XOPERATOR)
9058 return yyl_sglquote(aTHX_ s);
9061 return yyl_dblquote(aTHX_ s);
9064 return yyl_backtick(aTHX_ s);
9067 return yyl_backslash(aTHX_ s + 1);
9070 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9071 char *start = s + 2;
9072 while (isDIGIT(*start) || *start == '_')
9074 if (*start == '.' && isDIGIT(start[1])) {
9075 s = scan_num(s, &pl_yylval);
9078 else if ((*start == ':' && start[1] == ':')
9079 || (PL_expect == XSTATE && *start == ':')) {
9080 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9084 else if (PL_expect == XSTATE) {
9086 while (d < PL_bufend && isSPACE(*d)) d++;
9088 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9093 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9094 if (!isALPHA(*start) && (PL_expect == XTERM
9095 || PL_expect == XREF || PL_expect == XSTATE
9096 || PL_expect == XTERMORDORDOR)) {
9097 GV *const gv = gv_fetchpvn_flags(s, start - s,
9098 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9100 s = scan_num(s, &pl_yylval);
9105 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9110 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9114 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9145 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9155 Works out what to call the token just pulled out of the input
9156 stream. The yacc parser takes care of taking the ops we return and
9157 stitching them into a tree.
9160 The type of the next token
9163 Check if we have already built the token; if so, use it.
9164 Switch based on the current state:
9165 - if we have a case modifier in a string, deal with that
9166 - handle other cases of interpolation inside a string
9167 - scan the next line if we are inside a format
9168 In the normal state, switch on the next character:
9170 if alphabetic, go to key lookup
9171 unrecognized character - croak
9172 - 0/4/26: handle end-of-line or EOF
9173 - cases for whitespace
9174 - \n and #: handle comments and line numbers
9175 - various operators, brackets and sigils
9178 - 'v': vstrings (or go to key lookup)
9179 - 'x' repetition operator (or go to key lookup)
9180 - other ASCII alphanumerics (key lookup begins here):
9183 scan built-in keyword (but do nothing with it yet)
9184 check for statement label
9185 check for lexical subs
9186 return yyl_just_a_word if there is one
9187 see whether built-in keyword is overridden
9188 switch on keyword number:
9189 - default: return yyl_just_a_word:
9190 not a built-in keyword; handle bareword lookup
9191 disambiguate between method and sub call
9192 fall back to bareword
9193 - cases for built-in keywords
9197 #define RSFP_FILENO (PL_rsfp)
9199 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9206 char *s = PL_bufptr;
9208 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9209 const U8* first_bad_char_loc;
9210 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9211 PL_bufend - PL_bufptr,
9212 &first_bad_char_loc)))
9214 _force_out_malformed_utf8_message(first_bad_char_loc,
9217 1 /* 1 means die */ );
9218 NOT_REACHED; /* NOTREACHED */
9220 PL_parser->recheck_utf8_validity = FALSE;
9223 SV* tmp = newSVpvs("");
9224 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9225 (IV)CopLINE(PL_curcop),
9226 lex_state_names[PL_lex_state],
9227 exp_name[PL_expect],
9228 pv_display(tmp, s, strlen(s), 0, 60));
9232 /* when we've already built the next token, just pull it out of the queue */
9235 pl_yylval = PL_nextval[PL_nexttoke];
9238 next_type = PL_nexttype[PL_nexttoke];
9239 if (next_type & (7<<24)) {
9240 if (next_type & (1<<24)) {
9241 if (PL_lex_brackets > 100)
9242 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9243 PL_lex_brackstack[PL_lex_brackets++] =
9244 (char) ((next_type >> 16) & 0xff);
9246 if (next_type & (2<<24))
9247 PL_lex_allbrackets++;
9248 if (next_type & (4<<24))
9249 PL_lex_allbrackets--;
9250 next_type &= 0xffff;
9252 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9256 switch (PL_lex_state) {
9258 case LEX_INTERPNORMAL:
9261 /* interpolated case modifiers like \L \U, including \Q and \E.
9262 when we get here, PL_bufptr is at the \
9264 case LEX_INTERPCASEMOD:
9265 /* handle \E or end of string */
9266 return yyl_interpcasemod(aTHX_ s);
9268 case LEX_INTERPPUSH:
9269 return REPORT(sublex_push());
9271 case LEX_INTERPSTART:
9272 if (PL_bufptr == PL_bufend)
9273 return REPORT(sublex_done());
9275 if(*PL_bufptr != '(')
9276 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9279 /* for /@a/, we leave the joining for the regex engine to do
9280 * (unless we're within \Q etc) */
9281 PL_lex_dojoin = (*PL_bufptr == '@'
9282 && (!PL_lex_inpat || PL_lex_casemods));
9283 PL_lex_state = LEX_INTERPNORMAL;
9284 if (PL_lex_dojoin) {
9285 NEXTVAL_NEXTTOKE.ival = 0;
9286 force_next(PERLY_COMMA);
9287 force_ident("\"", '$');
9288 NEXTVAL_NEXTTOKE.ival = 0;
9290 NEXTVAL_NEXTTOKE.ival = 0;
9291 force_next((2<<24)|'(');
9292 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9295 /* Convert (?{...}) and friends to 'do {...}' */
9296 if (PL_lex_inpat && *PL_bufptr == '(') {
9297 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9299 if (*PL_bufptr != '{')
9301 PL_expect = XTERMBLOCK;
9305 if (PL_lex_starts++) {
9307 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9308 if (!PL_lex_casemods && PL_lex_inpat)
9311 AopNOASSIGN(OP_CONCAT);
9315 case LEX_INTERPENDMAYBE:
9316 if (intuit_more(PL_bufptr, PL_bufend)) {
9317 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9323 if (PL_lex_dojoin) {
9324 const U8 dojoin_was = PL_lex_dojoin;
9325 PL_lex_dojoin = FALSE;
9326 PL_lex_state = LEX_INTERPCONCAT;
9327 PL_lex_allbrackets--;
9328 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9330 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9331 && SvEVALED(PL_lex_repl))
9333 if (PL_bufptr != PL_bufend)
9334 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9337 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9338 re_eval_str. If the here-doc body’s length equals the previous
9339 value of re_eval_start, re_eval_start will now be null. So
9340 check re_eval_str as well. */
9341 if (PL_parser->lex_shared->re_eval_start
9342 || PL_parser->lex_shared->re_eval_str) {
9344 if (*PL_bufptr != ')')
9345 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9347 /* having compiled a (?{..}) expression, return the original
9348 * text too, as a const */
9349 if (PL_parser->lex_shared->re_eval_str) {
9350 sv = PL_parser->lex_shared->re_eval_str;
9351 PL_parser->lex_shared->re_eval_str = NULL;
9353 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9354 SvPV_shrink_to_cur(sv);
9356 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9357 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9358 NEXTVAL_NEXTTOKE.opval =
9359 newSVOP(OP_CONST, 0,
9362 PL_parser->lex_shared->re_eval_start = NULL;
9364 return REPORT(PERLY_COMMA);
9368 case LEX_INTERPCONCAT:
9370 if (PL_lex_brackets)
9371 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9372 (long) PL_lex_brackets);
9374 if (PL_bufptr == PL_bufend)
9375 return REPORT(sublex_done());
9377 /* m'foo' still needs to be parsed for possible (?{...}) */
9378 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9379 SV *sv = newSVsv(PL_linestr);
9381 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9385 int save_error_count = PL_error_count;
9387 s = scan_const(PL_bufptr);
9389 /* Set flag if this was a pattern and there were errors. op.c will
9390 * refuse to compile a pattern with this flag set. Otherwise, we
9391 * could get segfaults, etc. */
9392 if (PL_lex_inpat && PL_error_count > save_error_count) {
9393 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9396 PL_lex_state = LEX_INTERPCASEMOD;
9398 PL_lex_state = LEX_INTERPSTART;
9401 if (s != PL_bufptr) {
9402 NEXTVAL_NEXTTOKE = pl_yylval;
9405 if (PL_lex_starts++) {
9406 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9407 if (!PL_lex_casemods && PL_lex_inpat)
9410 AopNOASSIGN(OP_CONCAT);
9420 if (PL_parser->sub_error_count != PL_error_count) {
9421 /* There was an error parsing a formline, which tends to
9423 Unlike interpolated sub-parsing, we can't treat any of
9424 these as recoverable, so no need to check sub_no_recover.
9428 assert(PL_lex_formbrack);
9429 s = scan_formline(PL_bufptr);
9430 if (!PL_lex_formbrack)
9431 return yyl_rightcurly(aTHX_ s, 1);
9436 /* We really do *not* want PL_linestr ever becoming a COW. */
9437 assert (!SvIsCOW(PL_linestr));
9439 PL_oldoldbufptr = PL_oldbufptr;
9442 if (PL_in_my == KEY_sigvar) {
9443 PL_parser->saw_infix_sigil = 0;
9444 return yyl_sigvar(aTHX_ s);
9448 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9449 On its return, we then need to set it to indicate whether the token
9450 we just encountered was an infix operator that (if we hadn't been
9451 expecting an operator) have been a sigil.
9453 bool expected_operator = (PL_expect == XOPERATOR);
9454 int ret = yyl_try(aTHX_ s);
9455 switch (pl_yylval.ival) {
9460 if (expected_operator) {
9461 PL_parser->saw_infix_sigil = 1;
9466 PL_parser->saw_infix_sigil = 0;
9476 Looks up an identifier in the pad or in a package
9478 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9479 rather than a plain pad var.
9482 PRIVATEREF if this is a lexical name.
9483 BAREWORD if this belongs to a package.
9486 if we're in a my declaration
9487 croak if they tried to say my($foo::bar)
9488 build the ops for a my() declaration
9489 if it's an access to a my() variable
9490 build ops for access to a my() variable
9491 if in a dq string, and they've said @foo and we can't find @foo
9493 build ops for a bareword
9497 S_pending_ident(pTHX)
9500 const char pit = (char)pl_yylval.ival;
9501 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9502 /* All routes through this function want to know if there is a colon. */
9503 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9505 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9506 "### Pending identifier '%s'\n", PL_tokenbuf); });
9507 assert(tokenbuf_len >= 2);
9509 /* if we're in a my(), we can't allow dynamics here.
9510 $foo'bar has already been turned into $foo::bar, so
9511 just check for colons.
9513 if it's a legal name, the OP is a PADANY.
9516 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9518 /* diag_listed_as: No package name allowed for variable %s
9520 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9522 *PL_tokenbuf=='&' ? "subroutine" : "variable",
9523 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9524 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9529 /* "my" variable %s can't be in a package */
9530 /* PL_no_myglob is constant */
9531 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9532 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9533 PL_in_my == KEY_my ? "my" : "state",
9534 *PL_tokenbuf == '&' ? "subroutine" : "variable",
9536 UTF ? SVf_UTF8 : 0);
9537 GCC_DIAG_RESTORE_STMT;
9540 if (PL_in_my == KEY_sigvar) {
9541 /* A signature 'padop' needs in addition, an op_first to
9542 * point to a child sigdefelem, and an extra field to hold
9543 * the signature index. We can achieve both by using an
9544 * UNOP_AUX and (ab)using the op_aux field to hold the
9545 * index. If we ever need more fields, use a real malloced
9546 * aux strut instead.
9548 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9549 INT2PTR(UNOP_AUX_item *,
9550 (PL_parser->sig_elems)));
9551 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9552 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9556 o = newOP(OP_PADANY, 0);
9557 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9558 UTF ? SVf_UTF8 : 0);
9559 if (PL_in_my == KEY_sigvar)
9562 pl_yylval.opval = o;
9568 build the ops for accesses to a my() variable.
9573 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9575 if (tmp != NOT_IN_PAD) {
9576 /* might be an "our" variable" */
9577 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9578 /* build ops for a bareword */
9579 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9580 HEK * const stashname = HvNAME_HEK(stash);
9581 SV * const sym = newSVhek(stashname);
9582 sv_catpvs(sym, "::");
9583 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9584 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9585 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9589 ((PL_tokenbuf[0] == '$') ? SVt_PV
9590 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9595 pl_yylval.opval = newOP(OP_PADANY, 0);
9596 pl_yylval.opval->op_targ = tmp;
9602 Whine if they've said @foo or @foo{key} in a doublequoted string,
9603 and @foo (or %foo) isn't a variable we can find in the symbol
9606 if (ckWARN(WARN_AMBIGUOUS)
9608 && PL_lex_state != LEX_NORMAL
9609 && !PL_lex_brackets)
9611 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9612 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9614 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9617 /* Downgraded from fatal to warning 20000522 mjd */
9618 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9619 "Possible unintended interpolation of %" UTF8f
9621 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9625 /* build ops for a bareword */
9626 pl_yylval.opval = newSVOP(OP_CONST, 0,
9627 newSVpvn_flags(PL_tokenbuf + 1,
9628 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9629 UTF ? SVf_UTF8 : 0 ));
9630 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9632 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9633 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9634 | ( UTF ? SVf_UTF8 : 0 ),
9635 ((PL_tokenbuf[0] == '$') ? SVt_PV
9636 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9642 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9644 PERL_ARGS_ASSERT_CHECKCOMMA;
9646 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9647 if (ckWARN(WARN_SYNTAX)) {
9650 for (w = s+2; *w && level; w++) {
9658 /* the list of chars below is for end of statements or
9659 * block / parens, boolean operators (&&, ||, //) and branch
9660 * constructs (or, and, if, until, unless, while, err, for).
9661 * Not a very solid hack... */
9662 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9663 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9664 "%s (...) interpreted as function",name);
9667 while (s < PL_bufend && isSPACE(*s))
9671 while (s < PL_bufend && isSPACE(*s))
9673 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9674 const char * const w = s;
9675 s += UTF ? UTF8SKIP(s) : 1;
9676 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9677 s += UTF ? UTF8SKIP(s) : 1;
9678 while (s < PL_bufend && isSPACE(*s))
9682 if (keyword(w, s - w, 0))
9685 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9686 if (gv && GvCVu(gv))
9691 Copy(w, tmpbuf+1, s - w, char);
9693 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9694 if (off != NOT_IN_PAD) return;
9696 Perl_croak(aTHX_ "No comma allowed after %s", what);
9701 /* S_new_constant(): do any overload::constant lookup.
9703 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9704 Best used as sv=new_constant(..., sv, ...).
9705 If s, pv are NULL, calls subroutine with one argument,
9706 and <type> is used with error messages only.
9707 <type> is assumed to be well formed UTF-8.
9709 If error_msg is not NULL, *error_msg will be set to any error encountered.
9710 Otherwise yyerror() will be used to output it */
9713 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9714 SV *sv, SV *pv, const char *type, STRLEN typelen,
9715 const char ** error_msg)
9718 HV * table = GvHV(PL_hintgv); /* ^H */
9723 const char *why1 = "", *why2 = "", *why3 = "";
9724 const char * optional_colon = ":"; /* Only some messages have a colon */
9727 PERL_ARGS_ASSERT_NEW_CONSTANT;
9728 /* We assume that this is true: */
9731 sv_2mortal(sv); /* Parent created it permanently */
9734 || ! (PL_hints & HINT_LOCALIZE_HH))
9737 optional_colon = "";
9741 cvp = hv_fetch(table, key, keylen, FALSE);
9742 if (!cvp || !SvOK(*cvp)) {
9745 why3 = "} is not defined";
9751 pv = newSVpvn_flags(s, len, SVs_TEMP);
9753 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9755 typesv = &PL_sv_undef;
9757 PUSHSTACKi(PERLSI_OVERLOAD);
9769 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9773 /* Check the eval first */
9774 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9776 const char * errstr;
9777 sv_catpvs(errsv, "Propagated");
9778 errstr = SvPV_const(errsv, errlen);
9779 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9781 res = SvREFCNT_inc_simple_NN(sv);
9785 SvREFCNT_inc_simple_void_NN(res);
9798 (void)sv_2mortal(sv);
9800 why1 = "Call to &{$^H{";
9802 why3 = "}} did not return a defined value";
9806 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9807 (int)(type ? typelen : len),
9815 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9817 return SvREFCNT_inc_simple_NN(sv);
9820 PERL_STATIC_INLINE void
9821 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9822 bool is_utf8, bool check_dollar, bool tick_warn)
9825 const char *olds = *s;
9826 PERL_ARGS_ASSERT_PARSE_IDENT;
9828 while (*s < PL_bufend) {
9830 Perl_croak(aTHX_ "%s", ident_too_long);
9831 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9832 /* The UTF-8 case must come first, otherwise things
9833 * like c\N{COMBINING TILDE} would start failing, as the
9834 * isWORDCHAR_A case below would gobble the 'c' up.
9837 char *t = *s + UTF8SKIP(*s);
9838 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9841 if (*d + (t - *s) > e)
9842 Perl_croak(aTHX_ "%s", ident_too_long);
9843 Copy(*s, *d, t - *s, char);
9847 else if ( isWORDCHAR_A(**s) ) {
9850 } while (isWORDCHAR_A(**s) && *d < e);
9852 else if ( allow_package
9854 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9861 else if (allow_package && **s == ':' && (*s)[1] == ':'
9862 /* Disallow things like Foo::$bar. For the curious, this is
9863 * the code path that triggers the "Bad name after" warning
9864 * when looking for barewords.
9866 && !(check_dollar && (*s)[2] == '$')) {
9873 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9874 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9877 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9880 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9881 "Old package separator used in string");
9882 if (olds[-1] == '#')
9886 if (*olds == '\'') {
9893 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9894 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9895 UTF8fARG(is_utf8, d2-this_d, this_d));
9900 /* Returns a NUL terminated string, with the length of the string written to
9904 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9907 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9908 bool is_utf8 = cBOOL(UTF);
9910 PERL_ARGS_ASSERT_SCAN_WORD;
9912 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9918 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9919 * iff Unicode semantics are to be used. The legal ones are any of:
9920 * a) all ASCII characters except:
9921 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9923 * The final case currently doesn't get this far in the program, so we
9924 * don't test for it. If that were to change, it would be ok to allow it.
9925 * b) When not under Unicode rules, any upper Latin1 character
9926 * c) Otherwise, when unicode rules are used, all XIDS characters.
9928 * Because all ASCII characters have the same representation whether
9929 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9930 * '{' without knowing if is UTF-8 or not. */
9931 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
9932 (isGRAPH_A(*(s)) || ((is_utf8) \
9933 ? isIDFIRST_utf8_safe(s, e) \
9935 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9938 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9940 I32 herelines = PL_parser->herelines;
9941 SSize_t bracket = -1;
9944 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9945 bool is_utf8 = cBOOL(UTF);
9946 I32 orig_copline = 0, tmp_copline = 0;
9948 PERL_ARGS_ASSERT_SCAN_IDENT;
9950 if (isSPACE(*s) || !*s)
9952 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
9953 bool is_zero= *s == '0' ? TRUE : FALSE;
9954 char *digit_start= d;
9956 while (s < PL_bufend && isDIGIT(*s)) {
9958 Perl_croak(aTHX_ "%s", ident_too_long);
9961 if (is_zero && d - digit_start > 1)
9962 Perl_croak(aTHX_ ident_var_zero_multi_digit);
9964 else { /* See if it is a "normal" identifier */
9965 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9970 /* Either a digit variable, or parse_ident() found an identifier
9971 (anything valid as a bareword), so job done and return. */
9972 if (PL_lex_state != LEX_NORMAL)
9973 PL_lex_state = LEX_INTERPENDMAYBE;
9977 /* Here, it is not a run-of-the-mill identifier name */
9979 if (*s == '$' && s[1]
9980 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9981 || isDIGIT_A((U8)s[1])
9984 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9986 /* Dereferencing a value in a scalar variable.
9987 The alternatives are different syntaxes for a scalar variable.
9988 Using ' as a leading package separator isn't allowed. :: is. */
9991 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9993 bracket = s - SvPVX(PL_linestr);
9995 orig_copline = CopLINE(PL_curcop);
9996 if (s < PL_bufend && isSPACE(*s)) {
10000 if ((s <= PL_bufend - ((is_utf8)
10003 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
10006 const STRLEN skip = UTF8SKIP(s);
10009 for ( i = 0; i < skip; i++ )
10014 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10016 bool is_zero= *d == '0' ? TRUE : FALSE;
10017 char *digit_start= d;
10018 while (s < PL_bufend && isDIGIT(*s)) {
10021 Perl_croak(aTHX_ "%s", ident_too_long);
10024 if (is_zero && d - digit_start > 1)
10025 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10030 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10031 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10035 /* Warn about ambiguous code after unary operators if {...} notation isn't
10036 used. There's no difference in ambiguity; it's merely a heuristic
10037 about when not to warn. */
10038 else if (ck_uni && bracket == -1)
10040 if (bracket != -1) {
10043 /* If we were processing {...} notation then... */
10044 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10045 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10048 /* note we have to check for a normal identifier first,
10049 * as it handles utf8 symbols, and only after that has
10050 * been ruled out can we look at the caret words */
10051 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10052 /* if it starts as a valid identifier, assume that it is one.
10053 (the later check for } being at the expected point will trap
10054 cases where this doesn't pan out.) */
10055 d += is_utf8 ? UTF8SKIP(d) : 1;
10056 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10059 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10061 while (isWORDCHAR(*s) && d < e) {
10065 Perl_croak(aTHX_ "%s", ident_too_long);
10068 tmp_copline = CopLINE(PL_curcop);
10069 if (s < PL_bufend && isSPACE(*s)) {
10072 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10073 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10074 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10075 const char * const brack =
10077 ((*s == '[') ? "[...]" : "{...}");
10078 orig_copline = CopLINE(PL_curcop);
10079 CopLINE_set(PL_curcop, tmp_copline);
10080 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10081 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10082 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10083 funny, dest, brack, funny, dest, brack);
10084 CopLINE_set(PL_curcop, orig_copline);
10087 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10088 PL_lex_allbrackets++;
10093 if ( !tmp_copline )
10094 tmp_copline = CopLINE(PL_curcop);
10095 if ((skip = s < PL_bufend && isSPACE(*s))) {
10096 /* Avoid incrementing line numbers or resetting PL_linestart,
10097 in case we have to back up. */
10098 STRLEN s_off = s - SvPVX(PL_linestr);
10100 s = SvPVX(PL_linestr) + s_off;
10105 /* Expect to find a closing } after consuming any trailing whitespace.
10108 /* Now increment line numbers if applicable. */
10112 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10113 PL_lex_state = LEX_INTERPEND;
10116 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10117 if (ckWARN(WARN_AMBIGUOUS)
10118 && (keyword(dest, d - dest, 0)
10119 || get_cvn_flags(dest, d - dest, is_utf8
10123 SV *tmp = newSVpvn_flags( dest, d - dest,
10124 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10127 orig_copline = CopLINE(PL_curcop);
10128 CopLINE_set(PL_curcop, tmp_copline);
10129 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10130 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10131 funny, SVfARG(tmp), funny, SVfARG(tmp));
10132 CopLINE_set(PL_curcop, orig_copline);
10137 /* Didn't find the closing } at the point we expected, so restore
10138 state such that the next thing to process is the opening { and */
10139 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10140 CopLINE_set(PL_curcop, orig_copline);
10141 PL_parser->herelines = herelines;
10143 PL_parser->sub_no_recover = TRUE;
10146 else if ( PL_lex_state == LEX_INTERPNORMAL
10147 && !PL_lex_brackets
10148 && !intuit_more(s, PL_bufend))
10149 PL_lex_state = LEX_INTERPEND;
10154 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10156 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10157 * found in the parse starting at 's', based on the subset that are valid
10158 * in this context input to this routine in 'valid_flags'. Advances s.
10159 * Returns TRUE if the input should be treated as a valid flag, so the next
10160 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10161 * upon first call on the current regex. This routine will set it to any
10162 * charset modifier found. The caller shouldn't change it. This way,
10163 * another charset modifier encountered in the parse can be detected as an
10164 * error, as we have decided to allow only one */
10166 const char c = **s;
10167 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10169 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10170 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10171 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10172 UTF ? SVf_UTF8 : 0);
10174 /* Pretend that it worked, so will continue processing before
10183 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10184 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10185 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10186 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10187 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10188 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10189 case LOCALE_PAT_MOD:
10191 goto multiple_charsets;
10193 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10196 case UNICODE_PAT_MOD:
10198 goto multiple_charsets;
10200 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10203 case ASCII_RESTRICT_PAT_MOD:
10205 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10209 /* Error if previous modifier wasn't an 'a', but if it was, see
10210 * if, and accept, a second occurrence (only) */
10211 if (*charset != 'a'
10212 || get_regex_charset(*pmfl)
10213 != REGEX_ASCII_RESTRICTED_CHARSET)
10215 goto multiple_charsets;
10217 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10221 case DEPENDS_PAT_MOD:
10223 goto multiple_charsets;
10225 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10234 if (*charset != c) {
10235 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10237 else if (c == 'a') {
10238 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10239 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10242 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10245 /* Pretend that it worked, so will continue processing before dieing */
10251 S_scan_pat(pTHX_ char *start, I32 type)
10255 const char * const valid_flags =
10256 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10257 char charset = '\0'; /* character set modifier */
10258 unsigned int x_mod_count = 0;
10260 PERL_ARGS_ASSERT_SCAN_PAT;
10262 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10264 Perl_croak(aTHX_ "Search pattern not terminated");
10266 pm = (PMOP*)newPMOP(type, 0);
10267 if (PL_multi_open == '?') {
10268 /* This is the only point in the code that sets PMf_ONCE: */
10269 pm->op_pmflags |= PMf_ONCE;
10271 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10272 allows us to restrict the list needed by reset to just the ??
10274 assert(type != OP_TRANS);
10276 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10279 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10282 elements = mg->mg_len / sizeof(PMOP**);
10283 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10284 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10285 mg->mg_len = elements * sizeof(PMOP**);
10286 PmopSTASH_set(pm,PL_curstash);
10290 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10291 * anon CV. False positives like qr/[(?{]/ are harmless */
10293 if (type == OP_QR) {
10295 char *e, *p = SvPV(PL_lex_stuff, len);
10297 for (; p < e; p++) {
10298 if (p[0] == '(' && p[1] == '?'
10299 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10301 pm->op_pmflags |= PMf_HAS_CV;
10305 pm->op_pmflags |= PMf_IS_QR;
10308 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10309 &s, &charset, &x_mod_count))
10311 /* issue a warning if /c is specified,but /g is not */
10312 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10314 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10315 "Use of /c modifier is meaningless without /g" );
10318 PL_lex_op = (OP*)pm;
10319 pl_yylval.ival = OP_MATCH;
10324 S_scan_subst(pTHX_ char *start)
10330 line_t linediff = 0;
10332 char charset = '\0'; /* character set modifier */
10333 unsigned int x_mod_count = 0;
10336 PERL_ARGS_ASSERT_SCAN_SUBST;
10338 pl_yylval.ival = OP_NULL;
10340 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10343 Perl_croak(aTHX_ "Substitution pattern not terminated");
10347 first_start = PL_multi_start;
10348 first_line = CopLINE(PL_curcop);
10349 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10351 SvREFCNT_dec_NN(PL_lex_stuff);
10352 PL_lex_stuff = NULL;
10353 Perl_croak(aTHX_ "Substitution replacement not terminated");
10355 PL_multi_start = first_start; /* so whole substitution is taken together */
10357 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10361 if (*s == EXEC_PAT_MOD) {
10365 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10366 &s, &charset, &x_mod_count))
10372 if ((pm->op_pmflags & PMf_CONTINUE)) {
10373 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10377 SV * const repl = newSVpvs("");
10380 pm->op_pmflags |= PMf_EVAL;
10381 for (; es > 1; es--) {
10382 sv_catpvs(repl, "eval ");
10384 sv_catpvs(repl, "do {");
10385 sv_catsv(repl, PL_parser->lex_sub_repl);
10386 sv_catpvs(repl, "}");
10387 SvREFCNT_dec(PL_parser->lex_sub_repl);
10388 PL_parser->lex_sub_repl = repl;
10392 linediff = CopLINE(PL_curcop) - first_line;
10394 CopLINE_set(PL_curcop, first_line);
10396 if (linediff || es) {
10397 /* the IVX field indicates that the replacement string is a s///e;
10398 * the NVX field indicates how many src code lines the replacement
10400 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10401 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10402 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10406 PL_lex_op = (OP*)pm;
10407 pl_yylval.ival = OP_SUBST;
10412 S_scan_trans(pTHX_ char *start)
10419 bool nondestruct = 0;
10422 PERL_ARGS_ASSERT_SCAN_TRANS;
10424 pl_yylval.ival = OP_NULL;
10426 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10428 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10432 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10434 SvREFCNT_dec_NN(PL_lex_stuff);
10435 PL_lex_stuff = NULL;
10436 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10439 complement = del = squash = 0;
10443 complement = OPpTRANS_COMPLEMENT;
10446 del = OPpTRANS_DELETE;
10449 squash = OPpTRANS_SQUASH;
10461 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10462 o->op_private &= ~OPpTRANS_ALL;
10463 o->op_private |= del|squash|complement;
10466 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10473 Takes a pointer to the first < in <<FOO.
10474 Returns a pointer to the byte following <<FOO.
10476 This function scans a heredoc, which involves different methods
10477 depending on whether we are in a string eval, quoted construct, etc.
10478 This is because PL_linestr could containing a single line of input, or
10479 a whole string being evalled, or the contents of the current quote-
10482 The two basic methods are:
10483 - Steal lines from the input stream
10484 - Scan the heredoc in PL_linestr and remove it therefrom
10486 In a file scope or filtered eval, the first method is used; in a
10487 string eval, the second.
10489 In a quote-like operator, we have to choose between the two,
10490 depending on where we can find a newline. We peek into outer lex-
10491 ing scopes until we find one with a newline in it. If we reach the
10492 outermost lexing scope and it is a file, we use the stream method.
10493 Otherwise it is treated as an eval.
10497 S_scan_heredoc(pTHX_ char *s)
10499 I32 op_type = OP_SCALAR;
10507 I32 indent_len = 0;
10508 bool indented = FALSE;
10509 const bool infile = PL_rsfp || PL_parser->filtered;
10510 const line_t origline = CopLINE(PL_curcop);
10511 LEXSHARED *shared = PL_parser->lex_shared;
10513 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10516 d = PL_tokenbuf + 1;
10517 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10518 *PL_tokenbuf = '\n';
10521 if (*peek == '~') {
10526 while (SPACE_OR_TAB(*peek))
10529 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10532 s = delimcpy(d, e, s, PL_bufend, term, &len);
10533 if (s == PL_bufend)
10534 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10540 /* <<\FOO is equivalent to <<'FOO' */
10545 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10546 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10550 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10551 peek += UTF ? UTF8SKIP(peek) : 1;
10554 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10555 Copy(s, d, len, char);
10560 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10561 Perl_croak(aTHX_ "Delimiter for here document is too long");
10565 len = d - PL_tokenbuf;
10567 #ifndef PERL_STRICT_CR
10568 d = (char *) memchr(s, '\r', PL_bufend - s);
10570 char * const olds = s;
10572 while (s < PL_bufend) {
10578 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10587 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10592 tmpstr = newSV_type(SVt_PVIV);
10593 SvGROW(tmpstr, 80);
10594 if (term == '\'') {
10595 op_type = OP_CONST;
10596 SvIV_set(tmpstr, -1);
10598 else if (term == '`') {
10599 op_type = OP_BACKTICK;
10600 SvIV_set(tmpstr, '\\');
10603 PL_multi_start = origline + 1 + PL_parser->herelines;
10604 PL_multi_open = PL_multi_close = '<';
10606 /* inside a string eval or quote-like operator */
10607 if (!infile || PL_lex_inwhat) {
10610 char * const olds = s;
10611 PERL_CONTEXT * const cx = CX_CUR();
10612 /* These two fields are not set until an inner lexing scope is
10613 entered. But we need them set here. */
10614 shared->ls_bufptr = s;
10615 shared->ls_linestr = PL_linestr;
10617 if (PL_lex_inwhat) {
10618 /* Look for a newline. If the current buffer does not have one,
10619 peek into the line buffer of the parent lexing scope, going
10620 up as many levels as necessary to find one with a newline
10623 while (!(s = (char *)memchr(
10624 (void *)shared->ls_bufptr, '\n',
10625 SvEND(shared->ls_linestr)-shared->ls_bufptr
10628 shared = shared->ls_prev;
10629 /* shared is only null if we have gone beyond the outermost
10630 lexing scope. In a file, we will have broken out of the
10631 loop in the previous iteration. In an eval, the string buf-
10632 fer ends with "\n;", so the while condition above will have
10633 evaluated to false. So shared can never be null. Or so you
10634 might think. Odd syntax errors like s;@{<<; can gobble up
10635 the implicit semicolon at the end of a flie, causing the
10636 file handle to be closed even when we are not in a string
10637 eval. So shared may be null in that case.
10638 (Closing '>>}' here to balance the earlier open brace for
10639 editors that look for matched pairs.) */
10640 if (UNLIKELY(!shared))
10642 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10643 most lexing scope. In a file, shared->ls_linestr at that
10644 level is just one line, so there is no body to steal. */
10645 if (infile && !shared->ls_prev) {
10651 else { /* eval or we've already hit EOF */
10652 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10657 linestr = shared->ls_linestr;
10658 bufend = SvEND(linestr);
10663 while (s < bufend - len + 1) {
10665 ++PL_parser->herelines;
10667 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10671 /* Only valid if it's preceded by whitespace only */
10672 while (backup != myolds && --backup >= myolds) {
10673 if (! SPACE_OR_TAB(*backup)) {
10679 /* No whitespace or all! */
10680 if (backup == s || *backup == '\n') {
10681 Newx(indent, indent_len + 1, char);
10682 memcpy(indent, backup + 1, indent_len);
10683 indent[indent_len] = 0;
10684 s--; /* before our delimiter */
10685 PL_parser->herelines--; /* this line doesn't count */
10692 while (s < bufend - len + 1
10693 && memNE(s,PL_tokenbuf,len) )
10696 ++PL_parser->herelines;
10700 if (s >= bufend - len + 1) {
10704 sv_setpvn(tmpstr,d+1,s-d);
10706 /* the preceding stmt passes a newline */
10707 PL_parser->herelines++;
10709 /* s now points to the newline after the heredoc terminator.
10710 d points to the newline before the body of the heredoc.
10713 /* We are going to modify linestr in place here, so set
10714 aside copies of the string if necessary for re-evals or
10716 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10717 check shared->re_eval_str. */
10718 if (shared->re_eval_start || shared->re_eval_str) {
10719 /* Set aside the rest of the regexp */
10720 if (!shared->re_eval_str)
10721 shared->re_eval_str =
10722 newSVpvn(shared->re_eval_start,
10723 bufend - shared->re_eval_start);
10724 shared->re_eval_start -= s-d;
10727 if (cxstack_ix >= 0
10728 && CxTYPE(cx) == CXt_EVAL
10729 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10730 && cx->blk_eval.cur_text == linestr)
10732 cx->blk_eval.cur_text = newSVsv(linestr);
10733 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10736 /* Copy everything from s onwards back to d. */
10737 Move(s,d,bufend-s + 1,char);
10738 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10739 /* Setting PL_bufend only applies when we have not dug deeper
10740 into other scopes, because sublex_done sets PL_bufend to
10741 SvEND(PL_linestr). */
10742 if (shared == PL_parser->lex_shared)
10743 PL_bufend = SvEND(linestr);
10748 char *oldbufptr_save;
10749 char *oldoldbufptr_save;
10751 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10752 term = PL_tokenbuf[1];
10754 linestr_save = PL_linestr; /* must restore this afterwards */
10755 d = s; /* and this */
10756 oldbufptr_save = PL_oldbufptr;
10757 oldoldbufptr_save = PL_oldoldbufptr;
10758 PL_linestr = newSVpvs("");
10759 PL_bufend = SvPVX(PL_linestr);
10762 PL_bufptr = PL_bufend;
10763 CopLINE_set(PL_curcop,
10764 origline + 1 + PL_parser->herelines);
10766 if ( !lex_next_chunk(LEX_NO_TERM)
10767 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10769 /* Simply freeing linestr_save might seem simpler here, as it
10770 does not matter what PL_linestr points to, since we are
10771 about to croak; but in a quote-like op, linestr_save
10772 will have been prospectively freed already, via
10773 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10774 restore PL_linestr. */
10775 SvREFCNT_dec_NN(PL_linestr);
10776 PL_linestr = linestr_save;
10777 PL_oldbufptr = oldbufptr_save;
10778 PL_oldoldbufptr = oldoldbufptr_save;
10782 CopLINE_set(PL_curcop, origline);
10784 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10785 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10786 /* ^That should be enough to avoid this needing to grow: */
10787 sv_catpvs(PL_linestr, "\n\0");
10788 assert(s == SvPVX(PL_linestr));
10789 PL_bufend = SvEND(PL_linestr);
10793 PL_parser->herelines++;
10794 PL_last_lop = PL_last_uni = NULL;
10796 #ifndef PERL_STRICT_CR
10797 if (PL_bufend - PL_linestart >= 2) {
10798 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10799 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10801 PL_bufend[-2] = '\n';
10803 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10805 else if (PL_bufend[-1] == '\r')
10806 PL_bufend[-1] = '\n';
10808 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10809 PL_bufend[-1] = '\n';
10812 if (indented && (PL_bufend-s) >= len) {
10813 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10816 char *backup = found;
10819 /* Only valid if it's preceded by whitespace only */
10820 while (backup != s && --backup >= s) {
10821 if (! SPACE_OR_TAB(*backup)) {
10827 /* All whitespace or none! */
10828 if (backup == found || SPACE_OR_TAB(*backup)) {
10829 Newx(indent, indent_len + 1, char);
10830 memcpy(indent, backup, indent_len);
10831 indent[indent_len] = 0;
10832 SvREFCNT_dec(PL_linestr);
10833 PL_linestr = linestr_save;
10834 PL_linestart = SvPVX(linestr_save);
10835 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10836 PL_oldbufptr = oldbufptr_save;
10837 PL_oldoldbufptr = oldoldbufptr_save;
10843 /* Didn't find it */
10844 sv_catsv(tmpstr,PL_linestr);
10847 if (*s == term && PL_bufend-s >= len
10848 && memEQ(s,PL_tokenbuf + 1,len))
10850 SvREFCNT_dec(PL_linestr);
10851 PL_linestr = linestr_save;
10852 PL_linestart = SvPVX(linestr_save);
10853 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10854 PL_oldbufptr = oldbufptr_save;
10855 PL_oldoldbufptr = oldoldbufptr_save;
10860 sv_catsv(tmpstr,PL_linestr);
10866 PL_multi_end = origline + PL_parser->herelines;
10868 if (indented && indent) {
10869 STRLEN linecount = 1;
10870 STRLEN herelen = SvCUR(tmpstr);
10871 char *ss = SvPVX(tmpstr);
10872 char *se = ss + herelen;
10873 SV *newstr = newSV(herelen+1);
10876 /* Trim leading whitespace */
10878 /* newline only? Copy and move on */
10880 sv_catpvs(newstr,"\n");
10884 /* Found our indentation? Strip it */
10886 else if (se - ss >= indent_len
10887 && memEQ(ss, indent, indent_len))
10892 while ((ss + le) < se && *(ss + le) != '\n')
10895 sv_catpvn(newstr, ss, le);
10898 /* Line doesn't begin with our indentation? Croak */
10903 "Indentation on line %d of here-doc doesn't match delimiter",
10909 /* avoid sv_setsv() as we dont wan't to COW here */
10910 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10912 SvREFCNT_dec_NN(newstr);
10915 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10916 SvPV_shrink_to_cur(tmpstr);
10920 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10924 PL_lex_stuff = tmpstr;
10925 pl_yylval.ival = op_type;
10931 SvREFCNT_dec(tmpstr);
10932 CopLINE_set(PL_curcop, origline);
10933 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10937 /* scan_inputsymbol
10938 takes: position of first '<' in input buffer
10939 returns: position of first char following the matching '>' in
10941 side-effects: pl_yylval and lex_op are set.
10946 <<>> read from ARGV without magic open
10947 <FH> read from filehandle
10948 <pkg::FH> read from package qualified filehandle
10949 <pkg'FH> read from package qualified filehandle
10950 <$fh> read from filehandle in $fh
10951 <*.h> filename glob
10956 S_scan_inputsymbol(pTHX_ char *start)
10958 char *s = start; /* current position in buffer */
10961 bool nomagicopen = FALSE;
10962 char *d = PL_tokenbuf; /* start of temp holding space */
10963 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10965 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10967 end = (char *) memchr(s, '\n', PL_bufend - s);
10970 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10971 nomagicopen = TRUE;
10977 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10979 /* die if we didn't have space for the contents of the <>,
10980 or if it didn't end, or if we see a newline
10983 if (len >= (I32)sizeof PL_tokenbuf)
10984 Perl_croak(aTHX_ "Excessively long <> operator");
10986 Perl_croak(aTHX_ "Unterminated <> operator");
10991 Remember, only scalar variables are interpreted as filehandles by
10992 this code. Anything more complex (e.g., <$fh{$num}>) will be
10993 treated as a glob() call.
10994 This code makes use of the fact that except for the $ at the front,
10995 a scalar variable and a filehandle look the same.
10997 if (*d == '$' && d[1]) d++;
10999 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11000 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11001 d += UTF ? UTF8SKIP(d) : 1;
11004 /* If we've tried to read what we allow filehandles to look like, and
11005 there's still text left, then it must be a glob() and not a getline.
11006 Use scan_str to pull out the stuff between the <> and treat it
11007 as nothing more than a string.
11010 if (d - PL_tokenbuf != len) {
11011 pl_yylval.ival = OP_GLOB;
11012 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11014 Perl_croak(aTHX_ "Glob not terminated");
11018 bool readline_overriden = FALSE;
11020 /* we're in a filehandle read situation */
11023 /* turn <> into <ARGV> */
11025 Copy("ARGV",d,5,char);
11027 /* Check whether readline() is overriden */
11028 if ((gv_readline = gv_override("readline",8)))
11029 readline_overriden = TRUE;
11031 /* if <$fh>, create the ops to turn the variable into a
11035 /* try to find it in the pad for this block, otherwise find
11036 add symbol table ops
11038 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11039 if (tmp != NOT_IN_PAD) {
11040 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11041 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11042 HEK * const stashname = HvNAME_HEK(stash);
11043 SV * const sym = sv_2mortal(newSVhek(stashname));
11044 sv_catpvs(sym, "::");
11045 sv_catpv(sym, d+1);
11050 OP * const o = newOP(OP_PADSV, 0);
11052 PL_lex_op = readline_overriden
11053 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11054 op_append_elem(OP_LIST, o,
11055 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11056 : newUNOP(OP_READLINE, 0, o);
11064 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11066 PL_lex_op = readline_overriden
11067 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11068 op_append_elem(OP_LIST,
11069 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11070 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11071 : newUNOP(OP_READLINE, 0,
11072 newUNOP(OP_RV2SV, 0,
11073 newGVOP(OP_GV, 0, gv)));
11075 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11076 pl_yylval.ival = OP_NULL;
11079 /* If it's none of the above, it must be a literal filehandle
11080 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11082 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11083 PL_lex_op = readline_overriden
11084 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11085 op_append_elem(OP_LIST,
11086 newGVOP(OP_GV, 0, gv),
11087 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11088 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11089 pl_yylval.ival = OP_NULL;
11099 start position in buffer
11100 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11101 only if they are of the open/close form
11102 keep_delims preserve the delimiters around the string
11103 re_reparse compiling a run-time /(?{})/:
11104 collapse // to /, and skip encoding src
11105 delimp if non-null, this is set to the position of
11106 the closing delimiter, or just after it if
11107 the closing and opening delimiters differ
11108 (i.e., the opening delimiter of a substitu-
11110 returns: position to continue reading from buffer
11111 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11112 updates the read buffer.
11114 This subroutine pulls a string out of the input. It is called for:
11115 q single quotes q(literal text)
11116 ' single quotes 'literal text'
11117 qq double quotes qq(interpolate $here please)
11118 " double quotes "interpolate $here please"
11119 qx backticks qx(/bin/ls -l)
11120 ` backticks `/bin/ls -l`
11121 qw quote words @EXPORT_OK = qw( func() $spam )
11122 m// regexp match m/this/
11123 s/// regexp substitute s/this/that/
11124 tr/// string transliterate tr/this/that/
11125 y/// string transliterate y/this/that/
11126 ($*@) sub prototypes sub foo ($)
11127 (stuff) sub attr parameters sub foo : attr(stuff)
11128 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11130 In most of these cases (all but <>, patterns and transliterate)
11131 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11132 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11133 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11136 It skips whitespace before the string starts, and treats the first
11137 character as the delimiter. If the delimiter is one of ([{< then
11138 the corresponding "close" character )]}> is used as the closing
11139 delimiter. It allows quoting of delimiters, and if the string has
11140 balanced delimiters ([{<>}]) it allows nesting.
11142 On success, the SV with the resulting string is put into lex_stuff or,
11143 if that is already non-NULL, into lex_repl. The second case occurs only
11144 when parsing the RHS of the special constructs s/// and tr/// (y///).
11145 For convenience, the terminating delimiter character is stuffed into
11150 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11154 SV *sv; /* scalar value: string */
11155 const char *tmps; /* temp string, used for delimiter matching */
11156 char *s = start; /* current position in the buffer */
11157 char term; /* terminating character */
11158 char *to; /* current position in the sv's data */
11159 I32 brackets = 1; /* bracket nesting level */
11160 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11161 IV termcode; /* terminating char. code */
11162 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11163 STRLEN termlen; /* length of terminating string */
11166 /* The delimiters that have a mirror-image closing one */
11167 const char * opening_delims = "([{<";
11168 const char * closing_delims = ")]}>";
11170 /* The only non-UTF character that isn't a stand alone grapheme is
11171 * white-space, hence can't be a delimiter. */
11172 const char * non_grapheme_msg = "Use of unassigned code point or"
11173 " non-standalone grapheme for a delimiter"
11175 PERL_ARGS_ASSERT_SCAN_STR;
11177 /* skip space before the delimiter */
11182 /* mark where we are, in case we need to report errors */
11185 /* after skipping whitespace, the next character is the terminator */
11187 if (!UTF || UTF8_IS_INVARIANT(term)) {
11188 termcode = termstr[0] = term;
11192 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11193 if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11198 yyerror(non_grapheme_msg);
11201 Copy(s, termstr, termlen, U8);
11204 /* mark where we are */
11205 PL_multi_start = CopLINE(PL_curcop);
11206 PL_multi_open = termcode;
11207 herelines = PL_parser->herelines;
11209 /* If the delimiter has a mirror-image closing one, get it */
11210 if (term && (tmps = strchr(opening_delims, term))) {
11211 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11214 PL_multi_close = termcode;
11216 if (PL_multi_open == PL_multi_close) {
11217 keep_bracketed_quoted = FALSE;
11220 /* create a new SV to hold the contents. 79 is the SV's initial length.
11221 What a random number. */
11222 sv = newSV_type(SVt_PVIV);
11224 SvIV_set(sv, termcode);
11225 (void)SvPOK_only(sv); /* validate pointer */
11227 /* move past delimiter and try to read a complete string */
11229 sv_catpvn(sv, s, termlen);
11232 /* extend sv if need be */
11233 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11234 /* set 'to' to the next character in the sv's string */
11235 to = SvPVX(sv)+SvCUR(sv);
11237 /* if open delimiter is the close delimiter read unbridle */
11238 if (PL_multi_open == PL_multi_close) {
11239 for (; s < PL_bufend; s++,to++) {
11240 /* embedded newlines increment the current line number */
11241 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11242 COPLINE_INC_WITH_HERELINES;
11243 /* handle quoted delimiters */
11244 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11245 if (!keep_bracketed_quoted
11247 || (re_reparse && s[1] == '\\'))
11250 else /* any other quotes are simply copied straight through */
11253 /* terminate when run out of buffer (the for() condition), or
11254 have found the terminator */
11255 else if (*s == term) { /* First byte of terminator matches */
11256 if (termlen == 1) /* If is the only byte, are done */
11259 /* If the remainder of the terminator matches, also are
11260 * done, after checking that is a separate grapheme */
11261 if ( s + termlen <= PL_bufend
11262 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11265 && UNLIKELY(! is_grapheme((U8 *) start,
11270 yyerror(non_grapheme_msg);
11275 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11283 /* if the terminator isn't the same as the start character (e.g.,
11284 matched brackets), we have to allow more in the quoting, and
11285 be prepared for nested brackets.
11288 /* read until we run out of string, or we find the terminator */
11289 for (; s < PL_bufend; s++,to++) {
11290 /* embedded newlines increment the line count */
11291 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11292 COPLINE_INC_WITH_HERELINES;
11293 /* backslashes can escape the open or closing characters */
11294 if (*s == '\\' && s+1 < PL_bufend) {
11295 if (!keep_bracketed_quoted
11296 && ( ((UV)s[1] == PL_multi_open)
11297 || ((UV)s[1] == PL_multi_close) ))
11304 /* allow nested opens and closes */
11305 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11307 else if ((UV)*s == PL_multi_open)
11309 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11314 /* terminate the copied string and update the sv's end-of-string */
11316 SvCUR_set(sv, to - SvPVX_const(sv));
11319 * this next chunk reads more into the buffer if we're not done yet
11323 break; /* handle case where we are done yet :-) */
11325 #ifndef PERL_STRICT_CR
11326 if (to - SvPVX_const(sv) >= 2) {
11327 if ( (to[-2] == '\r' && to[-1] == '\n')
11328 || (to[-2] == '\n' && to[-1] == '\r'))
11332 SvCUR_set(sv, to - SvPVX_const(sv));
11334 else if (to[-1] == '\r')
11337 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11341 /* if we're out of file, or a read fails, bail and reset the current
11342 line marker so we can report where the unterminated string began
11344 COPLINE_INC_WITH_HERELINES;
11345 PL_bufptr = PL_bufend;
11346 if (!lex_next_chunk(0)) {
11348 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11351 s = start = PL_bufptr;
11354 /* at this point, we have successfully read the delimited string */
11357 sv_catpvn(sv, s, termlen);
11363 PL_multi_end = CopLINE(PL_curcop);
11364 CopLINE_set(PL_curcop, PL_multi_start);
11365 PL_parser->herelines = herelines;
11367 /* if we allocated too much space, give some back */
11368 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11369 SvLEN_set(sv, SvCUR(sv) + 1);
11370 SvPV_shrink_to_cur(sv);
11373 /* decide whether this is the first or second quoted string we've read
11378 PL_parser->lex_sub_repl = sv;
11381 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11387 takes: pointer to position in buffer
11388 returns: pointer to new position in buffer
11389 side-effects: builds ops for the constant in pl_yylval.op
11391 Read a number in any of the formats that Perl accepts:
11393 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11394 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11395 0b[01](_?[01])* binary integers
11396 0o?[0-7](_?[0-7])* octal integers
11397 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
11398 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
11400 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11403 If it reads a number without a decimal point or an exponent, it will
11404 try converting the number to an integer and see if it can do so
11405 without loss of precision.
11409 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11411 const char *s = start; /* current position in buffer */
11412 char *d; /* destination in temp buffer */
11413 char *e; /* end of temp buffer */
11414 NV nv; /* number read, as a double */
11415 SV *sv = NULL; /* place to put the converted number */
11416 bool floatit; /* boolean: int or float? */
11417 const char *lastub = NULL; /* position of last underbar */
11418 static const char* const number_too_long = "Number too long";
11419 bool warned_about_underscore = 0;
11420 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11421 #define WARN_ABOUT_UNDERSCORE() \
11423 if (!warned_about_underscore) { \
11424 warned_about_underscore = 1; \
11425 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11426 "Misplaced _ in number"); \
11429 /* Hexadecimal floating point.
11431 * In many places (where we have quads and NV is IEEE 754 double)
11432 * we can fit the mantissa bits of a NV into an unsigned quad.
11433 * (Note that UVs might not be quads even when we have quads.)
11434 * This will not work everywhere, though (either no quads, or
11435 * using long doubles), in which case we have to resort to NV,
11436 * which will probably mean horrible loss of precision due to
11437 * multiple fp operations. */
11438 bool hexfp = FALSE;
11439 int total_bits = 0;
11440 int significant_bits = 0;
11441 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11442 # define HEXFP_UQUAD
11443 Uquad_t hexfp_uquad = 0;
11444 int hexfp_frac_bits = 0;
11449 NV hexfp_mult = 1.0;
11450 UV high_non_zero = 0; /* highest digit */
11451 int non_zero_integer_digits = 0;
11452 bool new_octal = FALSE; /* octal with "0o" prefix */
11454 PERL_ARGS_ASSERT_SCAN_NUM;
11456 /* We use the first character to decide what type of number this is */
11460 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11462 /* if it starts with a 0, it could be an octal number, a decimal in
11463 0.13 disguise, or a hexadecimal number, or a binary number. */
11467 u holds the "number so far"
11468 overflowed was the number more than we can hold?
11470 Shift is used when we add a digit. It also serves as an "are
11471 we in octal/hex/binary?" indicator to disallow hex characters
11472 when in octal mode.
11476 bool overflowed = FALSE;
11477 bool just_zero = TRUE; /* just plain 0 or binary number? */
11478 bool has_digs = FALSE;
11479 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11480 static const char* const bases[5] =
11481 { "", "binary", "", "octal", "hexadecimal" };
11482 static const char* const Bases[5] =
11483 { "", "Binary", "", "Octal", "Hexadecimal" };
11484 static const char* const maxima[5] =
11486 "0b11111111111111111111111111111111",
11491 /* check for hex */
11492 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11496 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11501 /* check for a decimal in disguise */
11502 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11504 /* so it must be octal */
11508 if (isALPHA_FOLD_EQ(*s, 'o')) {
11516 WARN_ABOUT_UNDERSCORE();
11520 /* read the rest of the number */
11522 /* x is used in the overflow test,
11523 b is the digit we're adding on. */
11528 /* if we don't mention it, we're done */
11532 /* _ are ignored -- but warned about if consecutive */
11534 if (lastub && s == lastub + 1)
11535 WARN_ABOUT_UNDERSCORE();
11539 /* 8 and 9 are not octal */
11540 case '8': case '9':
11542 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11546 case '2': case '3': case '4':
11547 case '5': case '6': case '7':
11549 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11552 case '0': case '1':
11553 b = *s++ & 15; /* ASCII digit -> value of digit */
11557 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11558 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11559 /* make sure they said 0x */
11562 b = (*s++ & 7) + 9;
11564 /* Prepare to put the digit we have onto the end
11565 of the number so far. We check for overflows.
11572 assert(shift >= 0);
11573 x = u << shift; /* make room for the digit */
11575 total_bits += shift;
11577 if ((x >> shift) != u
11578 && !(PL_hints & HINT_NEW_BINARY)) {
11581 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11582 "Integer overflow in %s number",
11585 u = x | b; /* add the digit to the end */
11588 n *= nvshift[shift];
11589 /* If an NV has not enough bits in its
11590 * mantissa to represent an UV this summing of
11591 * small low-order numbers is a waste of time
11592 * (because the NV cannot preserve the
11593 * low-order bits anyway): we could just
11594 * remember when did we overflow and in the
11595 * end just multiply n by the right
11600 if (high_non_zero == 0 && b > 0)
11604 non_zero_integer_digits++;
11606 /* this could be hexfp, but peek ahead
11607 * to avoid matching ".." */
11608 if (UNLIKELY(HEXFP_PEEK(s))) {
11616 /* if we get here, we had success: make a scalar value from
11621 /* final misplaced underbar check */
11623 WARN_ABOUT_UNDERSCORE();
11625 if (UNLIKELY(HEXFP_PEEK(s))) {
11626 /* Do sloppy (on the underbars) but quick detection
11627 * (and value construction) for hexfp, the decimal
11628 * detection will shortly be more thorough with the
11629 * underbar checks. */
11631 significant_bits = non_zero_integer_digits * shift;
11634 #else /* HEXFP_NV */
11637 /* Ignore the leading zero bits of
11638 * the high (first) non-zero digit. */
11639 if (high_non_zero) {
11640 if (high_non_zero < 0x8)
11641 significant_bits--;
11642 if (high_non_zero < 0x4)
11643 significant_bits--;
11644 if (high_non_zero < 0x2)
11645 significant_bits--;
11652 bool accumulate = TRUE;
11654 int lim = 1 << shift;
11655 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11657 if (isXDIGIT(*h)) {
11658 significant_bits += shift;
11661 if (significant_bits < NV_MANT_DIG) {
11662 /* We are in the long "run" of xdigits,
11663 * accumulate the full four bits. */
11664 assert(shift >= 0);
11665 hexfp_uquad <<= shift;
11667 hexfp_frac_bits += shift;
11668 } else if (significant_bits - shift < NV_MANT_DIG) {
11669 /* We are at a hexdigit either at,
11670 * or straddling, the edge of mantissa.
11671 * We will try grabbing as many as
11672 * possible bits. */
11674 significant_bits - NV_MANT_DIG;
11678 hexfp_uquad <<= tail;
11679 assert((shift - tail) >= 0);
11680 hexfp_uquad |= b >> (shift - tail);
11681 hexfp_frac_bits += tail;
11683 /* Ignore the trailing zero bits
11684 * of the last non-zero xdigit.
11686 * The assumption here is that if
11687 * one has input of e.g. the xdigit
11688 * eight (0x8), there is only one
11689 * bit being input, not the full
11690 * four bits. Conversely, if one
11691 * specifies a zero xdigit, the
11692 * assumption is that one really
11693 * wants all those bits to be zero. */
11695 if ((b & 0x1) == 0x0) {
11696 significant_bits--;
11697 if ((b & 0x2) == 0x0) {
11698 significant_bits--;
11699 if ((b & 0x4) == 0x0) {
11700 significant_bits--;
11706 accumulate = FALSE;
11709 /* Keep skipping the xdigits, and
11710 * accumulating the significant bits,
11711 * but do not shift the uquad
11712 * (which would catastrophically drop
11713 * high-order bits) or accumulate the
11714 * xdigits anymore. */
11716 #else /* HEXFP_NV */
11718 nv_mult /= nvshift[shift];
11720 hexfp_nv += b * nv_mult;
11722 accumulate = FALSE;
11726 if (significant_bits >= NV_MANT_DIG)
11727 accumulate = FALSE;
11731 if ((total_bits > 0 || significant_bits > 0) &&
11732 isALPHA_FOLD_EQ(*h, 'p')) {
11733 bool negexp = FALSE;
11737 else if (*h == '-') {
11743 while (isDIGIT(*h) || *h == '_') {
11746 hexfp_exp += *h - '0';
11749 && -hexfp_exp < NV_MIN_EXP - 1) {
11750 /* NOTE: this means that the exponent
11751 * underflow warning happens for
11752 * the IEEE 754 subnormals (denormals),
11753 * because DBL_MIN_EXP etc are the lowest
11754 * possible binary (or, rather, DBL_RADIX-base)
11755 * exponent for normals, not subnormals.
11757 * This may or may not be a good thing. */
11758 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11759 "Hexadecimal float: exponent underflow");
11765 && hexfp_exp > NV_MAX_EXP - 1) {
11766 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11767 "Hexadecimal float: exponent overflow");
11775 hexfp_exp = -hexfp_exp;
11777 hexfp_exp -= hexfp_frac_bits;
11779 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11786 if (!just_zero && !has_digs) {
11787 /* 0x, 0o or 0b with no digits, treat it as an error.
11788 Originally this backed up the parse before the b or
11789 x, but that has the potential for silent changes in
11790 behaviour, like for: "0x.3" and "0x+$foo".
11793 char *oldbp = PL_bufptr;
11794 if (*d) ++d; /* so the user sees the bad non-digit */
11795 PL_bufptr = (char *)d; /* so yyerror reports the context */
11796 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11802 if (n > 4294967295.0)
11803 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11804 "%s number > %s non-portable",
11806 new_octal ? "0o37777777777" : maxima[shift]);
11811 if (u > 0xffffffff)
11812 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11813 "%s number > %s non-portable",
11815 new_octal ? "0o37777777777" : maxima[shift]);
11819 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11820 sv = new_constant(start, s - start, "integer",
11821 sv, NULL, NULL, 0, NULL);
11822 else if (PL_hints & HINT_NEW_BINARY)
11823 sv = new_constant(start, s - start, "binary",
11824 sv, NULL, NULL, 0, NULL);
11829 handle decimal numbers.
11830 we're also sent here when we read a 0 as the first digit
11832 case '1': case '2': case '3': case '4': case '5':
11833 case '6': case '7': case '8': case '9': case '.':
11836 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11859 NOT_REACHED; /* NOTREACHED */
11863 /* read next group of digits and _ and copy into d */
11866 || UNLIKELY(hexfp && isXDIGIT(*s)))
11868 /* skip underscores, checking for misplaced ones
11872 if (lastub && s == lastub + 1)
11873 WARN_ABOUT_UNDERSCORE();
11877 /* check for end of fixed-length buffer */
11879 Perl_croak(aTHX_ "%s", number_too_long);
11880 /* if we're ok, copy the character */
11885 /* final misplaced underbar check */
11886 if (lastub && s == lastub + 1)
11887 WARN_ABOUT_UNDERSCORE();
11889 /* read a decimal portion if there is one. avoid
11890 3..5 being interpreted as the number 3. followed
11893 if (*s == '.' && s[1] != '.') {
11898 WARN_ABOUT_UNDERSCORE();
11902 /* copy, ignoring underbars, until we run out of digits.
11906 || UNLIKELY(hexfp && isXDIGIT(*s));
11909 /* fixed length buffer check */
11911 Perl_croak(aTHX_ "%s", number_too_long);
11913 if (lastub && s == lastub + 1)
11914 WARN_ABOUT_UNDERSCORE();
11920 /* fractional part ending in underbar? */
11922 WARN_ABOUT_UNDERSCORE();
11923 if (*s == '.' && isDIGIT(s[1])) {
11924 /* oops, it's really a v-string, but without the "v" */
11930 /* read exponent part, if present */
11931 if ((isALPHA_FOLD_EQ(*s, 'e')
11932 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11933 && memCHRs("+-0123456789_", s[1]))
11935 int exp_digits = 0;
11936 const char *save_s = s;
11939 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11940 ditto for p (hexfloats) */
11941 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11942 /* At least some Mach atof()s don't grok 'E' */
11945 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11952 /* stray preinitial _ */
11954 WARN_ABOUT_UNDERSCORE();
11958 /* allow positive or negative exponent */
11959 if (*s == '+' || *s == '-')
11962 /* stray initial _ */
11964 WARN_ABOUT_UNDERSCORE();
11968 /* read digits of exponent */
11969 while (isDIGIT(*s) || *s == '_') {
11973 Perl_croak(aTHX_ "%s", number_too_long);
11977 if (((lastub && s == lastub + 1)
11978 || (!isDIGIT(s[1]) && s[1] != '_')))
11979 WARN_ABOUT_UNDERSCORE();
11985 /* no exponent digits, the [eEpP] could be for something else,
11986 * though in practice we don't get here for p since that's preparsed
11987 * earlier, and results in only the 0xX being consumed, so behave similarly
11988 * for decimal floats and consume only the D.DD, leaving the [eE] to the
12001 We try to do an integer conversion first if no characters
12002 indicating "float" have been found.
12007 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12009 if (flags == IS_NUMBER_IN_UV) {
12011 sv = newSViv(uv); /* Prefer IVs over UVs. */
12014 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12015 if (uv <= (UV) IV_MIN)
12016 sv = newSViv(-(IV)uv);
12023 /* terminate the string */
12025 if (UNLIKELY(hexfp)) {
12026 # ifdef NV_MANT_DIG
12027 if (significant_bits > NV_MANT_DIG)
12028 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12029 "Hexadecimal float: mantissa overflow");
12032 nv = hexfp_uquad * hexfp_mult;
12033 #else /* HEXFP_NV */
12034 nv = hexfp_nv * hexfp_mult;
12037 nv = Atof(PL_tokenbuf);
12043 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12044 const char *const key = floatit ? "float" : "integer";
12045 const STRLEN keylen = floatit ? 5 : 7;
12046 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12047 key, keylen, sv, NULL, NULL, 0, NULL);
12051 /* if it starts with a v, it could be a v-string */
12054 sv = newSV(5); /* preallocate storage space */
12055 ENTER_with_name("scan_vstring");
12057 s = scan_vstring(s, PL_bufend, sv);
12058 SvREFCNT_inc_simple_void_NN(sv);
12059 LEAVE_with_name("scan_vstring");
12063 /* make the op for the constant and return */
12066 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12068 lvalp->opval = NULL;
12074 S_scan_formline(pTHX_ char *s)
12076 SV * const stuff = newSVpvs("");
12077 bool needargs = FALSE;
12078 bool eofmt = FALSE;
12080 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12082 while (!needargs) {
12086 #ifdef PERL_STRICT_CR
12087 while (SPACE_OR_TAB(*t))
12090 while (SPACE_OR_TAB(*t) || *t == '\r')
12093 if (*t == '\n' || t == PL_bufend) {
12098 eol = (char *) memchr(s,'\n',PL_bufend-s);
12103 for (t = s; t < eol; t++) {
12104 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12106 goto enough; /* ~~ must be first line in formline */
12108 if (*t == '@' || *t == '^')
12112 sv_catpvn(stuff, s, eol-s);
12113 #ifndef PERL_STRICT_CR
12114 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12115 char *end = SvPVX(stuff) + SvCUR(stuff);
12118 SvCUR_set(stuff, SvCUR(stuff) - 1);
12126 if ((PL_rsfp || PL_parser->filtered)
12127 && PL_parser->form_lex_state == LEX_NORMAL) {
12129 PL_bufptr = PL_bufend;
12130 COPLINE_INC_WITH_HERELINES;
12131 got_some = lex_next_chunk(0);
12132 CopLINE_dec(PL_curcop);
12137 incline(s, PL_bufend);
12140 if (!SvCUR(stuff) || needargs)
12141 PL_lex_state = PL_parser->form_lex_state;
12142 if (SvCUR(stuff)) {
12143 PL_expect = XSTATE;
12145 const char *s2 = s;
12146 while (isSPACE(*s2) && *s2 != '\n')
12149 PL_expect = XTERMBLOCK;
12150 NEXTVAL_NEXTTOKE.ival = 0;
12153 NEXTVAL_NEXTTOKE.ival = 0;
12154 force_next(FORMLBRACK);
12157 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12160 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12164 SvREFCNT_dec(stuff);
12166 PL_lex_formbrack = 0;
12172 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12174 const I32 oldsavestack_ix = PL_savestack_ix;
12175 CV* const outsidecv = PL_compcv;
12177 SAVEI32(PL_subline);
12178 save_item(PL_subname);
12179 SAVESPTR(PL_compcv);
12181 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12182 CvFLAGS(PL_compcv) |= flags;
12184 PL_subline = CopLINE(PL_curcop);
12185 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12186 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12187 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12188 if (outsidecv && CvPADLIST(outsidecv))
12189 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12191 return oldsavestack_ix;
12195 /* Do extra initialisation of a CV (typically one just created by
12196 * start_subparse()) if that CV is for a named sub
12200 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12202 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12204 if (nameop->op_type == OP_CONST) {
12205 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12206 if ( strEQ(name, "BEGIN")
12207 || strEQ(name, "END")
12208 || strEQ(name, "INIT")
12209 || strEQ(name, "CHECK")
12210 || strEQ(name, "UNITCHECK")
12215 /* State subs inside anonymous subs need to be
12216 clonable themselves. */
12217 if ( CvANON(CvOUTSIDE(cv))
12218 || CvCLONE(CvOUTSIDE(cv))
12219 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12221 ))[nameop->op_targ])
12228 S_yywarn(pTHX_ const char *const s, U32 flags)
12230 PERL_ARGS_ASSERT_YYWARN;
12232 PL_in_eval |= EVAL_WARNONLY;
12233 yyerror_pv(s, flags);
12238 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12240 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12243 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12246 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12248 NOT_REACHED; /* NOTREACHED */
12254 /* Called, after at least one error has been found, to abort the parse now,
12255 * instead of trying to forge ahead */
12257 yyerror_pvn(NULL, 0, 0);
12261 Perl_yyerror(pTHX_ const char *const s)
12263 PERL_ARGS_ASSERT_YYERROR;
12264 return yyerror_pvn(s, strlen(s), 0);
12268 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12270 PERL_ARGS_ASSERT_YYERROR_PV;
12271 return yyerror_pvn(s, strlen(s), flags);
12275 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12277 const char *context = NULL;
12280 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12281 int yychar = PL_parser->yychar;
12283 /* Output error message 's' with length 'len'. 'flags' are SV flags that
12284 * apply. If the number of errors found is large enough, it abandons
12285 * parsing. If 's' is NULL, there is no message, and it abandons
12286 * processing unconditionally */
12289 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12290 sv_catpvs(where_sv, "at EOF");
12291 else if ( PL_oldoldbufptr
12292 && PL_bufptr > PL_oldoldbufptr
12293 && PL_bufptr - PL_oldoldbufptr < 200
12294 && PL_oldoldbufptr != PL_oldbufptr
12295 && PL_oldbufptr != PL_bufptr)
12299 The code below is removed for NetWare because it
12300 abends/crashes on NetWare when the script has error such as
12301 not having the closing quotes like:
12302 if ($var eq "value)
12303 Checking of white spaces is anyway done in NetWare code.
12306 while (isSPACE(*PL_oldoldbufptr))
12309 context = PL_oldoldbufptr;
12310 contlen = PL_bufptr - PL_oldoldbufptr;
12312 else if ( PL_oldbufptr
12313 && PL_bufptr > PL_oldbufptr
12314 && PL_bufptr - PL_oldbufptr < 200
12315 && PL_oldbufptr != PL_bufptr) {
12318 The code below is removed for NetWare because it
12319 abends/crashes on NetWare when the script has error such as
12320 not having the closing quotes like:
12321 if ($var eq "value)
12322 Checking of white spaces is anyway done in NetWare code.
12325 while (isSPACE(*PL_oldbufptr))
12328 context = PL_oldbufptr;
12329 contlen = PL_bufptr - PL_oldbufptr;
12331 else if (yychar > 255)
12332 sv_catpvs(where_sv, "next token ???");
12333 else if (yychar == YYEMPTY) {
12334 if (PL_lex_state == LEX_NORMAL)
12335 sv_catpvs(where_sv, "at end of line");
12336 else if (PL_lex_inpat)
12337 sv_catpvs(where_sv, "within pattern");
12339 sv_catpvs(where_sv, "within string");
12342 sv_catpvs(where_sv, "next char ");
12344 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12345 else if (isPRINT_LC(yychar)) {
12346 const char string = yychar;
12347 sv_catpvn(where_sv, &string, 1);
12350 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12352 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12353 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12354 OutCopFILE(PL_curcop),
12355 (IV)(PL_parser->preambling == NOLINE
12356 ? CopLINE(PL_curcop)
12357 : PL_parser->preambling));
12359 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12360 UTF8fARG(UTF, contlen, context));
12362 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12363 if ( PL_multi_start < PL_multi_end
12364 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12366 Perl_sv_catpvf(aTHX_ msg,
12367 " (Might be a runaway multi-line %c%c string starting on"
12368 " line %" IVdf ")\n",
12369 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12372 if (PL_in_eval & EVAL_WARNONLY) {
12373 PL_in_eval &= ~EVAL_WARNONLY;
12374 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12380 if (s == NULL || PL_error_count >= 10) {
12381 const char * msg = "";
12382 const char * const name = OutCopFILE(PL_curcop);
12385 SV * errsv = ERRSV;
12386 if (SvCUR(errsv)) {
12387 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12392 abort_execution(msg, name);
12395 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12399 PL_in_my_stash = NULL;
12404 S_swallow_bom(pTHX_ U8 *s)
12406 const STRLEN slen = SvCUR(PL_linestr);
12408 PERL_ARGS_ASSERT_SWALLOW_BOM;
12412 if (s[1] == 0xFE) {
12413 /* UTF-16 little-endian? (or UTF-32LE?) */
12414 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12415 /* diag_listed_as: Unsupported script encoding %s */
12416 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12417 #ifndef PERL_NO_UTF16_FILTER
12419 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12422 if (PL_bufend > (char*)s) {
12423 s = add_utf16_textfilter(s, TRUE);
12426 /* diag_listed_as: Unsupported script encoding %s */
12427 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12432 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12433 #ifndef PERL_NO_UTF16_FILTER
12435 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12438 if (PL_bufend > (char *)s) {
12439 s = add_utf16_textfilter(s, FALSE);
12442 /* diag_listed_as: Unsupported script encoding %s */
12443 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12447 case BOM_UTF8_FIRST_BYTE: {
12448 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12450 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12452 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
12459 if (s[2] == 0xFE && s[3] == 0xFF) {
12460 /* UTF-32 big-endian */
12461 /* diag_listed_as: Unsupported script encoding %s */
12462 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12465 else if (s[2] == 0 && s[3] != 0) {
12468 * are a good indicator of UTF-16BE. */
12469 #ifndef PERL_NO_UTF16_FILTER
12471 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12473 s = add_utf16_textfilter(s, FALSE);
12475 /* diag_listed_as: Unsupported script encoding %s */
12476 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12483 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12486 * are a good indicator of UTF-16LE. */
12487 #ifndef PERL_NO_UTF16_FILTER
12489 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12491 s = add_utf16_textfilter(s, TRUE);
12493 /* diag_listed_as: Unsupported script encoding %s */
12494 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12502 #ifndef PERL_NO_UTF16_FILTER
12504 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12506 SV *const filter = FILTER_DATA(idx);
12507 /* We re-use this each time round, throwing the contents away before we
12509 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12510 SV *const utf8_buffer = filter;
12511 IV status = IoPAGE(filter);
12512 const bool reverse = cBOOL(IoLINES(filter));
12515 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12517 /* As we're automatically added, at the lowest level, and hence only called
12518 from this file, we can be sure that we're not called in block mode. Hence
12519 don't bother writing code to deal with block mode. */
12521 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12524 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12526 DEBUG_P(PerlIO_printf(Perl_debug_log,
12527 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12528 FPTR2DPTR(void *, S_utf16_textfilter),
12529 reverse ? 'l' : 'b', idx, maxlen, status,
12530 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12537 /* First, look in our buffer of existing UTF-8 data: */
12538 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12542 } else if (status == 0) {
12544 IoPAGE(filter) = 0;
12545 nl = SvEND(utf8_buffer);
12548 STRLEN got = nl - SvPVX(utf8_buffer);
12549 /* Did we have anything to append? */
12551 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12552 /* Everything else in this code works just fine if SVp_POK isn't
12553 set. This, however, needs it, and we need it to work, else
12554 we loop infinitely because the buffer is never consumed. */
12555 sv_chop(utf8_buffer, nl);
12559 /* OK, not a complete line there, so need to read some more UTF-16.
12560 Read an extra octect if the buffer currently has an odd number. */
12564 if (SvCUR(utf16_buffer) >= 2) {
12565 /* Location of the high octet of the last complete code point.
12566 Gosh, UTF-16 is a pain. All the benefits of variable length,
12567 *coupled* with all the benefits of partial reads and
12569 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12570 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12572 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12576 /* We have the first half of a surrogate. Read more. */
12577 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12580 status = FILTER_READ(idx + 1, utf16_buffer,
12581 160 + (SvCUR(utf16_buffer) & 1));
12582 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12583 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12586 IoPAGE(filter) = status;
12591 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12592 * require 4 bytes per char */
12593 chars = SvCUR(utf16_buffer) >> 1;
12594 have = SvCUR(utf8_buffer);
12596 /* Assume the worst case size as noted by the functions: twice the
12597 * number of input bytes */
12598 SvGROW(utf8_buffer, have + chars * 4 + 1);
12601 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12602 (U8*)SvPVX_const(utf8_buffer) + have,
12603 chars * 2, &newlen);
12605 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12606 (U8*)SvPVX_const(utf8_buffer) + have,
12607 chars * 2, &newlen);
12609 SvCUR_set(utf8_buffer, have + newlen);
12612 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12613 it's private to us, and utf16_to_utf8{,reversed} take a
12614 (pointer,length) pair, rather than a NUL-terminated string. */
12615 if(SvCUR(utf16_buffer) & 1) {
12616 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12617 SvCUR_set(utf16_buffer, 1);
12619 SvCUR_set(utf16_buffer, 0);
12622 DEBUG_P(PerlIO_printf(Perl_debug_log,
12623 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12625 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12626 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12631 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12633 SV *filter = filter_add(S_utf16_textfilter, NULL);
12635 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12637 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12639 IoLINES(filter) = reversed;
12640 IoPAGE(filter) = 1; /* Not EOF */
12642 /* Sadly, we have to return a valid pointer, come what may, so we have to
12643 ignore any error return from this. */
12644 SvCUR_set(PL_linestr, 0);
12645 if (FILTER_READ(0, PL_linestr, 0)) {
12646 SvUTF8_on(PL_linestr);
12648 SvUTF8_on(PL_linestr);
12650 PL_bufend = SvEND(PL_linestr);
12651 return (U8*)SvPVX(PL_linestr);
12656 Returns a pointer to the next character after the parsed
12657 vstring, as well as updating the passed in sv.
12659 Function must be called like
12661 sv = sv_2mortal(newSV(5));
12662 s = scan_vstring(s,e,sv);
12664 where s and e are the start and end of the string.
12665 The sv should already be large enough to store the vstring
12666 passed in, for performance reasons.
12668 This function may croak if fatal warnings are enabled in the
12669 calling scope, hence the sv_2mortal in the example (to prevent
12670 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12676 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12678 const char *pos = s;
12679 const char *start = s;
12681 PERL_ARGS_ASSERT_SCAN_VSTRING;
12683 if (*pos == 'v') pos++; /* get past 'v' */
12684 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12686 if ( *pos != '.') {
12687 /* this may not be a v-string if followed by => */
12688 const char *next = pos;
12689 while (next < e && isSPACE(*next))
12691 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12692 /* return string not v-string */
12693 sv_setpvn(sv,(char *)s,pos-s);
12694 return (char *)pos;
12698 if (!isALPHA(*pos)) {
12699 U8 tmpbuf[UTF8_MAXBYTES+1];
12702 s++; /* get past 'v' */
12707 /* this is atoi() that tolerates underscores */
12710 const char *end = pos;
12712 while (--end >= s) {
12714 const UV orev = rev;
12715 rev += (*end - '0') * mult;
12718 /* diag_listed_as: Integer overflow in %s number */
12719 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12720 "Integer overflow in decimal number");
12724 /* Append native character for the rev point */
12725 tmpend = uvchr_to_utf8(tmpbuf, rev);
12726 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12727 if (!UVCHR_IS_INVARIANT(rev))
12729 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12735 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12739 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12746 Perl_keyword_plugin_standard(pTHX_
12747 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12749 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12750 PERL_UNUSED_CONTEXT;
12751 PERL_UNUSED_ARG(keyword_ptr);
12752 PERL_UNUSED_ARG(keyword_len);
12753 PERL_UNUSED_ARG(op_ptr);
12754 return KEYWORD_PLUGIN_DECLINE;
12758 =for apidoc wrap_keyword_plugin
12760 Puts a C function into the chain of keyword plugins. This is the
12761 preferred way to manipulate the L</PL_keyword_plugin> variable.
12762 C<new_plugin> is a pointer to the C function that is to be added to the
12763 keyword plugin chain, and C<old_plugin_p> points to the storage location
12764 where a pointer to the next function in the chain will be stored. The
12765 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12766 while the value previously stored there is written to C<*old_plugin_p>.
12768 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12769 to hook keyword parsing may find itself invoked more than once per
12770 process, typically in different threads. To handle that situation, this
12771 function is idempotent. The location C<*old_plugin_p> must initially
12772 (once per process) contain a null pointer. A C variable of static
12773 duration (declared at file scope, typically also marked C<static> to give
12774 it internal linkage) will be implicitly initialised appropriately, if it
12775 does not have an explicit initialiser. This function will only actually
12776 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12777 function is also thread safe on the small scale. It uses appropriate
12778 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12780 When this function is called, the function referenced by C<new_plugin>
12781 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12782 In a threading situation, C<new_plugin> may be called immediately, even
12783 before this function has returned. C<*old_plugin_p> will always be
12784 appropriately set before C<new_plugin> is called. If C<new_plugin>
12785 decides not to do anything special with the identifier that it is given
12786 (which is the usual case for most calls to a keyword plugin), it must
12787 chain the plugin function referenced by C<*old_plugin_p>.
12789 Taken all together, XS code to install a keyword plugin should typically
12790 look something like this:
12792 static Perl_keyword_plugin_t next_keyword_plugin;
12793 static OP *my_keyword_plugin(pTHX_
12794 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12796 if (memEQs(keyword_ptr, keyword_len,
12797 "my_new_keyword")) {
12800 return next_keyword_plugin(aTHX_
12801 keyword_ptr, keyword_len, op_ptr);
12805 wrap_keyword_plugin(my_keyword_plugin,
12806 &next_keyword_plugin);
12808 Direct access to L</PL_keyword_plugin> should be avoided.
12814 Perl_wrap_keyword_plugin(pTHX_
12815 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12818 PERL_UNUSED_CONTEXT;
12819 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12820 if (*old_plugin_p) return;
12821 KEYWORD_PLUGIN_MUTEX_LOCK;
12822 if (!*old_plugin_p) {
12823 *old_plugin_p = PL_keyword_plugin;
12824 PL_keyword_plugin = new_plugin;
12826 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12829 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12831 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12833 SAVEI32(PL_lex_brackets);
12834 if (PL_lex_brackets > 100)
12835 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12836 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12837 SAVEI32(PL_lex_allbrackets);
12838 PL_lex_allbrackets = 0;
12839 SAVEI8(PL_lex_fakeeof);
12840 PL_lex_fakeeof = (U8)fakeeof;
12841 if(yyparse(gramtype) && !PL_parser->error_count)
12842 qerror(Perl_mess(aTHX_ "Parse error"));
12845 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12847 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12851 SAVEVPTR(PL_eval_root);
12852 PL_eval_root = NULL;
12853 parse_recdescent(gramtype, fakeeof);
12859 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12861 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12864 if (flags & ~PARSE_OPTIONAL)
12865 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12866 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12867 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12868 if (!PL_parser->error_count)
12869 qerror(Perl_mess(aTHX_ "Parse error"));
12870 exprop = newOP(OP_NULL, 0);
12876 =for apidoc parse_arithexpr
12878 Parse a Perl arithmetic expression. This may contain operators of precedence
12879 down to the bit shift operators. The expression must be followed (and thus
12880 terminated) either by a comparison or lower-precedence operator or by
12881 something that would normally terminate an expression such as semicolon.
12882 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12883 otherwise it is mandatory. It is up to the caller to ensure that the
12884 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12885 the source of the code to be parsed and the lexical context for the
12888 The op tree representing the expression is returned. If an optional
12889 expression is absent, a null pointer is returned, otherwise the pointer
12892 If an error occurs in parsing or compilation, in most cases a valid op
12893 tree is returned anyway. The error is reflected in the parser state,
12894 normally resulting in a single exception at the top level of parsing
12895 which covers all the compilation errors that occurred. Some compilation
12896 errors, however, will throw an exception immediately.
12898 =for apidoc Amnh||PARSE_OPTIONAL
12905 Perl_parse_arithexpr(pTHX_ U32 flags)
12907 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12911 =for apidoc parse_termexpr
12913 Parse a Perl term expression. This may contain operators of precedence
12914 down to the assignment operators. The expression must be followed (and thus
12915 terminated) either by a comma or lower-precedence operator or by
12916 something that would normally terminate an expression such as semicolon.
12917 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12918 otherwise it is mandatory. It is up to the caller to ensure that the
12919 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12920 the source of the code to be parsed and the lexical context for the
12923 The op tree representing the expression is returned. If an optional
12924 expression is absent, a null pointer is returned, otherwise the pointer
12927 If an error occurs in parsing or compilation, in most cases a valid op
12928 tree is returned anyway. The error is reflected in the parser state,
12929 normally resulting in a single exception at the top level of parsing
12930 which covers all the compilation errors that occurred. Some compilation
12931 errors, however, will throw an exception immediately.
12937 Perl_parse_termexpr(pTHX_ U32 flags)
12939 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12943 =for apidoc parse_listexpr
12945 Parse a Perl list expression. This may contain operators of precedence
12946 down to the comma operator. The expression must be followed (and thus
12947 terminated) either by a low-precedence logic operator such as C<or> or by
12948 something that would normally terminate an expression such as semicolon.
12949 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12950 otherwise it is mandatory. It is up to the caller to ensure that the
12951 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12952 the source of the code to be parsed and the lexical context for the
12955 The op tree representing the expression is returned. If an optional
12956 expression is absent, a null pointer is returned, otherwise the pointer
12959 If an error occurs in parsing or compilation, in most cases a valid op
12960 tree is returned anyway. The error is reflected in the parser state,
12961 normally resulting in a single exception at the top level of parsing
12962 which covers all the compilation errors that occurred. Some compilation
12963 errors, however, will throw an exception immediately.
12969 Perl_parse_listexpr(pTHX_ U32 flags)
12971 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12975 =for apidoc parse_fullexpr
12977 Parse a single complete Perl expression. This allows the full
12978 expression grammar, including the lowest-precedence operators such
12979 as C<or>. The expression must be followed (and thus terminated) by a
12980 token that an expression would normally be terminated by: end-of-file,
12981 closing bracketing punctuation, semicolon, or one of the keywords that
12982 signals a postfix expression-statement modifier. If C<flags> has the
12983 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12984 mandatory. It is up to the caller to ensure that the dynamic parser
12985 state (L</PL_parser> et al) is correctly set to reflect the source of
12986 the code to be parsed and the lexical context for the expression.
12988 The op tree representing the expression is returned. If an optional
12989 expression is absent, a null pointer is returned, otherwise the pointer
12992 If an error occurs in parsing or compilation, in most cases a valid op
12993 tree is returned anyway. The error is reflected in the parser state,
12994 normally resulting in a single exception at the top level of parsing
12995 which covers all the compilation errors that occurred. Some compilation
12996 errors, however, will throw an exception immediately.
13002 Perl_parse_fullexpr(pTHX_ U32 flags)
13004 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13008 =for apidoc parse_block
13010 Parse a single complete Perl code block. This consists of an opening
13011 brace, a sequence of statements, and a closing brace. The block
13012 constitutes a lexical scope, so C<my> variables and various compile-time
13013 effects can be contained within it. It is up to the caller to ensure
13014 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13015 reflect the source of the code to be parsed and the lexical context for
13018 The op tree representing the code block is returned. This is always a
13019 real op, never a null pointer. It will normally be a C<lineseq> list,
13020 including C<nextstate> or equivalent ops. No ops to construct any kind
13021 of runtime scope are included by virtue of it being a block.
13023 If an error occurs in parsing or compilation, in most cases a valid op
13024 tree (most likely null) is returned anyway. The error is reflected in
13025 the parser state, normally resulting in a single exception at the top
13026 level of parsing which covers all the compilation errors that occurred.
13027 Some compilation errors, however, will throw an exception immediately.
13029 The C<flags> parameter is reserved for future use, and must always
13036 Perl_parse_block(pTHX_ U32 flags)
13039 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13040 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13044 =for apidoc parse_barestmt
13046 Parse a single unadorned Perl statement. This may be a normal imperative
13047 statement or a declaration that has compile-time effect. It does not
13048 include any label or other affixture. It is up to the caller to ensure
13049 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13050 reflect the source of the code to be parsed and the lexical context for
13053 The op tree representing the statement is returned. This may be a
13054 null pointer if the statement is null, for example if it was actually
13055 a subroutine definition (which has compile-time side effects). If not
13056 null, it will be ops directly implementing the statement, suitable to
13057 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13058 equivalent op (except for those embedded in a scope contained entirely
13059 within the statement).
13061 If an error occurs in parsing or compilation, in most cases a valid op
13062 tree (most likely null) is returned anyway. The error is reflected in
13063 the parser state, normally resulting in a single exception at the top
13064 level of parsing which covers all the compilation errors that occurred.
13065 Some compilation errors, however, will throw an exception immediately.
13067 The C<flags> parameter is reserved for future use, and must always
13074 Perl_parse_barestmt(pTHX_ U32 flags)
13077 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13078 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13082 =for apidoc parse_label
13084 Parse a single label, possibly optional, of the type that may prefix a
13085 Perl statement. It is up to the caller to ensure that the dynamic parser
13086 state (L</PL_parser> et al) is correctly set to reflect the source of
13087 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13088 label is optional, otherwise it is mandatory.
13090 The name of the label is returned in the form of a fresh scalar. If an
13091 optional label is absent, a null pointer is returned.
13093 If an error occurs in parsing, which can only occur if the label is
13094 mandatory, a valid label is returned anyway. The error is reflected in
13095 the parser state, normally resulting in a single exception at the top
13096 level of parsing which covers all the compilation errors that occurred.
13102 Perl_parse_label(pTHX_ U32 flags)
13104 if (flags & ~PARSE_OPTIONAL)
13105 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13107 PL_parser->yychar = yylex();
13108 if (PL_parser->yychar == LABEL) {
13109 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13110 PL_parser->yychar = YYEMPTY;
13111 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13112 op_free(pl_yylval.opval);
13120 STRLEN wlen, bufptr_pos;
13123 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13125 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13126 if (word_takes_any_delimiter(s, wlen))
13128 bufptr_pos = s - SvPVX(PL_linestr);
13130 lex_read_space(LEX_KEEP_PREVIOUS);
13132 s = SvPVX(PL_linestr) + bufptr_pos;
13133 if (t[0] == ':' && t[1] != ':') {
13134 PL_oldoldbufptr = PL_oldbufptr;
13137 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13141 if (flags & PARSE_OPTIONAL) {
13144 qerror(Perl_mess(aTHX_ "Parse error"));
13145 return newSVpvs("x");
13152 =for apidoc parse_fullstmt
13154 Parse a single complete Perl statement. This may be a normal imperative
13155 statement or a declaration that has compile-time effect, and may include
13156 optional labels. It is up to the caller to ensure that the dynamic
13157 parser state (L</PL_parser> et al) is correctly set to reflect the source
13158 of the code to be parsed and the lexical context for the statement.
13160 The op tree representing the statement is returned. This may be a
13161 null pointer if the statement is null, for example if it was actually
13162 a subroutine definition (which has compile-time side effects). If not
13163 null, it will be the result of a L</newSTATEOP> call, normally including
13164 a C<nextstate> or equivalent op.
13166 If an error occurs in parsing or compilation, in most cases a valid op
13167 tree (most likely null) is returned anyway. The error is reflected in
13168 the parser state, normally resulting in a single exception at the top
13169 level of parsing which covers all the compilation errors that occurred.
13170 Some compilation errors, however, will throw an exception immediately.
13172 The C<flags> parameter is reserved for future use, and must always
13179 Perl_parse_fullstmt(pTHX_ U32 flags)
13182 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13183 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13187 =for apidoc parse_stmtseq
13189 Parse a sequence of zero or more Perl statements. These may be normal
13190 imperative statements, including optional labels, or declarations
13191 that have compile-time effect, or any mixture thereof. The statement
13192 sequence ends when a closing brace or end-of-file is encountered in a
13193 place where a new statement could have validly started. It is up to
13194 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13195 is correctly set to reflect the source of the code to be parsed and the
13196 lexical context for the statements.
13198 The op tree representing the statement sequence is returned. This may
13199 be a null pointer if the statements were all null, for example if there
13200 were no statements or if there were only subroutine definitions (which
13201 have compile-time side effects). If not null, it will be a C<lineseq>
13202 list, normally including C<nextstate> or equivalent ops.
13204 If an error occurs in parsing or compilation, in most cases a valid op
13205 tree is returned anyway. The error is reflected in the parser state,
13206 normally resulting in a single exception at the top level of parsing
13207 which covers all the compilation errors that occurred. Some compilation
13208 errors, however, will throw an exception immediately.
13210 The C<flags> parameter is reserved for future use, and must always
13217 Perl_parse_stmtseq(pTHX_ U32 flags)
13222 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13223 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13224 c = lex_peek_unichar(0);
13225 if (c != -1 && c != /*{*/'}')
13226 qerror(Perl_mess(aTHX_ "Parse error"));
13231 =for apidoc parse_subsignature
13233 Parse a subroutine signature declaration. This is the contents of the
13234 parentheses following a named or anonymous subroutine declaration when the
13235 C<signatures> feature is enabled. Note that this function neither expects
13236 nor consumes the opening and closing parentheses around the signature; it
13237 is the caller's job to handle these.
13239 This function must only be called during parsing of a subroutine; after
13240 L</start_subparse> has been called. It might allocate lexical variables on
13241 the pad for the current subroutine.
13243 The op tree to unpack the arguments from the stack at runtime is returned.
13244 This op tree should appear at the beginning of the compiled function. The
13245 caller may wish to use L</op_append_list> to build their function body
13246 after it, or splice it together with the body before calling L</newATTRSUB>.
13248 The C<flags> parameter is reserved for future use, and must always
13255 Perl_parse_subsignature(pTHX_ U32 flags)
13258 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13259 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13263 * ex: set ts=8 sts=4 sw=4 et: