3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmnU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "invlist_inline.h"
43 #define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
96 static const char ident_too_long[] = "Identifier too long";
97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155 other token values are 258 or higher (see perly.h), so -1 should be
158 #define YYL_RETRY (-1)
161 static const char* const lex_state_names[] = {
176 #include "keywords.h"
178 /* CLINE is a macro that ensures PL_copline has a sane value */
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
183 * Convenience functions to return different tokens and prime the
184 * lexer for the next token. They all take an argument.
186 * TOKEN : generic token (used for '(', DOLSHARP, etc)
187 * OPERATOR : generic operator
188 * AOPERATOR : assignment operator
189 * PREBLOCK : beginning the block after an if, while, foreach, ...
190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191 * PREREF : *EXPR where EXPR is not a simple identifier
192 * TERM : expression term
193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
194 * LOOPX : loop exiting command (goto, last, dump, etc)
195 * FTST : file test operator
196 * FUN0 : zero-argument function
197 * FUN0OP : zero-argument function, with its op created in this file
198 * FUN1 : not used, except for not, which isn't a UNIOP
199 * BOop : bitwise or or xor
201 * BCop : bitwise complement
202 * SHop : shift operator
203 * PWop : power operator
204 * PMop : pattern-matching operator
205 * Aop : addition-level operator
206 * AopNOASSIGN : addition-level operator that is never part of .=
207 * Mop : multiplication-level operator
208 * ChEop : chaining equality-testing operator
209 * NCEop : non-chaining comparison operator at equality precedence
210 * ChRop : chaining relational operator <= != gt
211 * NCRop : non-chaining relational operator isa
213 * Also see LOP and lop() below.
216 #ifdef DEBUGGING /* Serve -DT. */
217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
219 # define REPORT(retval) (retval)
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
253 /* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
258 #define UNI3(f,x,have_x) { \
259 pl_yylval.ival = f; \
260 if (have_x) PL_expect = x; \
262 PL_last_uni = PL_oldbufptr; \
263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
265 return REPORT( (int)FUNC1 ); \
267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
269 #define UNI(f) UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272 if (optional) PL_last_uni = PL_oldbufptr; \
276 #define UNIBRACK(f) UNI3(f,0,0)
278 /* return has special case parsing.
280 * List operators have low precedence. Functions have high precedence.
281 * Every built in, *except return*, if written with () around its arguments, is
282 * parsed as a function. Hence every other list built in:
284 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
286 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
288 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
289 * Useless use of a constant (2) in void context at -e line 1.
290 * Useless use of a constant (4) in void context at -e line 1.
294 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
295 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
299 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
303 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
304 * Useless use of a constant (2) in void context at -e line 1.
305 * Useless use of a constant (4) in void context at -e line 1.
307 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
308 * Useless use of a constant (2) in void context at -e line 1.
309 * Useless use of a constant (4) in void context at -e line 1.
314 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
319 * This last example is what we expect, but it's clearly inconsistent with how
320 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
324 * Perl 3 attempted to be consistent:
326 * The rules are more consistent about where parens are needed and
327 * where they are not. In particular, unary operators and list operators now
328 * behave like functions if they're called like functions.
330 * However, the behaviour for return was reverted to the "old" parsing with
335 * did not do what was expected, since return was swallowing the
336 * parens in order to consider itself a function. The solution,
337 * since return never wants any trailing expression such as
338 * return (1,2,3) + 2;
339 * is to simply make return an exception to the paren-makes-a-function
340 * rule, and treat it the way it always was, so that it doesn't
343 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
344 * LOP(OP_RETURN, XTERM);
346 * and constructs such as
348 * return (Internals::V())[2]
350 * turn into syntax errors
355 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
356 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
357 pl_yylval.ival = (f); \
363 #define COPLINE_INC_WITH_HERELINES \
365 CopLINE_inc(PL_curcop); \
366 if (PL_parser->herelines) \
367 CopLINE(PL_curcop) += PL_parser->herelines, \
368 PL_parser->herelines = 0; \
370 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
371 * is no sublex_push to follow. */
372 #define COPLINE_SET_FROM_MULTI_END \
374 CopLINE_set(PL_curcop, PL_multi_end); \
375 if (PL_multi_end != PL_multi_start) \
376 PL_parser->herelines = 0; \
380 /* A file-local structure for passing around information about subroutines and
381 * related definable words */
391 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
395 /* how to interpret the pl_yylval associated with the token */
399 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
404 #define DEBUG_TOKEN(Type, Name) \
405 { Name, TOKENTYPE_##Type, #Name }
407 static struct debug_tokens {
409 enum token_type type;
411 } const debug_tokens[] =
413 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
414 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
415 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
416 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
417 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
418 { ARROW, TOKENTYPE_NONE, "ARROW" },
419 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
420 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
421 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
422 { CATCH, TOKENTYPE_IVAL, "CATCH" },
423 { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" },
424 { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" },
425 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
426 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
427 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
428 { DO, TOKENTYPE_NONE, "DO" },
429 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
430 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
431 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
432 { ELSE, TOKENTYPE_NONE, "ELSE" },
433 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
434 { FOR, TOKENTYPE_IVAL, "FOR" },
435 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
436 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
437 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
438 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
439 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
440 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
441 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
442 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
443 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
444 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
445 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
446 { IF, TOKENTYPE_IVAL, "IF" },
447 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
448 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
449 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
450 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
451 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
452 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
453 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
454 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
455 { MY, TOKENTYPE_IVAL, "MY" },
456 { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" },
457 { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" },
458 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
459 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
460 { OROP, TOKENTYPE_IVAL, "OROP" },
461 { OROR, TOKENTYPE_NONE, "OROR" },
462 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
463 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
464 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
465 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
466 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
467 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
468 DEBUG_TOKEN (IVAL, PERLY_COLON),
469 DEBUG_TOKEN (IVAL, PERLY_COMMA),
470 DEBUG_TOKEN (IVAL, PERLY_DOT),
471 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
472 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
473 DEBUG_TOKEN (IVAL, PERLY_MINUS),
474 DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
475 DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
476 DEBUG_TOKEN (IVAL, PERLY_PLUS),
477 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
478 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
479 DEBUG_TOKEN (IVAL, PERLY_SLASH),
480 DEBUG_TOKEN (IVAL, PERLY_SNAIL),
481 DEBUG_TOKEN (IVAL, PERLY_STAR),
482 DEBUG_TOKEN (IVAL, PERLY_TILDE),
483 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
484 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
485 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
486 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
487 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
488 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
489 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
490 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
491 { PREINC, TOKENTYPE_NONE, "PREINC" },
492 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
493 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
494 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
495 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
496 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
497 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
498 { SUB, TOKENTYPE_NONE, "SUB" },
499 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
500 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
501 { THING, TOKENTYPE_OPVAL, "THING" },
502 { TRY, TOKENTYPE_IVAL, "TRY" },
503 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
504 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
505 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
506 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
507 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
508 { USE, TOKENTYPE_IVAL, "USE" },
509 { WHEN, TOKENTYPE_IVAL, "WHEN" },
510 { WHILE, TOKENTYPE_IVAL, "WHILE" },
511 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
512 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
513 { 0, TOKENTYPE_NONE, NULL }
518 /* dump the returned token in rv, plus any optional arg in pl_yylval */
521 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
523 PERL_ARGS_ASSERT_TOKEREPORT;
526 const char *name = NULL;
527 enum token_type type = TOKENTYPE_NONE;
528 const struct debug_tokens *p;
529 SV* const report = newSVpvs("<== ");
531 for (p = debug_tokens; p->token; p++) {
532 if (p->token == (int)rv) {
539 Perl_sv_catpv(aTHX_ report, name);
540 else if (isGRAPH(rv))
542 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
544 sv_catpvs(report, " (pending identifier)");
547 sv_catpvs(report, "EOF");
549 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
554 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
556 case TOKENTYPE_OPNUM:
557 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
558 PL_op_name[lvalp->ival]);
561 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
563 case TOKENTYPE_OPVAL:
565 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
566 PL_op_name[lvalp->opval->op_type]);
567 if (lvalp->opval->op_type == OP_CONST) {
568 Perl_sv_catpvf(aTHX_ report, " %s",
569 SvPEEK(cSVOPx_sv(lvalp->opval)));
574 sv_catpvs(report, "(opval=null)");
577 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
583 /* print the buffer with suitable escapes */
586 S_printbuf(pTHX_ const char *const fmt, const char *const s)
588 SV* const tmp = newSVpvs("");
590 PERL_ARGS_ASSERT_PRINTBUF;
592 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
593 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
594 GCC_DIAG_RESTORE_STMT;
603 * This subroutine looks for an '=' next to the operator that has just been
604 * parsed and turns it into an ASSIGNOP if it finds one.
608 S_ao(pTHX_ int toketype)
610 if (*PL_bufptr == '=') {
614 case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
615 case OROR: pl_yylval.ival = OP_ORASSIGN; break;
616 case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
621 return REPORT(toketype);
626 * When Perl expects an operator and finds something else, no_op
627 * prints the warning. It always prints "<something> found where
628 * operator expected. It prints "Missing semicolon on previous line?"
629 * if the surprise occurs at the start of the line. "do you need to
630 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
631 * where the compiler doesn't know if foo is a method call or a function.
632 * It prints "Missing operator before end of line" if there's nothing
633 * after the missing operator, or "... before <...>" if there is something
634 * after the missing operator.
636 * PL_bufptr is expected to point to the start of the thing that was found,
637 * and s after the next token or partial token.
641 S_no_op(pTHX_ const char *const what, char *s)
643 char * const oldbp = PL_bufptr;
644 const bool is_first = (PL_oldbufptr == PL_linestart);
646 PERL_ARGS_ASSERT_NO_OP;
652 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
653 if (ckWARN_d(WARN_SYNTAX)) {
655 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
656 "\t(Missing semicolon on previous line?)\n");
657 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
662 for (t = PL_oldoldbufptr;
663 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
664 t += UTF ? UTF8SKIP(t) : 1)
668 if (t < PL_bufptr && isSPACE(*t))
669 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
670 "\t(Do you need to predeclare %" UTF8f "?)\n",
671 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
675 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
676 "\t(Missing operator before %" UTF8f "?)\n",
677 UTF8fARG(UTF, s - oldbp, oldbp));
685 * Complain about missing quote/regexp/heredoc terminator.
686 * If it's called with NULL then it cauterizes the line buffer.
687 * If we're in a delimited string and the delimiter is a control
688 * character, it's reformatted into a two-char sequence like ^C.
693 S_missingterm(pTHX_ char *s, STRLEN len)
695 char tmpbuf[UTF8_MAXBYTES + 1];
700 char * const nl = (char *) my_memrchr(s, '\n', len);
707 else if (PL_multi_close < 32) {
709 tmpbuf[1] = (char)toCTRL(PL_multi_close);
715 if (LIKELY(PL_multi_close < 256)) {
716 *tmpbuf = (char)PL_multi_close;
721 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
728 q = memchr(s, '"', len) ? '\'' : '"';
729 sv = newSVpvn_flags(s, len, SVs_TEMP);
732 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
733 " anywhere before EOF", q, SVfARG(sv), q);
739 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
740 * utf16-to-utf8-reversed.
743 #ifdef PERL_CR_FILTER
747 const char *s = SvPVX_const(sv);
748 const char * const e = s + SvCUR(sv);
750 PERL_ARGS_ASSERT_STRIP_RETURN;
752 /* outer loop optimized to do nothing if there are no CR-LFs */
754 if (*s++ == '\r' && *s == '\n') {
755 /* hit a CR-LF, need to copy the rest */
759 if (*s == '\r' && s[1] == '\n')
770 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
772 const I32 count = FILTER_READ(idx+1, sv, maxlen);
773 if (count > 0 && !maxlen)
780 =for apidoc lex_start
782 Creates and initialises a new lexer/parser state object, supplying
783 a context in which to lex and parse from a new source of Perl code.
784 A pointer to the new state object is placed in L</PL_parser>. An entry
785 is made on the save stack so that upon unwinding, the new state object
786 will be destroyed and the former value of L</PL_parser> will be restored.
787 Nothing else need be done to clean up the parsing context.
789 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
790 non-null, provides a string (in SV form) containing code to be parsed.
791 A copy of the string is made, so subsequent modification of C<line>
792 does not affect parsing. C<rsfp>, if non-null, provides an input stream
793 from which code will be read to be parsed. If both are non-null, the
794 code in C<line> comes first and must consist of complete lines of input,
795 and C<rsfp> supplies the remainder of the source.
797 The C<flags> parameter is reserved for future use. Currently it is only
798 used by perl internally, so extensions should always pass zero.
803 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
804 can share filters with the current parser.
805 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
806 caller, hence isn't owned by the parser, so shouldn't be closed on parser
807 destruction. This is used to handle the case of defaulting to reading the
808 script from the standard input because no filename was given on the command
809 line (without getting confused by situation where STDIN has been closed, so
810 the script handle is opened on fd 0) */
813 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
815 const char *s = NULL;
816 yy_parser *parser, *oparser;
818 if (flags && flags & ~LEX_START_FLAGS)
819 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
821 /* create and initialise a parser */
823 Newxz(parser, 1, yy_parser);
824 parser->old_parser = oparser = PL_parser;
827 parser->stack = NULL;
828 parser->stack_max1 = NULL;
831 /* on scope exit, free this parser and restore any outer one */
833 parser->saved_curcop = PL_curcop;
835 /* initialise lexer state */
837 parser->nexttoke = 0;
838 parser->error_count = oparser ? oparser->error_count : 0;
839 parser->copline = parser->preambling = NOLINE;
840 parser->lex_state = LEX_NORMAL;
841 parser->expect = XSTATE;
843 parser->recheck_utf8_validity = TRUE;
844 parser->rsfp_filters =
845 !(flags & LEX_START_SAME_FILTER) || !oparser
847 : MUTABLE_AV(SvREFCNT_inc(
848 oparser->rsfp_filters
849 ? oparser->rsfp_filters
850 : (oparser->rsfp_filters = newAV())
853 Newx(parser->lex_brackstack, 120, char);
854 Newx(parser->lex_casestack, 12, char);
855 *parser->lex_casestack = '\0';
856 Newxz(parser->lex_shared, 1, LEXSHARED);
860 const U8* first_bad_char_loc;
862 s = SvPV_const(line, len);
865 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
867 &first_bad_char_loc)))
869 _force_out_malformed_utf8_message(first_bad_char_loc,
870 (U8 *) s + SvCUR(line),
872 1 /* 1 means die */ );
873 NOT_REACHED; /* NOTREACHED */
876 parser->linestr = flags & LEX_START_COPIED
877 ? SvREFCNT_inc_simple_NN(line)
878 : newSVpvn_flags(s, len, SvUTF8(line));
880 sv_catpvs(parser->linestr, "\n;");
882 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
885 parser->oldoldbufptr =
888 parser->linestart = SvPVX(parser->linestr);
889 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
890 parser->last_lop = parser->last_uni = NULL;
892 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
893 |LEX_DONT_CLOSE_RSFP));
894 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
895 |LEX_DONT_CLOSE_RSFP));
897 parser->in_pod = parser->filtered = 0;
901 /* delete a parser object */
904 Perl_parser_free(pTHX_ const yy_parser *parser)
906 PERL_ARGS_ASSERT_PARSER_FREE;
908 PL_curcop = parser->saved_curcop;
909 SvREFCNT_dec(parser->linestr);
911 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
912 PerlIO_clearerr(parser->rsfp);
913 else if (parser->rsfp && (!parser->old_parser
914 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
915 PerlIO_close(parser->rsfp);
916 SvREFCNT_dec(parser->rsfp_filters);
917 SvREFCNT_dec(parser->lex_stuff);
918 SvREFCNT_dec(parser->lex_sub_repl);
920 Safefree(parser->lex_brackstack);
921 Safefree(parser->lex_casestack);
922 Safefree(parser->lex_shared);
923 PL_parser = parser->old_parser;
928 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
930 I32 nexttoke = parser->nexttoke;
931 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
933 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
934 && parser->nextval[nexttoke].opval
935 && parser->nextval[nexttoke].opval->op_slabbed
936 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
937 op_free(parser->nextval[nexttoke].opval);
938 parser->nextval[nexttoke].opval = NULL;
945 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
947 Buffer scalar containing the chunk currently under consideration of the
948 text currently being lexed. This is always a plain string scalar (for
949 which C<SvPOK> is true). It is not intended to be used as a scalar by
950 normal scalar means; instead refer to the buffer directly by the pointer
951 variables described below.
953 The lexer maintains various C<char*> pointers to things in the
954 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
955 reallocated, all of these pointers must be updated. Don't attempt to
956 do this manually, but rather use L</lex_grow_linestr> if you need to
957 reallocate the buffer.
959 The content of the text chunk in the buffer is commonly exactly one
960 complete line of input, up to and including a newline terminator,
961 but there are situations where it is otherwise. The octets of the
962 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
963 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
964 flag on this scalar, which may disagree with it.
966 For direct examination of the buffer, the variable
967 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
968 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
969 of these pointers is usually preferable to examination of the scalar
970 through normal scalar means.
972 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
974 Direct pointer to the end of the chunk of text currently being lexed, the
975 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
976 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
977 always located at the end of the buffer, and does not count as part of
978 the buffer's contents.
980 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
982 Points to the current position of lexing inside the lexer buffer.
983 Characters around this point may be freely examined, within
984 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
985 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
986 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
988 Lexing code (whether in the Perl core or not) moves this pointer past
989 the characters that it consumes. It is also expected to perform some
990 bookkeeping whenever a newline character is consumed. This movement
991 can be more conveniently performed by the function L</lex_read_to>,
992 which handles newlines appropriately.
994 Interpretation of the buffer's octets can be abstracted out by
995 using the slightly higher-level functions L</lex_peek_unichar> and
996 L</lex_read_unichar>.
998 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
1000 Points to the start of the current line inside the lexer buffer.
1001 This is useful for indicating at which column an error occurred, and
1002 not much else. This must be updated by any lexing code that consumes
1003 a newline; the function L</lex_read_to> handles this detail.
1009 =for apidoc lex_bufutf8
1011 Indicates whether the octets in the lexer buffer
1012 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1013 of Unicode characters. If not, they should be interpreted as Latin-1
1014 characters. This is analogous to the C<SvUTF8> flag for scalars.
1016 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1017 contains valid UTF-8. Lexing code must be robust in the face of invalid
1020 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1021 is significant, but not the whole story regarding the input character
1022 encoding. Normally, when a file is being read, the scalar contains octets
1023 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1024 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
1025 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1026 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1027 is in effect. This logic may change in the future; use this function
1028 instead of implementing the logic yourself.
1034 Perl_lex_bufutf8(pTHX)
1040 =for apidoc lex_grow_linestr
1042 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1043 at least C<len> octets (including terminating C<NUL>). Returns a
1044 pointer to the reallocated buffer. This is necessary before making
1045 any direct modification of the buffer that would increase its length.
1046 L</lex_stuff_pvn> provides a more convenient way to insert text into
1049 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1050 this function updates all of the lexer's variables that point directly
1057 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1061 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1062 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1065 linestr = PL_parser->linestr;
1066 buf = SvPVX(linestr);
1067 if (len <= SvLEN(linestr))
1070 /* Is the lex_shared linestr SV the same as the current linestr SV?
1071 * Only in this case does re_eval_start need adjusting, since it
1072 * points within lex_shared->ls_linestr's buffer */
1073 current = ( !PL_parser->lex_shared->ls_linestr
1074 || linestr == PL_parser->lex_shared->ls_linestr);
1076 bufend_pos = PL_parser->bufend - buf;
1077 bufptr_pos = PL_parser->bufptr - buf;
1078 oldbufptr_pos = PL_parser->oldbufptr - buf;
1079 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1080 linestart_pos = PL_parser->linestart - buf;
1081 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1082 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1083 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1084 PL_parser->lex_shared->re_eval_start - buf : 0;
1086 buf = sv_grow(linestr, len);
1088 PL_parser->bufend = buf + bufend_pos;
1089 PL_parser->bufptr = buf + bufptr_pos;
1090 PL_parser->oldbufptr = buf + oldbufptr_pos;
1091 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1092 PL_parser->linestart = buf + linestart_pos;
1093 if (PL_parser->last_uni)
1094 PL_parser->last_uni = buf + last_uni_pos;
1095 if (PL_parser->last_lop)
1096 PL_parser->last_lop = buf + last_lop_pos;
1097 if (current && PL_parser->lex_shared->re_eval_start)
1098 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
1103 =for apidoc lex_stuff_pvn
1105 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1106 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1107 reallocating the buffer if necessary. This means that lexing code that
1108 runs later will see the characters as if they had appeared in the input.
1109 It is not recommended to do this as part of normal parsing, and most
1110 uses of this facility run the risk of the inserted characters being
1111 interpreted in an unintended manner.
1113 The string to be inserted is represented by C<len> octets starting
1114 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1115 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1116 The characters are recoded for the lexer buffer, according to how the
1117 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1118 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1119 function is more convenient.
1121 =for apidoc Amnh||LEX_STUFF_UTF8
1127 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1130 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1131 if (flags & ~(LEX_STUFF_UTF8))
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1134 if (flags & LEX_STUFF_UTF8) {
1137 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1139 const char *p, *e = pv+len;;
1142 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1143 bufptr = PL_parser->bufptr;
1144 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1145 SvCUR_set(PL_parser->linestr,
1146 SvCUR(PL_parser->linestr) + len+highhalf);
1147 PL_parser->bufend += len+highhalf;
1148 for (p = pv; p != e; p++) {
1149 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1153 if (flags & LEX_STUFF_UTF8) {
1154 STRLEN highhalf = 0;
1155 const char *p, *e = pv+len;
1156 for (p = pv; p != e; p++) {
1158 if (UTF8_IS_ABOVE_LATIN1(c)) {
1159 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1160 "non-Latin-1 character into Latin-1 input");
1161 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1164 } else assert(UTF8_IS_INVARIANT(c));
1168 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1169 bufptr = PL_parser->bufptr;
1170 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1171 SvCUR_set(PL_parser->linestr,
1172 SvCUR(PL_parser->linestr) + len-highhalf);
1173 PL_parser->bufend += len-highhalf;
1176 if (UTF8_IS_INVARIANT(*p)) {
1182 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1188 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1189 bufptr = PL_parser->bufptr;
1190 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1191 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1192 PL_parser->bufend += len;
1193 Copy(pv, bufptr, len, char);
1199 =for apidoc lex_stuff_pv
1201 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1202 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1203 reallocating the buffer if necessary. This means that lexing code that
1204 runs later will see the characters as if they had appeared in the input.
1205 It is not recommended to do this as part of normal parsing, and most
1206 uses of this facility run the risk of the inserted characters being
1207 interpreted in an unintended manner.
1209 The string to be inserted is represented by octets starting at C<pv>
1210 and continuing to the first nul. These octets are interpreted as either
1211 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1212 in C<flags>. The characters are recoded for the lexer buffer, according
1213 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1214 If it is not convenient to nul-terminate a string to be inserted, the
1215 L</lex_stuff_pvn> function is more appropriate.
1221 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1223 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1224 lex_stuff_pvn(pv, strlen(pv), flags);
1228 =for apidoc lex_stuff_sv
1230 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1231 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1232 reallocating the buffer if necessary. This means that lexing code that
1233 runs later will see the characters as if they had appeared in the input.
1234 It is not recommended to do this as part of normal parsing, and most
1235 uses of this facility run the risk of the inserted characters being
1236 interpreted in an unintended manner.
1238 The string to be inserted is the string value of C<sv>. The characters
1239 are recoded for the lexer buffer, according to how the buffer is currently
1240 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1241 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1242 need to construct a scalar.
1248 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1252 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1254 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1256 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1260 =for apidoc lex_unstuff
1262 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1263 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1264 This hides the discarded text from any lexing code that runs later,
1265 as if the text had never appeared.
1267 This is not the normal way to consume lexed text. For that, use
1274 Perl_lex_unstuff(pTHX_ char *ptr)
1278 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1279 buf = PL_parser->bufptr;
1281 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1284 bufend = PL_parser->bufend;
1286 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1287 unstuff_len = ptr - buf;
1288 Move(ptr, buf, bufend+1-ptr, char);
1289 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1290 PL_parser->bufend = bufend - unstuff_len;
1294 =for apidoc lex_read_to
1296 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1297 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1298 performing the correct bookkeeping whenever a newline character is passed.
1299 This is the normal way to consume lexed text.
1301 Interpretation of the buffer's octets can be abstracted out by
1302 using the slightly higher-level functions L</lex_peek_unichar> and
1303 L</lex_read_unichar>.
1309 Perl_lex_read_to(pTHX_ char *ptr)
1312 PERL_ARGS_ASSERT_LEX_READ_TO;
1313 s = PL_parser->bufptr;
1314 if (ptr < s || ptr > PL_parser->bufend)
1315 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1316 for (; s != ptr; s++)
1318 COPLINE_INC_WITH_HERELINES;
1319 PL_parser->linestart = s+1;
1321 PL_parser->bufptr = ptr;
1325 =for apidoc lex_discard_to
1327 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1328 up to C<ptr>. The remaining content of the buffer will be moved, and
1329 all pointers into the buffer updated appropriately. C<ptr> must not
1330 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1331 it is not permitted to discard text that has yet to be lexed.
1333 Normally it is not necessarily to do this directly, because it suffices to
1334 use the implicit discarding behaviour of L</lex_next_chunk> and things
1335 based on it. However, if a token stretches across multiple lines,
1336 and the lexing code has kept multiple lines of text in the buffer for
1337 that purpose, then after completion of the token it would be wise to
1338 explicitly discard the now-unneeded earlier lines, to avoid future
1339 multi-line tokens growing the buffer without bound.
1345 Perl_lex_discard_to(pTHX_ char *ptr)
1349 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1350 buf = SvPVX(PL_parser->linestr);
1352 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1355 if (ptr > PL_parser->bufptr)
1356 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1357 discard_len = ptr - buf;
1358 if (PL_parser->oldbufptr < ptr)
1359 PL_parser->oldbufptr = ptr;
1360 if (PL_parser->oldoldbufptr < ptr)
1361 PL_parser->oldoldbufptr = ptr;
1362 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1363 PL_parser->last_uni = NULL;
1364 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1365 PL_parser->last_lop = NULL;
1366 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1367 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1368 PL_parser->bufend -= discard_len;
1369 PL_parser->bufptr -= discard_len;
1370 PL_parser->oldbufptr -= discard_len;
1371 PL_parser->oldoldbufptr -= discard_len;
1372 if (PL_parser->last_uni)
1373 PL_parser->last_uni -= discard_len;
1374 if (PL_parser->last_lop)
1375 PL_parser->last_lop -= discard_len;
1379 Perl_notify_parser_that_changed_to_utf8(pTHX)
1381 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1382 * off to on. At compile time, this has the effect of entering a 'use
1383 * utf8' section. This means that any input was not previously checked for
1384 * UTF-8 (because it was off), but now we do need to check it, or our
1385 * assumptions about the input being sane could be wrong, and we could
1386 * segfault. This routine just sets a flag so that the next time we look
1387 * at the input we do the well-formed UTF-8 check. If we aren't in the
1388 * proper phase, there may not be a parser object, but if there is, setting
1389 * the flag is harmless */
1392 PL_parser->recheck_utf8_validity = TRUE;
1397 =for apidoc lex_next_chunk
1399 Reads in the next chunk of text to be lexed, appending it to
1400 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1401 looked to the end of the current chunk and wants to know more. It is
1402 usual, but not necessary, for lexing to have consumed the entirety of
1403 the current chunk at this time.
1405 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1406 chunk (i.e., the current chunk has been entirely consumed), normally the
1407 current chunk will be discarded at the same time that the new chunk is
1408 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1409 will not be discarded. If the current chunk has not been entirely
1410 consumed, then it will not be discarded regardless of the flag.
1412 Returns true if some new text was added to the buffer, or false if the
1413 buffer has reached the end of the input text.
1415 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1420 #define LEX_FAKE_EOF 0x80000000
1421 #define LEX_NO_TERM 0x40000000 /* here-doc */
1424 Perl_lex_next_chunk(pTHX_ U32 flags)
1428 STRLEN old_bufend_pos, new_bufend_pos;
1429 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1430 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1431 bool got_some_for_debugger = 0;
1434 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1435 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1436 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1438 linestr = PL_parser->linestr;
1439 buf = SvPVX(linestr);
1440 if (!(flags & LEX_KEEP_PREVIOUS)
1441 && PL_parser->bufptr == PL_parser->bufend)
1443 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1445 if (PL_parser->last_uni != PL_parser->bufend)
1446 PL_parser->last_uni = NULL;
1447 if (PL_parser->last_lop != PL_parser->bufend)
1448 PL_parser->last_lop = NULL;
1449 last_uni_pos = last_lop_pos = 0;
1451 SvCUR_set(linestr, 0);
1453 old_bufend_pos = PL_parser->bufend - buf;
1454 bufptr_pos = PL_parser->bufptr - buf;
1455 oldbufptr_pos = PL_parser->oldbufptr - buf;
1456 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1457 linestart_pos = PL_parser->linestart - buf;
1458 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1459 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1461 if (flags & LEX_FAKE_EOF) {
1463 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1465 } else if (filter_gets(linestr, old_bufend_pos)) {
1467 got_some_for_debugger = 1;
1468 } else if (flags & LEX_NO_TERM) {
1471 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1474 /* End of real input. Close filehandle (unless it was STDIN),
1475 * then add implicit termination.
1477 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1478 PerlIO_clearerr(PL_parser->rsfp);
1479 else if (PL_parser->rsfp)
1480 (void)PerlIO_close(PL_parser->rsfp);
1481 PL_parser->rsfp = NULL;
1482 PL_parser->in_pod = PL_parser->filtered = 0;
1483 if (!PL_in_eval && PL_minus_p) {
1485 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1486 PL_minus_n = PL_minus_p = 0;
1487 } else if (!PL_in_eval && PL_minus_n) {
1488 sv_catpvs(linestr, /*{*/";}");
1491 sv_catpvs(linestr, ";");
1494 buf = SvPVX(linestr);
1495 new_bufend_pos = SvCUR(linestr);
1496 PL_parser->bufend = buf + new_bufend_pos;
1497 PL_parser->bufptr = buf + bufptr_pos;
1500 const U8* first_bad_char_loc;
1501 if (UNLIKELY(! is_utf8_string_loc(
1502 (U8 *) PL_parser->bufptr,
1503 PL_parser->bufend - PL_parser->bufptr,
1504 &first_bad_char_loc)))
1506 _force_out_malformed_utf8_message(first_bad_char_loc,
1507 (U8 *) PL_parser->bufend,
1509 1 /* 1 means die */ );
1510 NOT_REACHED; /* NOTREACHED */
1514 PL_parser->oldbufptr = buf + oldbufptr_pos;
1515 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1516 PL_parser->linestart = buf + linestart_pos;
1517 if (PL_parser->last_uni)
1518 PL_parser->last_uni = buf + last_uni_pos;
1519 if (PL_parser->last_lop)
1520 PL_parser->last_lop = buf + last_lop_pos;
1521 if (PL_parser->preambling != NOLINE) {
1522 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1523 PL_parser->preambling = NOLINE;
1525 if ( got_some_for_debugger
1526 && PERLDB_LINE_OR_SAVESRC
1527 && PL_curstash != PL_debstash)
1529 /* debugger active and we're not compiling the debugger code,
1530 * so store the line into the debugger's array of lines
1532 update_debugger_info(NULL, buf+old_bufend_pos,
1533 new_bufend_pos-old_bufend_pos);
1539 =for apidoc lex_peek_unichar
1541 Looks ahead one (Unicode) character in the text currently being lexed.
1542 Returns the codepoint (unsigned integer value) of the next character,
1543 or -1 if lexing has reached the end of the input text. To consume the
1544 peeked character, use L</lex_read_unichar>.
1546 If the next character is in (or extends into) the next chunk of input
1547 text, the next chunk will be read in. Normally the current chunk will be
1548 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1549 bit set, then the current chunk will not be discarded.
1551 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1552 is encountered, an exception is generated.
1558 Perl_lex_peek_unichar(pTHX_ U32 flags)
1561 if (flags & ~(LEX_KEEP_PREVIOUS))
1562 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1563 s = PL_parser->bufptr;
1564 bufend = PL_parser->bufend;
1570 if (!lex_next_chunk(flags))
1572 s = PL_parser->bufptr;
1573 bufend = PL_parser->bufend;
1576 if (UTF8_IS_INVARIANT(head))
1578 if (UTF8_IS_START(head)) {
1579 len = UTF8SKIP(&head);
1580 while ((STRLEN)(bufend-s) < len) {
1581 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1583 s = PL_parser->bufptr;
1584 bufend = PL_parser->bufend;
1587 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1588 if (retlen == (STRLEN)-1) {
1589 _force_out_malformed_utf8_message((U8 *) s,
1592 1 /* 1 means die */ );
1593 NOT_REACHED; /* NOTREACHED */
1598 if (!lex_next_chunk(flags))
1600 s = PL_parser->bufptr;
1607 =for apidoc lex_read_unichar
1609 Reads the next (Unicode) character in the text currently being lexed.
1610 Returns the codepoint (unsigned integer value) of the character read,
1611 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1612 if lexing has reached the end of the input text. To non-destructively
1613 examine the next character, use L</lex_peek_unichar> instead.
1615 If the next character is in (or extends into) the next chunk of input
1616 text, the next chunk will be read in. Normally the current chunk will be
1617 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1618 bit set, then the current chunk will not be discarded.
1620 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1621 is encountered, an exception is generated.
1627 Perl_lex_read_unichar(pTHX_ U32 flags)
1630 if (flags & ~(LEX_KEEP_PREVIOUS))
1631 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1632 c = lex_peek_unichar(flags);
1635 COPLINE_INC_WITH_HERELINES;
1637 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1639 ++(PL_parser->bufptr);
1645 =for apidoc lex_read_space
1647 Reads optional spaces, in Perl style, in the text currently being
1648 lexed. The spaces may include ordinary whitespace characters and
1649 Perl-style comments. C<#line> directives are processed if encountered.
1650 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1651 at a non-space character (or the end of the input text).
1653 If spaces extend into the next chunk of input text, the next chunk will
1654 be read in. Normally the current chunk will be discarded at the same
1655 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1656 chunk will not be discarded.
1661 #define LEX_NO_INCLINE 0x40000000
1662 #define LEX_NO_NEXT_CHUNK 0x80000000
1665 Perl_lex_read_space(pTHX_ U32 flags)
1668 const bool can_incline = !(flags & LEX_NO_INCLINE);
1669 bool need_incline = 0;
1670 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1671 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1672 s = PL_parser->bufptr;
1673 bufend = PL_parser->bufend;
1679 } while (!(c == '\n' || (c == 0 && s == bufend)));
1680 } else if (c == '\n') {
1683 PL_parser->linestart = s;
1689 } else if (isSPACE(c)) {
1691 } else if (c == 0 && s == bufend) {
1694 if (flags & LEX_NO_NEXT_CHUNK)
1696 PL_parser->bufptr = s;
1697 l = CopLINE(PL_curcop);
1698 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1699 got_more = lex_next_chunk(flags);
1700 CopLINE_set(PL_curcop, l);
1701 s = PL_parser->bufptr;
1702 bufend = PL_parser->bufend;
1705 if (can_incline && need_incline && PL_parser->rsfp) {
1715 PL_parser->bufptr = s;
1720 =for apidoc validate_proto
1722 This function performs syntax checking on a prototype, C<proto>.
1723 If C<warn> is true, any illegal characters or mismatched brackets
1724 will trigger illegalproto warnings, declaring that they were
1725 detected in the prototype for C<name>.
1727 The return value is C<true> if this is a valid prototype, and
1728 C<false> if it is not, regardless of whether C<warn> was C<true> or
1731 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1738 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1740 STRLEN len, origlen;
1742 bool bad_proto = FALSE;
1743 bool in_brackets = FALSE;
1744 bool after_slash = FALSE;
1745 char greedy_proto = ' ';
1746 bool proto_after_greedy_proto = FALSE;
1747 bool must_be_last = FALSE;
1748 bool underscore = FALSE;
1749 bool bad_proto_after_underscore = FALSE;
1751 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1756 p = SvPV(proto, len);
1758 for (; len--; p++) {
1761 proto_after_greedy_proto = TRUE;
1763 if (!memCHRs(";@%", *p))
1764 bad_proto_after_underscore = TRUE;
1767 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1774 in_brackets = FALSE;
1775 else if ((*p == '@' || *p == '%')
1779 must_be_last = TRUE;
1788 after_slash = FALSE;
1793 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1796 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1797 origlen, UNI_DISPLAY_ISPRINT)
1798 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1800 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1801 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1802 sv_catpvs(name2, "::");
1803 sv_catsv(name2, (SV *)name);
1807 if (proto_after_greedy_proto)
1808 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1809 "Prototype after '%c' for %" SVf " : %s",
1810 greedy_proto, SVfARG(name), p);
1812 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1813 "Missing ']' in prototype for %" SVf " : %s",
1816 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1817 "Illegal character in prototype for %" SVf " : %s",
1819 if (bad_proto_after_underscore)
1820 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1821 "Illegal character after '_' in prototype for %" SVf " : %s",
1825 return (! (proto_after_greedy_proto || bad_proto) );
1830 * This subroutine has nothing to do with tilting, whether at windmills
1831 * or pinball tables. Its name is short for "increment line". It
1832 * increments the current line number in CopLINE(PL_curcop) and checks
1833 * to see whether the line starts with a comment of the form
1834 * # line 500 "foo.pm"
1835 * If so, it sets the current line number and file to the values in the comment.
1839 S_incline(pTHX_ const char *s, const char *end)
1847 PERL_ARGS_ASSERT_INCLINE;
1851 COPLINE_INC_WITH_HERELINES;
1852 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1853 && s+1 == PL_bufend && *s == ';') {
1854 /* fake newline in string eval */
1855 CopLINE_dec(PL_curcop);
1860 while (SPACE_OR_TAB(*s))
1862 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1863 s += sizeof("line") - 1;
1866 if (SPACE_OR_TAB(*s))
1870 while (SPACE_OR_TAB(*s))
1878 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1880 while (SPACE_OR_TAB(*s))
1882 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1888 while (*t && !isSPACE(*t))
1892 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1894 if (*e != '\n' && *e != '\0')
1895 return; /* false alarm */
1897 if (!grok_atoUV(n, &uv, &e))
1899 line_num = ((line_t)uv) - 1;
1902 const STRLEN len = t - s;
1904 if (!PL_rsfp && !PL_parser->filtered) {
1905 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1906 * to *{"::_<newfilename"} */
1907 /* However, the long form of evals is only turned on by the
1908 debugger - usually they're "(eval %lu)" */
1909 GV * const cfgv = CopFILEGV(PL_curcop);
1912 STRLEN tmplen2 = len;
1916 if (tmplen2 + 2 <= sizeof smallbuf)
1919 Newx(tmpbuf2, tmplen2 + 2, char);
1924 memcpy(tmpbuf2 + 2, s, tmplen2);
1927 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1929 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1930 /* adjust ${"::_<newfilename"} to store the new file name */
1931 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1932 /* The line number may differ. If that is the case,
1933 alias the saved lines that are in the array.
1934 Otherwise alias the whole array. */
1935 if (CopLINE(PL_curcop) == line_num) {
1936 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1937 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1939 else if (GvAV(cfgv)) {
1940 AV * const av = GvAV(cfgv);
1941 const line_t start = CopLINE(PL_curcop)+1;
1942 SSize_t items = AvFILLp(av) - start;
1944 AV * const av2 = GvAVn(gv2);
1945 SV **svp = AvARRAY(av) + start;
1946 Size_t l = line_num+1;
1947 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1948 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1953 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1956 CopFILE_free(PL_curcop);
1957 CopFILE_setn(PL_curcop, s, len);
1959 CopLINE_set(PL_curcop, line_num);
1963 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1965 AV *av = CopFILEAVx(PL_curcop);
1968 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1970 sv = *av_fetch(av, 0, 1);
1971 SvUPGRADE(sv, SVt_PVMG);
1973 if (!SvPOK(sv)) SvPVCLEAR(sv);
1975 sv_catsv(sv, orig_sv);
1977 sv_catpvn(sv, buf, len);
1982 if (PL_parser->preambling == NOLINE)
1983 av_store(av, CopLINE(PL_curcop), sv);
1989 * Called to gobble the appropriate amount and type of whitespace.
1990 * Skips comments as well.
1991 * Returns the next character after the whitespace that is skipped.
1994 * Same thing, but look ahead without incrementing line numbers or
1995 * adjusting PL_linestart.
1998 #define skipspace(s) skipspace_flags(s, 0)
1999 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
2002 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2004 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2005 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2006 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2009 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2011 lex_read_space(flags | LEX_KEEP_PREVIOUS |
2012 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2013 LEX_NO_NEXT_CHUNK : 0));
2015 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2016 if (PL_linestart > PL_bufptr)
2017 PL_bufptr = PL_linestart;
2025 * Check the unary operators to ensure there's no ambiguity in how they're
2026 * used. An ambiguous piece of code would be:
2028 * This doesn't mean rand() + 5. Because rand() is a unary operator,
2029 * the +5 is its argument.
2037 if (PL_oldoldbufptr != PL_last_uni)
2039 while (isSPACE(*PL_last_uni))
2042 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2043 s += UTF ? UTF8SKIP(s) : 1;
2044 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2047 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2048 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2049 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2053 * LOP : macro to build a list operator. Its behaviour has been replaced
2054 * with a subroutine, S_lop() for which LOP is just another name.
2057 #define LOP(f,x) return lop(f,x,s)
2061 * Build a list operator (or something that might be one). The rules:
2062 * - if we have a next token, then it's a list operator (no parens) for
2063 * which the next token has already been parsed; e.g.,
2066 * - if the next thing is an opening paren, then it's a function
2067 * - else it's a list operator
2071 S_lop(pTHX_ I32 f, U8 x, char *s)
2073 PERL_ARGS_ASSERT_LOP;
2078 PL_last_lop = PL_oldbufptr;
2079 PL_last_lop_op = (OPCODE)f;
2084 return REPORT(FUNC);
2087 return REPORT(FUNC);
2090 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2091 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2092 return REPORT(LSTOP);
2098 * When the lexer realizes it knows the next token (for instance,
2099 * it is reordering tokens for the parser) then it can call S_force_next
2100 * to know what token to return the next time the lexer is called. Caller
2101 * will need to set PL_nextval[] and possibly PL_expect to ensure
2102 * the lexer handles the token correctly.
2106 S_force_next(pTHX_ I32 type)
2110 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2111 tokereport(type, &NEXTVAL_NEXTTOKE);
2114 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2115 PL_nexttype[PL_nexttoke] = type;
2122 * This subroutine handles postfix deref syntax after the arrow has already
2123 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2124 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2125 * only the first, leaving yylex to find the next.
2129 S_postderef(pTHX_ int const funny, char const next)
2131 assert(funny == DOLSHARP
2132 || funny == PERLY_DOLLAR
2133 || funny == PERLY_SNAIL
2134 || funny == PERLY_PERCENT_SIGN
2135 || funny == PERLY_AMPERSAND
2136 || funny == PERLY_STAR
2139 PL_expect = XOPERATOR;
2140 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2141 assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2142 PL_lex_state = LEX_INTERPEND;
2143 if (PERLY_SNAIL == funny)
2144 force_next(POSTJOIN);
2146 force_next(PERLY_STAR);
2150 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2151 && !PL_lex_brackets)
2153 PL_expect = XOPERATOR;
2162 int yyc = PL_parser->yychar;
2163 if (yyc != YYEMPTY) {
2165 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2166 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2167 PL_lex_allbrackets--;
2169 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2170 } else if (yyc == PERLY_PAREN_OPEN) {
2171 PL_lex_allbrackets--;
2176 PL_parser->yychar = YYEMPTY;
2181 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2183 SV * const sv = newSVpvn_utf8(start, len,
2187 && is_utf8_non_invariant_string((const U8*)start, len));
2193 * When the lexer knows the next thing is a word (for instance, it has
2194 * just seen -> and it knows that the next char is a word char, then
2195 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2199 * char *start : buffer position (must be within PL_linestr)
2200 * int token : PL_next* will be this type of bare word
2201 * (e.g., METHOD,BAREWORD)
2202 * int check_keyword : if true, Perl checks to make sure the word isn't
2203 * a keyword (do this if the word is a label, e.g. goto FOO)
2204 * int allow_pack : if true, : characters will also be allowed (require,
2205 * use, etc. do this)
2209 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2214 PERL_ARGS_ASSERT_FORCE_WORD;
2216 start = skipspace(start);
2218 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2219 || (allow_pack && *s == ':' && s[1] == ':') )
2221 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2222 if (check_keyword) {
2223 char *s2 = PL_tokenbuf;
2225 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2226 s2 += sizeof("CORE::") - 1;
2227 len2 -= sizeof("CORE::") - 1;
2229 if (keyword(s2, len2, 0))
2232 if (token == METHOD) {
2237 PL_expect = XOPERATOR;
2240 NEXTVAL_NEXTTOKE.opval
2241 = newSVOP(OP_CONST,0,
2242 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2243 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2251 * Called when the lexer wants $foo *foo &foo etc, but the program
2252 * text only contains the "foo" portion. The first argument is a pointer
2253 * to the "foo", and the second argument is the type symbol to prefix.
2254 * Forces the next token to be a "BAREWORD".
2255 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2259 S_force_ident(pTHX_ const char *s, int kind)
2261 PERL_ARGS_ASSERT_FORCE_IDENT;
2264 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2265 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2266 UTF ? SVf_UTF8 : 0));
2267 NEXTVAL_NEXTTOKE.opval = o;
2268 force_next(BAREWORD);
2270 o->op_private = OPpCONST_ENTERED;
2271 /* XXX see note in pp_entereval() for why we forgo typo
2272 warnings if the symbol must be introduced in an eval.
2274 gv_fetchpvn_flags(s, len,
2275 (PL_in_eval ? GV_ADDMULTI
2276 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2277 kind == PERLY_DOLLAR ? SVt_PV :
2278 kind == PERLY_SNAIL ? SVt_PVAV :
2279 kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2287 S_force_ident_maybe_lex(pTHX_ char pit)
2289 NEXTVAL_NEXTTOKE.ival = pit;
2294 Perl_str_to_version(pTHX_ SV *sv)
2299 const char *start = SvPV_const(sv,len);
2300 const char * const end = start + len;
2301 const bool utf = cBOOL(SvUTF8(sv));
2303 PERL_ARGS_ASSERT_STR_TO_VERSION;
2305 while (start < end) {
2309 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2314 retval += ((NV)n)/nshift;
2323 * Forces the next token to be a version number.
2324 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2325 * and if "guessing" is TRUE, then no new token is created (and the caller
2326 * must use an alternative parsing method).
2330 S_force_version(pTHX_ char *s, int guessing)
2335 PERL_ARGS_ASSERT_FORCE_VERSION;
2343 while (isDIGIT(*d) || *d == '_' || *d == '.')
2345 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2347 s = scan_num(s, &pl_yylval);
2348 version = pl_yylval.opval;
2349 ver = cSVOPx(version)->op_sv;
2350 if (SvPOK(ver) && !SvNIOK(ver)) {
2351 SvUPGRADE(ver, SVt_PVNV);
2352 SvNV_set(ver, str_to_version(ver));
2353 SvNOK_on(ver); /* hint that it is a version */
2356 else if (guessing) {
2361 /* NOTE: The parser sees the package name and the VERSION swapped */
2362 NEXTVAL_NEXTTOKE.opval = version;
2363 force_next(BAREWORD);
2369 * S_force_strict_version
2370 * Forces the next token to be a version number using strict syntax rules.
2374 S_force_strict_version(pTHX_ char *s)
2377 const char *errstr = NULL;
2379 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2381 while (isSPACE(*s)) /* leading whitespace */
2384 if (is_STRICT_VERSION(s,&errstr)) {
2386 s = (char *)scan_version(s, ver, 0);
2387 version = newSVOP(OP_CONST, 0, ver);
2389 else if ((*s != ';' && *s != '{' && *s != '}' )
2390 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2394 yyerror(errstr); /* version required */
2398 /* NOTE: The parser sees the package name and the VERSION swapped */
2399 NEXTVAL_NEXTTOKE.opval = version;
2400 force_next(BAREWORD);
2407 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2408 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2409 * unchanged, and a new SV containing the modified input is returned.
2413 S_tokeq(pTHX_ SV *sv)
2420 PERL_ARGS_ASSERT_TOKEQ;
2424 assert (!SvIsCOW(sv));
2425 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2429 /* This is relying on the SV being "well formed" with a trailing '\0' */
2430 while (s < send && !(*s == '\\' && s[1] == '\\'))
2435 if ( PL_hints & HINT_NEW_STRING ) {
2436 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2437 SVs_TEMP | SvUTF8(sv));
2441 if (s + 1 < send && (s[1] == '\\'))
2442 s++; /* all that, just for this */
2447 SvCUR_set(sv, d - SvPVX_const(sv));
2449 if ( PL_hints & HINT_NEW_STRING )
2450 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2455 * Now come three functions related to double-quote context,
2456 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2457 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2458 * interact with PL_lex_state, and create fake ( ... ) argument lists
2459 * to handle functions and concatenation.
2463 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2468 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2470 * Pattern matching will set PL_lex_op to the pattern-matching op to
2471 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2473 * OP_CONST is easy--just make the new op and return.
2475 * Everything else becomes a FUNC.
2477 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2478 * had an OP_CONST. This just sets us up for a
2479 * call to S_sublex_push().
2483 S_sublex_start(pTHX)
2485 const I32 op_type = pl_yylval.ival;
2487 if (op_type == OP_NULL) {
2488 pl_yylval.opval = PL_lex_op;
2492 if (op_type == OP_CONST) {
2493 SV *sv = PL_lex_stuff;
2494 PL_lex_stuff = NULL;
2497 if (SvTYPE(sv) == SVt_PVIV) {
2498 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2500 const char * const p = SvPV_const(sv, len);
2501 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2505 pl_yylval.opval = newSVOP(op_type, 0, sv);
2509 PL_parser->lex_super_state = PL_lex_state;
2510 PL_parser->lex_sub_inwhat = (U16)op_type;
2511 PL_parser->lex_sub_op = PL_lex_op;
2512 PL_parser->sub_no_recover = FALSE;
2513 PL_parser->sub_error_count = PL_error_count;
2514 PL_lex_state = LEX_INTERPPUSH;
2518 pl_yylval.opval = PL_lex_op;
2528 * Create a new scope to save the lexing state. The scope will be
2529 * ended in S_sublex_done. Returns a '(', starting the function arguments
2530 * to the uc, lc, etc. found before.
2531 * Sets PL_lex_state to LEX_INTERPCONCAT.
2538 const bool is_heredoc = PL_multi_close == '<';
2541 PL_lex_state = PL_parser->lex_super_state;
2542 SAVEI8(PL_lex_dojoin);
2543 SAVEI32(PL_lex_brackets);
2544 SAVEI32(PL_lex_allbrackets);
2545 SAVEI32(PL_lex_formbrack);
2546 SAVEI8(PL_lex_fakeeof);
2547 SAVEI32(PL_lex_casemods);
2548 SAVEI32(PL_lex_starts);
2549 SAVEI8(PL_lex_state);
2550 SAVESPTR(PL_lex_repl);
2551 SAVEVPTR(PL_lex_inpat);
2552 SAVEI16(PL_lex_inwhat);
2555 SAVECOPLINE(PL_curcop);
2556 SAVEI32(PL_multi_end);
2557 SAVEI32(PL_parser->herelines);
2558 PL_parser->herelines = 0;
2560 SAVEIV(PL_multi_close);
2561 SAVEPPTR(PL_bufptr);
2562 SAVEPPTR(PL_bufend);
2563 SAVEPPTR(PL_oldbufptr);
2564 SAVEPPTR(PL_oldoldbufptr);
2565 SAVEPPTR(PL_last_lop);
2566 SAVEPPTR(PL_last_uni);
2567 SAVEPPTR(PL_linestart);
2568 SAVESPTR(PL_linestr);
2569 SAVEGENERICPV(PL_lex_brackstack);
2570 SAVEGENERICPV(PL_lex_casestack);
2571 SAVEGENERICPV(PL_parser->lex_shared);
2572 SAVEBOOL(PL_parser->lex_re_reparsing);
2573 SAVEI32(PL_copline);
2575 /* The here-doc parser needs to be able to peek into outer lexing
2576 scopes to find the body of the here-doc. So we put PL_linestr and
2577 PL_bufptr into lex_shared, to ‘share’ those values.
2579 PL_parser->lex_shared->ls_linestr = PL_linestr;
2580 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2582 PL_linestr = PL_lex_stuff;
2583 PL_lex_repl = PL_parser->lex_sub_repl;
2584 PL_lex_stuff = NULL;
2585 PL_parser->lex_sub_repl = NULL;
2587 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2588 set for an inner quote-like operator and then an error causes scope-
2589 popping. We must not have a PL_lex_stuff value left dangling, as
2590 that breaks assumptions elsewhere. See bug #123617. */
2591 SAVEGENERICSV(PL_lex_stuff);
2592 SAVEGENERICSV(PL_parser->lex_sub_repl);
2594 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2595 = SvPVX(PL_linestr);
2596 PL_bufend += SvCUR(PL_linestr);
2597 PL_last_lop = PL_last_uni = NULL;
2598 SAVEFREESV(PL_linestr);
2599 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2601 PL_lex_dojoin = FALSE;
2602 PL_lex_brackets = PL_lex_formbrack = 0;
2603 PL_lex_allbrackets = 0;
2604 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2605 Newx(PL_lex_brackstack, 120, char);
2606 Newx(PL_lex_casestack, 12, char);
2607 PL_lex_casemods = 0;
2608 *PL_lex_casestack = '\0';
2610 PL_lex_state = LEX_INTERPCONCAT;
2612 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2613 PL_copline = NOLINE;
2615 Newxz(shared, 1, LEXSHARED);
2616 shared->ls_prev = PL_parser->lex_shared;
2617 PL_parser->lex_shared = shared;
2619 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2620 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2621 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2622 PL_lex_inpat = PL_parser->lex_sub_op;
2624 PL_lex_inpat = NULL;
2626 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2627 PL_in_eval &= ~EVAL_RE_REPARSING;
2634 * Restores lexer state after a S_sublex_push.
2640 if (!PL_lex_starts++) {
2641 SV * const sv = newSVpvs("");
2642 if (SvUTF8(PL_linestr))
2644 PL_expect = XOPERATOR;
2645 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2649 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2650 PL_lex_state = LEX_INTERPCASEMOD;
2654 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2655 assert(PL_lex_inwhat != OP_TRANSR);
2657 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2658 PL_linestr = PL_lex_repl;
2660 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2661 PL_bufend += SvCUR(PL_linestr);
2662 PL_last_lop = PL_last_uni = NULL;
2663 PL_lex_dojoin = FALSE;
2664 PL_lex_brackets = 0;
2665 PL_lex_allbrackets = 0;
2666 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2667 PL_lex_casemods = 0;
2668 *PL_lex_casestack = '\0';
2670 if (SvEVALED(PL_lex_repl)) {
2671 PL_lex_state = LEX_INTERPNORMAL;
2673 /* we don't clear PL_lex_repl here, so that we can check later
2674 whether this is an evalled subst; that means we rely on the
2675 logic to ensure sublex_done() is called again only via the
2676 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2679 PL_lex_state = LEX_INTERPCONCAT;
2682 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2683 CopLINE(PL_curcop) +=
2684 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2685 + PL_parser->herelines;
2686 PL_parser->herelines = 0;
2691 const line_t l = CopLINE(PL_curcop);
2693 if (PL_parser->sub_error_count != PL_error_count) {
2694 if (PL_parser->sub_no_recover) {
2699 if (PL_multi_close == '<')
2700 PL_parser->herelines += l - PL_multi_end;
2701 PL_bufend = SvPVX(PL_linestr);
2702 PL_bufend += SvCUR(PL_linestr);
2703 PL_expect = XOPERATOR;
2709 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2710 const STRLEN context_len, const char ** error_msg)
2712 /* Load the official _charnames module if not already there. The
2713 * parameters are just to give info for any error messages generated:
2714 * char_name a name to look up which is the reason for loading this
2715 * context 'char_name' in the context in the input in which it appears
2716 * context_len how many bytes 'context' occupies
2717 * error_msg *error_msg will be set to any error
2719 * Returns the ^H table if success; otherwise NULL */
2726 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2728 /* This loop is executed 1 1/2 times. On the first time through, if it
2729 * isn't already loaded, try loading it, and iterate just once to see if it
2731 for (i = 0; i < 2; i++) {
2732 table = GvHV(PL_hintgv); /* ^H */
2735 && (PL_hints & HINT_LOCALIZE_HH)
2736 && (cvp = hv_fetchs(table, "charnames", FALSE))
2739 return table; /* Quit if already loaded */
2743 Perl_load_module(aTHX_
2745 newSVpvs("_charnames"),
2747 /* version parameter; no need to specify it, as if we get too early
2748 * a version, will fail anyway, not being able to find 'charnames'
2757 /* Here, it failed; new_constant will give appropriate error messages */
2759 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2760 context, context_len, error_msg);
2767 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2769 /* This justs wraps get_and_check_backslash_N_name() to output any error
2770 * message it returns. */
2772 const char * error_msg = NULL;
2775 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2777 /* charnames doesn't work well if there have been errors found */
2778 if (PL_error_count > 0) {
2782 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2785 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2792 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2795 const char ** error_msg)
2797 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2798 * interior, hence to the "}". Finds what the name resolves to, returning
2799 * an SV* containing it; NULL if no valid one found.
2801 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2802 * doesn't have to be. */
2812 /* Points to the beginning of the \N{... so that any messages include the
2813 * context of what's failing*/
2814 const char* context = s - 3;
2815 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2818 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2821 assert(s > (char *) 3);
2823 while (s < e && isBLANK(*s)) {
2827 while (s < e && isBLANK(*(e - 1))) {
2831 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2833 if (!SvCUR(char_name)) {
2834 SvREFCNT_dec_NN(char_name);
2835 /* diag_listed_as: Unknown charname '%s' */
2836 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2840 /* Autoload the charnames module */
2842 table = load_charnames(char_name, context, context_len, error_msg);
2843 if (table == NULL) {
2848 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2849 context, context_len, error_msg);
2851 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2857 /* See if the charnames handler is the Perl core's, and if so, we can skip
2858 * the validation needed for a user-supplied one, as Perl's does its own
2860 cvp = hv_fetchs(table, "charnames", FALSE);
2861 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2862 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2864 const char * const name = HvNAME(stash);
2865 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2870 /* Here, it isn't Perl's charname handler. We can't rely on a
2871 * user-supplied handler to validate the input name. For non-ut8 input,
2872 * look to see that the first character is legal. Then loop through the
2873 * rest checking that each is a continuation */
2875 /* This code makes the reasonable assumption that the only Latin1-range
2876 * characters that begin a character name alias are alphabetic, otherwise
2877 * would have to create a isCHARNAME_BEGIN macro */
2880 if (! isALPHAU(*s)) {
2885 if (! isCHARNAME_CONT(*s)) {
2888 if (*s == ' ' && *(s-1) == ' ') {
2895 /* Similarly for utf8. For invariants can check directly; for other
2896 * Latin1, can calculate their code point and check; otherwise use an
2898 if (UTF8_IS_INVARIANT(*s)) {
2899 if (! isALPHAU(*s)) {
2903 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2904 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2910 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2911 utf8_to_uvchr_buf((U8 *) s,
2921 if (UTF8_IS_INVARIANT(*s)) {
2922 if (! isCHARNAME_CONT(*s)) {
2925 if (*s == ' ' && *(s-1) == ' ') {
2930 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2931 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2938 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2939 utf8_to_uvchr_buf((U8 *) s,
2949 if (*(s-1) == ' ') {
2950 /* diag_listed_as: charnames alias definitions may not contain
2951 trailing white-space; marked by <-- HERE in %s
2953 *error_msg = Perl_form(aTHX_
2954 "charnames alias definitions may not contain trailing "
2955 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2956 (int)(s - context + 1), context,
2957 (int)(e - s + 1), s + 1);
2961 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2962 const U8* first_bad_char_loc;
2964 const char* const str = SvPV_const(res, len);
2965 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2966 &first_bad_char_loc)))
2968 _force_out_malformed_utf8_message(first_bad_char_loc,
2969 (U8 *) PL_parser->bufend,
2971 0 /* 0 means don't die */ );
2972 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2973 immediately after '%s' */
2974 *error_msg = Perl_form(aTHX_
2975 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2976 (int) context_len, context,
2977 (int) ((char *) first_bad_char_loc - str), str);
2986 /* The final %.*s makes sure that should the trailing NUL be missing
2987 * that this print won't run off the end of the string */
2988 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2990 *error_msg = Perl_form(aTHX_
2991 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2992 (int)(s - context + 1), context,
2993 (int)(e - s + 1), s + 1);
2998 /* diag_listed_as: charnames alias definitions may not contain a
2999 sequence of multiple spaces; marked by <-- HERE
3001 *error_msg = Perl_form(aTHX_
3002 "charnames alias definitions may not contain a sequence of "
3003 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3004 (int)(s - context + 1), context,
3005 (int)(e - s + 1), s + 1);
3012 Extracts the next constant part of a pattern, double-quoted string,
3013 or transliteration. This is terrifying code.
3015 For example, in parsing the double-quoted string "ab\x63$d", it would
3016 stop at the '$' and return an OP_CONST containing 'abc'.
3018 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3019 processing a pattern (PL_lex_inpat is true), a transliteration
3020 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3022 Returns a pointer to the character scanned up to. If this is
3023 advanced from the start pointer supplied (i.e. if anything was
3024 successfully parsed), will leave an OP_CONST for the substring scanned
3025 in pl_yylval. Caller must intuit reason for not parsing further
3026 by looking at the next characters herself.
3030 \N{FOO} => \N{U+hex_for_character_FOO}
3031 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3034 all other \-char, including \N and \N{ apart from \N{ABC}
3037 @ and $ where it appears to be a var, but not for $ as tail anchor
3041 In transliterations:
3042 characters are VERY literal, except for - not at the start or end
3043 of the string, which indicates a range. However some backslash sequences
3044 are recognized: \r, \n, and the like
3045 \007 \o{}, \x{}, \N{}
3046 If all elements in the transliteration are below 256,
3047 scan_const expands the range to the full set of intermediate
3048 characters. If the range is in utf8, the hyphen is replaced with
3049 a certain range mark which will be handled by pmtrans() in op.c.
3051 In double-quoted strings:
3053 all those recognized in transliterations
3054 deprecated backrefs: \1 (in substitution replacements)
3055 case and quoting: \U \Q \E
3058 scan_const does *not* construct ops to handle interpolated strings.
3059 It stops processing as soon as it finds an embedded $ or @ variable
3060 and leaves it to the caller to work out what's going on.
3062 embedded arrays (whether in pattern or not) could be:
3063 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3065 $ in double-quoted strings must be the symbol of an embedded scalar.
3067 $ in pattern could be $foo or could be tail anchor. Assumption:
3068 it's a tail anchor if $ is the last thing in the string, or if it's
3069 followed by one of "()| \r\n\t"
3071 \1 (backreferences) are turned into $1 in substitutions
3073 The structure of the code is
3074 while (there's a character to process) {
3075 handle transliteration ranges
3076 skip regexp comments /(?#comment)/ and codes /(?{code})/
3077 skip #-initiated comments in //x patterns
3078 check for embedded arrays
3079 check for embedded scalars
3081 deprecate \1 in substitution replacements
3082 handle string-changing backslashes \l \U \Q \E, etc.
3083 switch (what was escaped) {
3084 handle \- in a transliteration (becomes a literal -)
3085 if a pattern and not \N{, go treat as regular character
3086 handle \132 (octal characters)
3087 handle \x15 and \x{1234} (hex characters)
3088 handle \N{name} (named characters, also \N{3,5} in a pattern)
3089 handle \cV (control characters)
3090 handle printf-style backslashes (\f, \r, \n, etc)
3093 } (end if backslash)
3094 handle regular character
3095 } (end while character to read)
3100 S_scan_const(pTHX_ char *start)
3102 const char * const send = PL_bufend;/* end of the constant */
3103 SV *sv = newSV(send - start); /* sv for the constant. See note below
3105 char *s = start; /* start of the constant */
3106 char *d = SvPVX(sv); /* destination for copies */
3107 bool dorange = FALSE; /* are we in a translit range? */
3108 bool didrange = FALSE; /* did we just finish a range? */
3109 bool in_charclass = FALSE; /* within /[...]/ */
3110 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
3111 UTF8? But, this can show as true
3112 when the source isn't utf8, as for
3113 example when it is entirely composed
3115 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3116 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3117 number of characters found so far
3118 that will expand (into 2 bytes)
3119 should we have to convert to
3121 SV *res; /* result from charnames */
3122 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3123 high-end character is temporarily placed */
3125 /* Does something require special handling in tr/// ? This avoids extra
3126 * work in a less likely case. As such, khw didn't feel it was worth
3127 * adding any branches to the more mainline code to handle this, which
3128 * means that this doesn't get set in some circumstances when things like
3129 * \x{100} get expanded out. As a result there needs to be extra testing
3130 * done in the tr code */
3131 bool has_above_latin1 = FALSE;
3133 /* Note on sizing: The scanned constant is placed into sv, which is
3134 * initialized by newSV() assuming one byte of output for every byte of
3135 * input. This routine expects newSV() to allocate an extra byte for a
3136 * trailing NUL, which this routine will append if it gets to the end of
3137 * the input. There may be more bytes of input than output (eg., \N{LATIN
3138 * CAPITAL LETTER A}), or more output than input if the constant ends up
3139 * recoded to utf8, but each time a construct is found that might increase
3140 * the needed size, SvGROW() is called. Its size parameter each time is
3141 * based on the best guess estimate at the time, namely the length used so
3142 * far, plus the length the current construct will occupy, plus room for
3143 * the trailing NUL, plus one byte for every input byte still unscanned */
3145 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3148 int backslash_N = 0; /* ? was the character from \N{} */
3149 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3150 platform-specific like \x65 */
3153 PERL_ARGS_ASSERT_SCAN_CONST;
3155 assert(PL_lex_inwhat != OP_TRANSR);
3157 /* Protect sv from errors and fatal warnings. */
3158 ENTER_with_name("scan_const");
3161 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3162 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3164 assert(*send == '\0');
3167 || dorange /* Handle tr/// range at right edge of input */
3170 /* get transliterations out of the way (they're most literal) */
3171 if (PL_lex_inwhat == OP_TRANS) {
3173 /* But there isn't any special handling necessary unless there is a
3174 * range, so for most cases we just drop down and handle the value
3175 * as any other. There are two exceptions.
3177 * 1. A hyphen indicates that we are actually going to have a
3178 * range. In this case, skip the '-', set a flag, then drop
3179 * down to handle what should be the end range value.
3180 * 2. After we've handled that value, the next time through, that
3181 * flag is set and we fix up the range.
3183 * Ranges entirely within Latin1 are expanded out entirely, in
3184 * order to make the transliteration a simple table look-up.
3185 * Ranges that extend above Latin1 have to be done differently, so
3186 * there is no advantage to expanding them here, so they are
3187 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3188 * a byte that can't occur in legal UTF-8, and hence can signify a
3189 * hyphen without any possible ambiguity. On EBCDIC machines, if
3190 * the range is expressed as Unicode, the Latin1 portion is
3191 * expanded out even if the range extends above Latin1. This is
3192 * because each code point in it has to be processed here
3193 * individually to get its native translation */
3197 /* Here, we don't think we're in a range. If the new character
3198 * is not a hyphen; or if it is a hyphen, but it's too close to
3199 * either edge to indicate a range, or if we haven't output any
3200 * characters yet then it's a regular character. */
3201 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3204 /* A regular character. Process like any other, but first
3205 * clear any flags */
3209 non_portable_endpoint = 0;
3212 /* The tests here for being above Latin1 and similar ones
3213 * in the following 'else' suffice to find all such
3214 * occurences in the constant, except those added by a
3215 * backslash escape sequence, like \x{100}. Mostly, those
3216 * set 'has_above_latin1' as appropriate */
3217 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3218 has_above_latin1 = TRUE;
3221 /* Drops down to generic code to process current byte */
3223 else { /* Is a '-' in the context where it means a range */
3224 if (didrange) { /* Something like y/A-C-Z// */
3225 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3231 s++; /* Skip past the hyphen */
3233 /* d now points to where the end-range character will be
3234 * placed. Drop down to get that character. We'll finish
3235 * processing the range the next time through the loop */
3237 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3238 has_above_latin1 = TRUE;
3241 /* Drops down to generic code to process current byte */
3243 } /* End of not a range */
3245 /* Here we have parsed a range. Now must handle it. At this
3247 * 'sv' is a SV* that contains the output string we are
3248 * constructing. The final two characters in that string
3249 * are the range start and range end, in order.
3250 * 'd' points to just beyond the range end in the 'sv' string,
3251 * where we would next place something
3256 IV range_max; /* last character in range */
3258 Size_t offset_to_min = 0;
3261 bool convert_unicode;
3262 IV real_range_max = 0;
3264 /* Get the code point values of the range ends. */
3265 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3266 offset_to_max = max_ptr - SvPVX_const(sv);
3268 /* We know the utf8 is valid, because we just constructed
3269 * it ourselves in previous loop iterations */
3270 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3271 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3272 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3274 /* This compensates for not all code setting
3275 * 'has_above_latin1', so that we don't skip stuff that
3276 * should be executed */
3277 if (range_max > 255) {
3278 has_above_latin1 = TRUE;
3282 min_ptr = max_ptr - 1;
3283 range_min = * (U8*) min_ptr;
3284 range_max = * (U8*) max_ptr;
3287 /* If the range is just a single code point, like tr/a-a/.../,
3288 * that code point is already in the output, twice. We can
3289 * just back up over the second instance and avoid all the rest
3290 * of the work. But if it is a variant character, it's been
3291 * counted twice, so decrement. (This unlikely scenario is
3292 * special cased, like the one for a range of 2 code points
3293 * below, only because the main-line code below needs a range
3294 * of 3 or more to work without special casing. Might as well
3295 * get it out of the way now.) */
3296 if (UNLIKELY(range_max == range_min)) {
3298 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3299 utf8_variant_count--;
3305 /* On EBCDIC platforms, we may have to deal with portable
3306 * ranges. These happen if at least one range endpoint is a
3307 * Unicode value (\N{...}), or if the range is a subset of
3308 * [A-Z] or [a-z], and both ends are literal characters,
3309 * like 'A', and not like \x{C1} */
3311 cBOOL(backslash_N) /* \N{} forces Unicode,
3312 hence portable range */
3313 || ( ! non_portable_endpoint
3314 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3315 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3316 if (convert_unicode) {
3318 /* Special handling is needed for these portable ranges.
3319 * They are defined to be in Unicode terms, which includes
3320 * all the Unicode code points between the end points.
3321 * Convert to Unicode to get the Unicode range. Later we
3322 * will convert each code point in the range back to
3324 range_min = NATIVE_TO_UNI(range_min);
3325 range_max = NATIVE_TO_UNI(range_max);
3329 if (range_min > range_max) {
3331 if (convert_unicode) {
3332 /* Need to convert back to native for meaningful
3333 * messages for this platform */
3334 range_min = UNI_TO_NATIVE(range_min);
3335 range_max = UNI_TO_NATIVE(range_max);
3338 /* Use the characters themselves for the error message if
3339 * ASCII printables; otherwise some visible representation
3341 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3343 "Invalid range \"%c-%c\" in transliteration operator",
3344 (char)range_min, (char)range_max);
3347 else if (convert_unicode) {
3348 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3350 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3351 UVXf "}\" in transliteration operator",
3352 range_min, range_max);
3356 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3358 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3359 " in transliteration operator",
3360 range_min, range_max);
3364 /* If the range is exactly two code points long, they are
3365 * already both in the output */
3366 if (UNLIKELY(range_min + 1 == range_max)) {
3370 /* Here the range contains at least 3 code points */
3374 /* If everything in the transliteration is below 256, we
3375 * can avoid special handling later. A translation table
3376 * for each of those bytes is created by op.c. So we
3377 * expand out all ranges to their constituent code points.
3378 * But if we've encountered something above 255, the
3379 * expanding won't help, so skip doing that. But if it's
3380 * EBCDIC, we may have to look at each character below 256
3381 * if we have to convert to/from Unicode values */
3382 if ( has_above_latin1
3384 && (range_min > 255 || ! convert_unicode)
3387 const STRLEN off = d - SvPVX(sv);
3388 const STRLEN extra = 1 + (send - s) + 1;
3391 /* Move the high character one byte to the right; then
3392 * insert between it and the range begin, an illegal
3393 * byte which serves to indicate this is a range (using
3394 * a '-' would be ambiguous). */
3396 if (off + extra > SvLEN(sv)) {
3397 d = off + SvGROW(sv, off + extra);
3398 max_ptr = d - off + offset_to_max;
3402 while (e-- > max_ptr) {
3405 *(e + 1) = (char) RANGE_INDICATOR;
3409 /* Here, we're going to expand out the range. For EBCDIC
3410 * the range can extend above 255 (not so in ASCII), so
3411 * for EBCDIC, split it into the parts above and below
3414 if (range_max > 255) {
3415 real_range_max = range_max;
3421 /* Here we need to expand out the string to contain each
3422 * character in the range. Grow the output to handle this.
3423 * For non-UTF8, we need a byte for each code point in the
3424 * range, minus the three that we've already allocated for: the
3425 * hyphen, the min, and the max. For UTF-8, we need this
3426 * plus an extra byte for each code point that occupies two
3427 * bytes (is variant) when in UTF-8 (except we've already
3428 * allocated for the end points, including if they are
3429 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3430 * platforms, it's easy to calculate a precise number. To
3431 * start, we count the variants in the range, which we need
3432 * elsewhere in this function anyway. (For the case where it
3433 * isn't easy to calculate, 'extras' has been initialized to 0,
3434 * and the calculation is done in a loop further down.) */
3436 if (convert_unicode)
3439 /* This is executed unconditionally on ASCII, and for
3440 * Unicode ranges on EBCDIC. Under these conditions, all
3441 * code points above a certain value are variant; and none
3442 * under that value are. We just need to find out how much
3443 * of the range is above that value. We don't count the
3444 * end points here, as they will already have been counted
3445 * as they were parsed. */
3446 if (range_min >= UTF_CONTINUATION_MARK) {
3448 /* The whole range is made up of variants */
3449 extras = (range_max - 1) - (range_min + 1) + 1;
3451 else if (range_max >= UTF_CONTINUATION_MARK) {
3453 /* Only the higher portion of the range is variants */
3454 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3457 utf8_variant_count += extras;
3460 /* The base growth is the number of code points in the range,
3461 * not including the endpoints, which have already been sized
3462 * for (and output). We don't subtract for the hyphen, as it
3463 * has been parsed but not output, and the SvGROW below is
3464 * based only on what's been output plus what's left to parse.
3466 grow = (range_max - 1) - (range_min + 1) + 1;
3470 /* In some cases in EBCDIC, we haven't yet calculated a
3471 * precise amount needed for the UTF-8 variants. Just
3472 * assume the worst case, that everything will expand by a
3474 if (! convert_unicode) {
3480 /* Otherwise we know exactly how many variants there
3481 * are in the range. */
3486 /* Grow, but position the output to overwrite the range min end
3487 * point, because in some cases we overwrite that */
3488 SvCUR_set(sv, d - SvPVX_const(sv));
3489 offset_to_min = min_ptr - SvPVX_const(sv);
3491 /* See Note on sizing above. */
3492 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3495 + 1 /* Trailing NUL */ );
3497 /* Now, we can expand out the range. */
3499 if (convert_unicode) {
3502 /* Recall that the min and max are now in Unicode terms, so
3503 * we have to convert each character to its native
3506 for (i = range_min; i <= range_max; i++) {
3507 append_utf8_from_native_byte(
3508 LATIN1_TO_NATIVE((U8) i),
3513 for (i = range_min; i <= range_max; i++) {
3514 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3520 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3522 /* Here, no conversions are necessary, which means that the
3523 * first character in the range is already in 'd' and
3524 * valid, so we can skip overwriting it */
3528 for (i = range_min + 1; i <= range_max; i++) {
3529 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3535 assert(range_min + 1 <= range_max);
3536 for (i = range_min + 1; i < range_max; i++) {
3538 /* In this case on EBCDIC, we haven't calculated
3539 * the variants. Do it here, as we go along */
3540 if (! UVCHR_IS_INVARIANT(i)) {
3541 utf8_variant_count++;
3547 /* The range_max is done outside the loop so as to
3548 * avoid having to special case not incrementing
3549 * 'utf8_variant_count' on EBCDIC (it's already been
3550 * counted when originally parsed) */
3551 *d++ = (char) range_max;
3556 /* If the original range extended above 255, add in that
3558 if (real_range_max) {
3559 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3560 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3561 if (real_range_max > 0x100) {
3562 if (real_range_max > 0x101) {
3563 *d++ = (char) RANGE_INDICATOR;
3565 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3571 /* mark the range as done, and continue */
3575 non_portable_endpoint = 0;
3579 } /* End of is a range */
3580 } /* End of transliteration. Joins main code after these else's */
3581 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3584 while (s1 >= start && *s1-- == '\\')
3587 in_charclass = TRUE;
3589 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3592 while (s1 >= start && *s1-- == '\\')
3595 in_charclass = FALSE;
3597 /* skip for regexp comments /(?#comment)/, except for the last
3598 * char, which will be done separately. Stop on (?{..}) and
3600 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3603 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3605 while (s + len < send && *s != ')') {
3606 Copy(s, d, len, U8);
3609 len = UTF8_SAFE_SKIP(s, send);
3612 else while (s+1 < send && *s != ')') {
3616 else if (!PL_lex_casemods
3617 && ( s[2] == '{' /* This should match regcomp.c */
3618 || (s[2] == '?' && s[3] == '{')))
3623 /* likewise skip #-initiated comments in //x patterns */
3627 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3629 while (s < send && *s != '\n')
3632 /* no further processing of single-quoted regex */
3633 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3634 goto default_action;
3636 /* check for embedded arrays
3637 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3639 else if (*s == '@' && s[1]) {
3641 ? isIDFIRST_utf8_safe(s+1, send)
3642 : isWORDCHAR_A(s[1]))
3646 if (memCHRs(":'{$", s[1]))
3648 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3649 break; /* in regexp, neither @+ nor @- are interpolated */
3651 /* check for embedded scalars. only stop if we're sure it's a
3653 else if (*s == '$') {
3654 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3656 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3658 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3659 "Possible unintended interpolation of $\\ in regex");
3661 break; /* in regexp, $ might be tail anchor */
3665 /* End of else if chain - OP_TRANS rejoin rest */
3667 if (UNLIKELY(s >= send)) {
3673 if (*s == '\\' && s+1 < send) {
3674 char* bslash = s; /* point to beginning \ */
3675 char* rbrace; /* point to ending '}' */
3676 char* e; /* 1 past the meat (non-blanks) before the
3680 /* warn on \1 - \9 in substitution replacements, but note that \11
3681 * is an octal; and \19 is \1 followed by '9' */
3682 if (PL_lex_inwhat == OP_SUBST
3688 /* diag_listed_as: \%d better written as $%d */
3689 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3695 /* string-change backslash escapes */
3696 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3700 /* In a pattern, process \N, but skip any other backslash escapes.
3701 * This is because we don't want to translate an escape sequence
3702 * into a meta symbol and have the regex compiler use the meta
3703 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3704 * in spite of this, we do have to process \N here while the proper
3705 * charnames handler is in scope. See bugs #56444 and #62056.
3707 * There is a complication because \N in a pattern may also stand
3708 * for 'match a non-nl', and not mean a charname, in which case its
3709 * processing should be deferred to the regex compiler. To be a
3710 * charname it must be followed immediately by a '{', and not look
3711 * like \N followed by a curly quantifier, i.e., not something like
3712 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3714 else if (PL_lex_inpat
3717 || regcurly(s + 1, send, NULL)))
3720 goto default_action;
3726 if ((isALPHANUMERIC(*s)))
3727 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3728 "Unrecognized escape \\%c passed through",
3730 /* default action is to copy the quoted character */
3731 goto default_action;
3734 /* eg. \132 indicates the octal constant 0132 */
3735 case '0': case '1': case '2': case '3':
3736 case '4': case '5': case '6': case '7':
3738 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3739 | PERL_SCAN_NOTIFY_ILLDIGIT;
3741 uv = grok_oct(s, &len, &flags, NULL);
3743 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3745 && isDIGIT(*s) /* like \08, \178 */
3746 && ckWARN(WARN_MISC))
3748 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3749 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3752 goto NUM_ESCAPE_INSERT;
3754 /* eg. \o{24} indicates the octal constant \024 */
3759 if (! grok_bslash_o(&s, send,
3762 FALSE, /* Not strict */
3763 FALSE, /* No illegal cp's */
3767 uv = 0; /* drop through to ensure range ends are set */
3769 goto NUM_ESCAPE_INSERT;
3772 /* eg. \x24 indicates the hex constant 0x24 */
3777 if (! grok_bslash_x(&s, send,
3780 FALSE, /* Not strict */
3781 FALSE, /* No illegal cp's */
3785 uv = 0; /* drop through to ensure range ends are set */
3790 /* Insert oct or hex escaped character. */
3792 /* Here uv is the ordinal of the next character being added */
3793 if (UVCHR_IS_INVARIANT(uv)) {
3797 if (!d_is_utf8 && uv > 255) {
3799 /* Here, 'uv' won't fit unless we convert to UTF-8.
3800 * If we've only seen invariants so far, all we have to
3801 * do is turn on the flag */
3802 if (utf8_variant_count == 0) {
3806 SvCUR_set(sv, d - SvPVX_const(sv));
3810 sv_utf8_upgrade_flags_grow(
3812 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3814 /* Since we're having to grow here,
3815 * make sure we have enough room for
3816 * this escape and a NUL, so the
3817 * code immediately below won't have
3818 * to actually grow again */
3820 + (STRLEN)(send - s) + 1);
3821 d = SvPVX(sv) + SvCUR(sv);
3824 has_above_latin1 = TRUE;
3830 utf8_variant_count++;
3833 /* Usually, there will already be enough room in 'sv'
3834 * since such escapes are likely longer than any UTF-8
3835 * sequence they can end up as. This isn't the case on
3836 * EBCDIC where \x{40000000} contains 12 bytes, and the
3837 * UTF-8 for it contains 14. And, we have to allow for
3838 * a trailing NUL. It probably can't happen on ASCII
3839 * platforms, but be safe. See Note on sizing above. */
3840 const STRLEN needed = d - SvPVX(sv)
3844 if (UNLIKELY(needed > SvLEN(sv))) {
3845 SvCUR_set(sv, d - SvPVX_const(sv));
3846 d = SvCUR(sv) + SvGROW(sv, needed);
3849 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3850 (ckWARN(WARN_PORTABLE))
3851 ? UNICODE_WARN_PERL_EXTENDED
3856 non_portable_endpoint++;
3861 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3862 * named character, like \N{LATIN SMALL LETTER A}, or a named
3863 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3864 * GRAVE} (except y/// can't handle the latter, croaking). For
3865 * convenience all three forms are referred to as "named
3866 * characters" below.
3868 * For patterns, \N also can mean to match a non-newline. Code
3869 * before this 'switch' statement should already have handled
3870 * this situation, and hence this code only has to deal with
3871 * the named character cases.
3873 * For non-patterns, the named characters are converted to
3874 * their string equivalents. In patterns, named characters are
3875 * not converted to their ultimate forms for the same reasons
3876 * that other escapes aren't (mainly that the ultimate
3877 * character could be considered a meta-symbol by the regex
3878 * compiler). Instead, they are converted to the \N{U+...}
3879 * form to get the value from the charnames that is in effect
3880 * right now, while preserving the fact that it was a named
3881 * character, so that the regex compiler knows this.
3883 * The structure of this section of code (besides checking for
3884 * errors and upgrading to utf8) is:
3885 * If the named character is of the form \N{U+...}, pass it
3886 * through if a pattern; otherwise convert the code point
3888 * Otherwise must be some \N{NAME}: convert to
3889 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3891 * Transliteration is an exception. The conversion to utf8 is
3892 * only done if the code point requires it to be representable.
3894 * Here, 's' points to the 'N'; the test below is guaranteed to
3895 * succeed if we are being called on a pattern, as we already
3896 * know from a test above that the next character is a '{'. A
3897 * non-pattern \N must mean 'named character', which requires
3901 yyerror("Missing braces on \\N{}");
3907 /* If there is no matching '}', it is an error. */
3908 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3909 if (! PL_lex_inpat) {
3910 yyerror("Missing right brace on \\N{}");
3912 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3914 yyquit(); /* Have exhausted the input. */
3917 /* Here it looks like a named character */
3918 while (s < rbrace && isBLANK(*s)) {
3923 while (s < e && isBLANK(*(e - 1))) {
3927 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3928 s += 2; /* Skip to next char after the 'U+' */
3931 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3932 /* Check the syntax. */
3933 if (!isXDIGIT(*s)) {
3936 "Invalid hexadecimal number in \\N{U+...}"
3945 else if ((*s == '.' || *s == '_')
3951 /* Pass everything through unchanged.
3952 * +1 is to include the '}' */
3953 Copy(bslash, d, rbrace - bslash + 1, char);
3954 d += rbrace - bslash + 1;
3956 else { /* Not a pattern: convert the hex to string */
3957 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3958 | PERL_SCAN_SILENT_ILLDIGIT
3959 | PERL_SCAN_SILENT_OVERFLOW
3960 | PERL_SCAN_DISALLOW_PREFIX;
3963 uv = grok_hex(s, &len, &flags, NULL);
3964 if (len == 0 || (len != (STRLEN)(e - s)))
3967 if ( uv > MAX_LEGAL_CP
3968 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3970 yyerror(form_cp_too_large_msg(16, s, len, 0));
3971 uv = 0; /* drop through to ensure range ends are
3975 /* For non-tr///, if the destination is not in utf8,
3976 * unconditionally recode it to be so. This is
3977 * because \N{} implies Unicode semantics, and scalars
3978 * have to be in utf8 to guarantee those semantics.
3979 * tr/// doesn't care about Unicode rules, so no need
3980 * there to upgrade to UTF-8 for small enough code
3982 if (! d_is_utf8 && ( uv > 0xFF
3983 || PL_lex_inwhat != OP_TRANS))
3985 /* See Note on sizing above. */
3986 const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
3988 SvCUR_set(sv, d - SvPVX_const(sv));
3992 if (utf8_variant_count == 0) {
3994 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3997 sv_utf8_upgrade_flags_grow(
3999 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4001 d = SvPVX(sv) + SvCUR(sv);
4005 has_above_latin1 = TRUE;
4008 /* Add the (Unicode) code point to the output. */
4009 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4010 *d++ = (char) LATIN1_TO_NATIVE(uv);
4013 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4014 (ckWARN(WARN_PORTABLE))
4015 ? UNICODE_WARN_PERL_EXTENDED
4020 else /* Here is \N{NAME} but not \N{U+...}. */
4021 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4022 { /* Failed. We should die eventually, but for now use a NUL
4026 else { /* Successfully evaluated the name */
4028 const char *str = SvPV_const(res, len);
4031 if (! len) { /* The name resolved to an empty string */
4032 const char empty_N[] = "\\N{_}";
4033 Copy(empty_N, d, sizeof(empty_N) - 1, char);
4034 d += sizeof(empty_N) - 1;
4037 /* In order to not lose information for the regex
4038 * compiler, pass the result in the specially made
4039 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4040 * the code points in hex of each character
4041 * returned by charnames */
4043 const char *str_end = str + len;
4044 const STRLEN off = d - SvPVX_const(sv);
4046 if (! SvUTF8(res)) {
4047 /* For the non-UTF-8 case, we can determine the
4048 * exact length needed without having to parse
4049 * through the string. Each character takes up
4050 * 2 hex digits plus either a trailing dot or
4052 const char initial_text[] = "\\N{U+";
4053 const STRLEN initial_len = sizeof(initial_text)
4055 d = off + SvGROW(sv, off
4058 /* +1 for trailing NUL */
4061 + (STRLEN)(send - rbrace));
4062 Copy(initial_text, d, initial_len, char);
4064 while (str < str_end) {
4067 my_snprintf(hex_string,
4071 /* The regex compiler is
4072 * expecting Unicode, not
4074 NATIVE_TO_LATIN1(*str));
4075 PERL_MY_SNPRINTF_POST_GUARD(len,
4076 sizeof(hex_string));
4077 Copy(hex_string, d, 3, char);
4081 d--; /* Below, we will overwrite the final
4082 dot with a right brace */
4085 STRLEN char_length; /* cur char's byte length */
4087 /* and the number of bytes after this is
4088 * translated into hex digits */
4089 STRLEN output_length;
4091 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4092 * for max('U+', '.'); and 1 for NUL */
4093 char hex_string[2 * UTF8_MAXBYTES + 5];
4095 /* Get the first character of the result. */
4096 U32 uv = utf8n_to_uvchr((U8 *) str,
4100 /* Convert first code point to Unicode hex,
4101 * including the boiler plate before it. */
4103 my_snprintf(hex_string, sizeof(hex_string),
4105 (unsigned int) NATIVE_TO_UNI(uv));
4107 /* Make sure there is enough space to hold it */
4108 d = off + SvGROW(sv, off
4110 + (STRLEN)(send - rbrace)
4111 + 2); /* '}' + NUL */
4113 Copy(hex_string, d, output_length, char);
4116 /* For each subsequent character, append dot and
4117 * its Unicode code point in hex */
4118 while ((str += char_length) < str_end) {
4119 const STRLEN off = d - SvPVX_const(sv);
4120 U32 uv = utf8n_to_uvchr((U8 *) str,
4125 my_snprintf(hex_string,
4128 (unsigned int) NATIVE_TO_UNI(uv));
4130 d = off + SvGROW(sv, off
4132 + (STRLEN)(send - rbrace)
4133 + 2); /* '}' + NUL */
4134 Copy(hex_string, d, output_length, char);
4139 *d++ = '}'; /* Done. Add the trailing brace */
4142 else { /* Here, not in a pattern. Convert the name to a
4145 if (PL_lex_inwhat == OP_TRANS) {
4146 str = SvPV_const(res, len);
4147 if (len > ((SvUTF8(res))
4151 yyerror(Perl_form(aTHX_
4152 "%.*s must not be a named sequence"
4153 " in transliteration operator",
4154 /* +1 to include the "}" */
4155 (int) (rbrace + 1 - start), start));
4157 goto end_backslash_N;
4160 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4161 has_above_latin1 = TRUE;
4165 else if (! SvUTF8(res)) {
4166 /* Make sure \N{} return is UTF-8. This is because
4167 * \N{} implies Unicode semantics, and scalars have
4168 * to be in utf8 to guarantee those semantics; but
4169 * not needed in tr/// */
4170 sv_utf8_upgrade_flags(res, 0);
4171 str = SvPV_const(res, len);
4174 /* Upgrade destination to be utf8 if this new
4176 if (! d_is_utf8 && SvUTF8(res)) {
4177 /* See Note on sizing above. */
4178 const STRLEN extra = len + (send - s) + 1;
4180 SvCUR_set(sv, d - SvPVX_const(sv));
4184 if (utf8_variant_count == 0) {
4186 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4189 sv_utf8_upgrade_flags_grow(sv,
4190 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4192 d = SvPVX(sv) + SvCUR(sv);
4195 } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4197 /* See Note on sizing above. (NOTE: SvCUR() is not
4198 * set correctly here). */
4199 const STRLEN extra = len + (send - rbrace) + 1;
4200 const STRLEN off = d - SvPVX_const(sv);
4201 d = off + SvGROW(sv, off + extra);
4203 Copy(str, d, len, char);
4209 } /* End \N{NAME} */
4213 backslash_N++; /* \N{} is defined to be Unicode */
4215 s = rbrace + 1; /* Point to just after the '}' */
4218 /* \c is a control character */
4222 const char * message;
4224 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4226 yyquit(); /* Have always immediately croaked on
4232 yyerror("Missing control char name in \\c");
4233 yyquit(); /* Are at end of input, no sense continuing */
4236 non_portable_endpoint++;
4240 /* printf-style backslashes, formfeeds, newlines, etc */
4266 } /* end if (backslash) */
4269 /* Just copy the input to the output, though we may have to convert
4272 * If the input has the same representation in UTF-8 as not, it will be
4273 * a single byte, and we don't care about UTF8ness; just copy the byte */
4274 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4277 else if (! s_is_utf8 && ! d_is_utf8) {
4278 /* If neither source nor output is UTF-8, is also a single byte,
4279 * just copy it; but this byte counts should we later have to
4280 * convert to UTF-8 */
4282 utf8_variant_count++;
4284 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4285 const STRLEN len = UTF8SKIP(s);
4287 /* We expect the source to have already been checked for
4289 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4291 Copy(s, d, len, U8);
4295 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4296 STRLEN need = send - s + 1; /* See Note on sizing above. */
4298 SvCUR_set(sv, d - SvPVX_const(sv));
4302 if (utf8_variant_count == 0) {
4304 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4307 sv_utf8_upgrade_flags_grow(sv,
4308 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4310 d = SvPVX(sv) + SvCUR(sv);
4313 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4315 else { /* UTF8ness matters: convert this non-UTF8 source char to
4316 UTF-8 for output. It will occupy 2 bytes, but don't include
4317 the input byte since we haven't incremented 's' yet. See
4318 Note on sizing above. */
4319 const STRLEN off = d - SvPVX(sv);
4320 const STRLEN extra = 2 + (send - s - 1) + 1;
4321 if (off + extra > SvLEN(sv)) {
4322 d = off + SvGROW(sv, off + extra);
4324 *d++ = UTF8_EIGHT_BIT_HI(*s);
4325 *d++ = UTF8_EIGHT_BIT_LO(*s);
4328 } /* while loop to process each character */
4331 const STRLEN off = d - SvPVX(sv);
4333 /* See if room for the terminating NUL */
4334 if (UNLIKELY(off >= SvLEN(sv))) {
4338 if (off > SvLEN(sv))
4340 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4341 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4343 /* Whew! Here we don't have room for the terminating NUL, but
4344 * everything else so far has fit. It's not too late to grow
4345 * to fit the NUL and continue on. But it is a bug, as the code
4346 * above was supposed to have made room for this, so under
4347 * DEBUGGING builds, we panic anyway. */
4348 d = off + SvGROW(sv, off + 1);
4352 /* terminate the string and set up the sv */
4354 SvCUR_set(sv, d - SvPVX_const(sv));
4361 /* shrink the sv if we allocated more than we used */
4362 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4363 SvPV_shrink_to_cur(sv);
4366 /* return the substring (via pl_yylval) only if we parsed anything */
4369 for (; s2 < s; s2++) {
4371 COPLINE_INC_WITH_HERELINES;
4373 SvREFCNT_inc_simple_void_NN(sv);
4374 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4375 && ! PL_parser->lex_re_reparsing)
4377 const char *const key = PL_lex_inpat ? "qr" : "q";
4378 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4382 if (PL_lex_inwhat == OP_TRANS) {
4385 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4388 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4396 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4397 type, typelen, NULL);
4399 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4401 LEAVE_with_name("scan_const");
4406 * Returns TRUE if there's more to the expression (e.g., a subscript),
4409 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4411 * ->[ and ->{ return TRUE
4412 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4413 * { and [ outside a pattern are always subscripts, so return TRUE
4414 * if we're outside a pattern and it's not { or [, then return FALSE
4415 * if we're in a pattern and the first char is a {
4416 * {4,5} (any digits around the comma) returns FALSE
4417 * if we're in a pattern and the first char is a [
4419 * [SOMETHING] has a funky algorithm to decide whether it's a
4420 * character class or not. It has to deal with things like
4421 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4422 * anything else returns TRUE
4425 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4428 S_intuit_more(pTHX_ char *s, char *e)
4430 PERL_ARGS_ASSERT_INTUIT_MORE;
4432 if (PL_lex_brackets)
4434 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4436 if (*s == '-' && s[1] == '>'
4437 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4438 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4439 ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4441 if (*s != '{' && *s != '[')
4443 PL_parser->sub_no_recover = TRUE;
4447 /* In a pattern, so maybe we have {n,m}. */
4449 if (regcurly(s, e, NULL)) {
4455 /* On the other hand, maybe we have a character class */
4458 if (*s == ']' || *s == '^')
4461 /* this is terrifying, and it works */
4464 const char * const send = (char *) memchr(s, ']', e - s);
4465 unsigned char un_char, last_un_char;
4466 char tmpbuf[sizeof PL_tokenbuf * 4];
4468 if (!send) /* has to be an expression */
4470 weight = 2; /* let's weigh the evidence */
4474 else if (isDIGIT(*s)) {
4476 if (isDIGIT(s[1]) && s[2] == ']')
4482 Zero(seen,256,char);
4484 for (; s < send; s++) {
4485 last_un_char = un_char;
4486 un_char = (unsigned char)*s;
4491 weight -= seen[un_char] * 10;
4492 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4494 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4495 len = (int)strlen(tmpbuf);
4496 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4497 UTF ? SVf_UTF8 : 0, SVt_PV))
4504 && memCHRs("[#!%*<>()-=",s[1]))
4506 if (/*{*/ memCHRs("])} =",s[2]))
4515 if (memCHRs("wds]",s[1]))
4517 else if (seen[(U8)'\''] || seen[(U8)'"'])
4519 else if (memCHRs("rnftbxcav",s[1]))
4521 else if (isDIGIT(s[1])) {
4523 while (s[1] && isDIGIT(s[1]))
4533 if (memCHRs("aA01! ",last_un_char))
4535 if (memCHRs("zZ79~",s[1]))
4537 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4538 weight -= 5; /* cope with negative subscript */
4541 if (!isWORDCHAR(last_un_char)
4542 && !(last_un_char == '$' || last_un_char == '@'
4543 || last_un_char == '&')
4544 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4548 if (keyword(d, s - d, 0))
4551 if (un_char == last_un_char + 1)
4553 weight -= seen[un_char];
4558 if (weight >= 0) /* probably a character class */
4568 * Does all the checking to disambiguate
4570 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4571 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4573 * First argument is the stuff after the first token, e.g. "bar".
4575 * Not a method if foo is a filehandle.
4576 * Not a method if foo is a subroutine prototyped to take a filehandle.
4577 * Not a method if it's really "Foo $bar"
4578 * Method if it's "foo $bar"
4579 * Not a method if it's really "print foo $bar"
4580 * Method if it's really "foo package::" (interpreted as package->foo)
4581 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4582 * Not a method if bar is a filehandle or package, but is quoted with
4587 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4589 char *s = start + (*start == '$');
4590 char tmpbuf[sizeof PL_tokenbuf];
4593 /* Mustn't actually add anything to a symbol table.
4594 But also don't want to "initialise" any placeholder
4595 constants that might already be there into full
4596 blown PVGVs with attached PVCV. */
4598 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4600 PERL_ARGS_ASSERT_INTUIT_METHOD;
4602 if (!FEATURE_INDIRECT_IS_ENABLED)
4605 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4607 if (cv && SvPOK(cv)) {
4608 const char *proto = CvPROTO(cv);
4610 while (*proto && (isSPACE(*proto) || *proto == ';'))
4617 if (*start == '$') {
4618 SSize_t start_off = start - SvPVX(PL_linestr);
4619 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4620 || isUPPER(*PL_tokenbuf))
4622 /* this could be $# */
4625 PL_bufptr = SvPVX(PL_linestr) + start_off;
4627 return *s == '(' ? FUNCMETH : METHOD;
4630 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4631 /* start is the beginning of the possible filehandle/object,
4632 * and s is the end of it
4633 * tmpbuf is a copy of it (but with single quotes as double colons)
4636 if (!keyword(tmpbuf, len, 0)) {
4637 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4642 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4643 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4645 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4646 && (!isGV(indirgv) || GvCVu(indirgv)))
4648 /* filehandle or package name makes it a method */
4649 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4651 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4652 return 0; /* no assumptions -- "=>" quotes bareword */
4654 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4655 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4656 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4658 force_next(BAREWORD);
4660 return *s == '(' ? FUNCMETH : METHOD;
4666 /* Encoded script support. filter_add() effectively inserts a
4667 * 'pre-processing' function into the current source input stream.
4668 * Note that the filter function only applies to the current source file
4669 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4671 * The datasv parameter (which may be NULL) can be used to pass
4672 * private data to this instance of the filter. The filter function
4673 * can recover the SV using the FILTER_DATA macro and use it to
4674 * store private buffers and state information.
4676 * The supplied datasv parameter is upgraded to a PVIO type
4677 * and the IoDIRP/IoANY field is used to store the function pointer,
4678 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4679 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4680 * private use must be set using malloc'd pointers.
4684 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4692 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4693 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4695 if (!PL_rsfp_filters)
4696 PL_rsfp_filters = newAV();
4699 SvUPGRADE(datasv, SVt_PVIO);
4700 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4701 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4702 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4703 FPTR2DPTR(void *, IoANY(datasv)),
4704 SvPV_nolen(datasv)));
4705 av_unshift(PL_rsfp_filters, 1);
4706 av_store(PL_rsfp_filters, 0, datasv) ;
4708 !PL_parser->filtered
4709 && PL_parser->lex_flags & LEX_EVALBYTES
4710 && PL_bufptr < PL_bufend
4712 const char *s = PL_bufptr;
4713 while (s < PL_bufend) {
4715 SV *linestr = PL_parser->linestr;
4716 char *buf = SvPVX(linestr);
4717 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4718 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4719 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4720 STRLEN const linestart_pos = PL_parser->linestart - buf;
4721 STRLEN const last_uni_pos =
4722 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4723 STRLEN const last_lop_pos =
4724 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4725 av_push(PL_rsfp_filters, linestr);
4726 PL_parser->linestr =
4727 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4728 buf = SvPVX(PL_parser->linestr);
4729 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4730 PL_parser->bufptr = buf + bufptr_pos;
4731 PL_parser->oldbufptr = buf + oldbufptr_pos;
4732 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4733 PL_parser->linestart = buf + linestart_pos;
4734 if (PL_parser->last_uni)
4735 PL_parser->last_uni = buf + last_uni_pos;
4736 if (PL_parser->last_lop)
4737 PL_parser->last_lop = buf + last_lop_pos;
4738 SvLEN_set(linestr, SvCUR(linestr));
4739 SvCUR_set(linestr, s - SvPVX(linestr));
4740 PL_parser->filtered = 1;
4750 /* Delete most recently added instance of this filter function. */
4752 Perl_filter_del(pTHX_ filter_t funcp)
4756 PERL_ARGS_ASSERT_FILTER_DEL;
4759 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4760 FPTR2DPTR(void*, funcp)));
4762 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4764 /* if filter is on top of stack (usual case) just pop it off */
4765 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4766 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4767 sv_free(av_pop(PL_rsfp_filters));
4771 /* we need to search for the correct entry and clear it */
4772 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4776 /* Invoke the idxth filter function for the current rsfp. */
4777 /* maxlen 0 = read one text line */
4779 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4784 /* This API is bad. It should have been using unsigned int for maxlen.
4785 Not sure if we want to change the API, but if not we should sanity
4786 check the value here. */
4787 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4789 PERL_ARGS_ASSERT_FILTER_READ;
4791 if (!PL_parser || !PL_rsfp_filters)
4793 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4794 /* Provide a default input filter to make life easy. */
4795 /* Note that we append to the line. This is handy. */
4796 DEBUG_P(PerlIO_printf(Perl_debug_log,
4797 "filter_read %d: from rsfp\n", idx));
4798 if (correct_length) {
4801 const int old_len = SvCUR(buf_sv);
4803 /* ensure buf_sv is large enough */
4804 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4805 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4806 correct_length)) <= 0) {
4807 if (PerlIO_error(PL_rsfp))
4808 return -1; /* error */
4810 return 0 ; /* end of file */
4812 SvCUR_set(buf_sv, old_len + len) ;
4813 SvPVX(buf_sv)[old_len + len] = '\0';
4816 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4817 if (PerlIO_error(PL_rsfp))
4818 return -1; /* error */
4820 return 0 ; /* end of file */
4823 return SvCUR(buf_sv);
4825 /* Skip this filter slot if filter has been deleted */
4826 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4827 DEBUG_P(PerlIO_printf(Perl_debug_log,
4828 "filter_read %d: skipped (filter deleted)\n",
4830 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4832 if (SvTYPE(datasv) != SVt_PVIO) {
4833 if (correct_length) {
4835 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4836 if (!remainder) return 0; /* eof */
4837 if (correct_length > remainder) correct_length = remainder;
4838 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4839 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4842 const char *s = SvEND(datasv);
4843 const char *send = SvPVX(datasv) + SvLEN(datasv);
4851 if (s == send) return 0; /* eof */
4852 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4853 SvCUR_set(datasv, s-SvPVX(datasv));
4855 return SvCUR(buf_sv);
4857 /* Get function pointer hidden within datasv */
4858 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4859 DEBUG_P(PerlIO_printf(Perl_debug_log,
4860 "filter_read %d: via function %p (%s)\n",
4861 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4862 /* Call function. The function is expected to */
4863 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4864 /* Return: <0:error, =0:eof, >0:not eof */
4866 save_scalar(PL_errgv);
4867 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4873 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4875 PERL_ARGS_ASSERT_FILTER_GETS;
4877 #ifdef PERL_CR_FILTER
4878 if (!PL_rsfp_filters) {
4879 filter_add(S_cr_textfilter,NULL);
4882 if (PL_rsfp_filters) {
4884 SvCUR_set(sv, 0); /* start with empty line */
4885 if (FILTER_READ(0, sv, 0) > 0)
4886 return ( SvPVX(sv) ) ;
4891 return (sv_gets(sv, PL_rsfp, append));
4895 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4899 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4901 if (memEQs(pkgname, len, "__PACKAGE__"))
4905 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4906 && (gv = gv_fetchpvn_flags(pkgname,
4908 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4910 return GvHV(gv); /* Foo:: */
4913 /* use constant CLASS => 'MyClass' */
4914 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4915 if (gv && GvCV(gv)) {
4916 SV * const sv = cv_const_sv(GvCV(gv));
4918 return gv_stashsv(sv, 0);
4921 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4926 S_tokenize_use(pTHX_ int is_use, char *s) {
4927 PERL_ARGS_ASSERT_TOKENIZE_USE;
4929 if (PL_expect != XSTATE)
4930 /* diag_listed_as: "use" not allowed in expression */
4931 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4932 is_use ? "use" : "no"));
4935 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4936 s = force_version(s, TRUE);
4937 if (*s == ';' || *s == '}'
4938 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4939 NEXTVAL_NEXTTOKE.opval = NULL;
4940 force_next(BAREWORD);
4942 else if (*s == 'v') {
4943 s = force_word(s,BAREWORD,FALSE,TRUE);
4944 s = force_version(s, FALSE);
4948 s = force_word(s,BAREWORD,FALSE,TRUE);
4949 s = force_version(s, FALSE);
4951 pl_yylval.ival = is_use;
4955 static const char* const exp_name[] =
4956 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4957 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4958 "SIGVAR", "TERMORDORDOR"
4962 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4964 S_word_takes_any_delimiter(char *p, STRLEN len)
4966 return (len == 1 && memCHRs("msyq", p[0]))
4968 && ((p[0] == 't' && p[1] == 'r')
4969 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4973 S_check_scalar_slice(pTHX_ char *s)
4976 while (SPACE_OR_TAB(*s)) s++;
4977 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4983 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4984 || (*s && memCHRs(" \t$#+-'\"", *s)))
4986 s += UTF ? UTF8SKIP(s) : 1;
4988 if (*s == '}' || *s == ']')
4989 pl_yylval.ival = OPpSLICEWARNING;
4992 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4994 S_lex_token_boundary(pTHX)
4996 PL_oldoldbufptr = PL_oldbufptr;
4997 PL_oldbufptr = PL_bufptr;
5000 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5002 S_vcs_conflict_marker(pTHX_ char *s)
5004 lex_token_boundary();
5006 yyerror("Version control conflict marker");
5007 while (s < PL_bufend && *s != '\n')
5013 yyl_sigvar(pTHX_ char *s)
5015 /* we expect the sigil and optional var name part of a
5016 * signature element here. Since a '$' is not necessarily
5017 * followed by a var name, handle it specially here; the general
5018 * yylex code would otherwise try to interpret whatever follows
5019 * as a var; e.g. ($, ...) would be seen as the var '$,'
5026 PL_bufptr = s; /* for error reporting */
5031 /* spot stuff that looks like an prototype */
5032 if (memCHRs("$:@%&*;\\[]", *s)) {
5033 yyerror("Illegal character following sigil in a subroutine signature");
5036 /* '$#' is banned, while '$ # comment' isn't */
5038 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5042 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5043 char *dest = PL_tokenbuf + 1;
5044 /* read var name, including sigil, into PL_tokenbuf */
5045 PL_tokenbuf[0] = sigil;
5046 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5047 0, cBOOL(UTF), FALSE, FALSE);
5049 assert(PL_tokenbuf[1]); /* we have a variable name */
5057 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5058 * as the ASSIGNOP, and exclude other tokens that start with =
5060 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5061 /* save now to report with the same context as we did when
5062 * all ASSIGNOPS were accepted */
5066 NEXTVAL_NEXTTOKE.ival = 0;
5067 force_next(ASSIGNOP);
5070 else if (*s == ',' || *s == ')') {
5071 PL_expect = XOPERATOR;
5074 /* make sure the context shows the unexpected character and
5075 * hopefully a bit more */
5077 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5079 PL_bufptr = s; /* for error reporting */
5080 yyerror("Illegal operator following parameter in a subroutine signature");
5084 NEXTVAL_NEXTTOKE.ival = sigil;
5085 force_next('p'); /* force a signature pending identifier */
5092 case ',': /* handle ($a,,$b) */
5097 yyerror("A signature parameter must start with '$', '@' or '%'");
5098 /* very crude error recovery: skip to likely next signature
5100 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5106 case ',': TOKEN (PERLY_COMMA);
5107 case '$': TOKEN (PERLY_DOLLAR);
5108 case '@': TOKEN (PERLY_SNAIL);
5109 case '%': TOKEN (PERLY_PERCENT_SIGN);
5110 case ')': TOKEN (PERLY_PAREN_CLOSE);
5111 default: TOKEN (sigil);
5116 yyl_dollar(pTHX_ char *s)
5120 if (PL_expect == XPOSTDEREF) {
5123 POSTDEREF(DOLSHARP);
5125 POSTDEREF(PERLY_DOLLAR);
5129 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5130 || memCHRs("{$:+-@", s[2])))
5132 PL_tokenbuf[0] = '@';
5133 s = scan_ident(s + 1, PL_tokenbuf + 1,
5134 sizeof PL_tokenbuf - 1, FALSE);
5135 if (PL_expect == XOPERATOR) {
5137 if (PL_bufptr > s) {
5139 PL_bufptr = PL_oldbufptr;
5141 no_op("Array length", d);
5143 if (!PL_tokenbuf[1])
5145 PL_expect = XOPERATOR;
5146 force_ident_maybe_lex('#');
5150 PL_tokenbuf[0] = '$';
5151 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5152 if (PL_expect == XOPERATOR) {
5154 if (PL_bufptr > s) {
5156 PL_bufptr = PL_oldbufptr;
5160 if (!PL_tokenbuf[1]) {
5162 yyerror("Final $ should be \\$ or $name");
5163 PREREF(PERLY_DOLLAR);
5167 const char tmp = *s;
5168 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5171 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5172 && intuit_more(s, PL_bufend)) {
5174 PL_tokenbuf[0] = '@';
5175 if (ckWARN(WARN_SYNTAX)) {
5178 while ( t < PL_bufend ) {
5180 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5181 /* consumed one or more space chars */
5182 } else if (*t == '$' || *t == '@') {
5183 /* could be more than one '$' like $$ref or @$ref */
5184 do { t++; } while (t < PL_bufend && *t == '$');
5186 /* could be an abigail style identifier like $ foo */
5187 while (t < PL_bufend && *t == ' ') t++;
5189 /* strip off the name of the var */
5190 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5191 t += UTF ? UTF8SKIP(t) : 1;
5192 /* consumed a varname */
5193 } else if (isDIGIT(*t)) {
5194 /* deal with hex constants like 0x11 */
5195 if (t[0] == '0' && t[1] == 'x') {
5197 while (t < PL_bufend && isXDIGIT(*t)) t++;
5199 /* deal with decimal/octal constants like 1 and 0123 */
5200 do { t++; } while (isDIGIT(*t));
5201 if (t<PL_bufend && *t == '.') {
5202 do { t++; } while (isDIGIT(*t));
5205 /* consumed a number */
5207 /* not a var nor a space nor a number */
5211 if (t < PL_bufend && *t++ == ',') {
5212 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5213 while (t < PL_bufend && *t != ']')
5215 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5216 "Multidimensional syntax %" UTF8f " not supported",
5217 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5221 else if (*s == '{') {
5223 PL_tokenbuf[0] = '%';
5224 if ( strEQ(PL_tokenbuf+1, "SIG")
5225 && ckWARN(WARN_SYNTAX)
5226 && (t = (char *) memchr(s, '}', PL_bufend - s))
5227 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5229 char tmpbuf[sizeof PL_tokenbuf];
5232 } while (isSPACE(*t));
5233 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5235 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5240 && get_cvn_flags(tmpbuf, len, UTF
5244 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5245 "You need to quote \"%" UTF8f "\"",
5246 UTF8fARG(UTF, len, tmpbuf));
5253 PL_expect = XOPERATOR;
5254 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5255 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5256 if (!islop || PL_last_lop_op == OP_GREPSTART)
5257 PL_expect = XOPERATOR;
5258 else if (memCHRs("$@\"'`q", *s))
5259 PL_expect = XTERM; /* e.g. print $fh "foo" */
5260 else if ( memCHRs("&*<%", *s)
5261 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5263 PL_expect = XTERM; /* e.g. print $fh &sub */
5265 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5266 char tmpbuf[sizeof PL_tokenbuf];
5269 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5270 if ((t2 = keyword(tmpbuf, len, 0))) {
5271 /* binary operators exclude handle interpretations */
5283 PL_expect = XTERM; /* e.g. print $fh length() */
5288 PL_expect = XTERM; /* e.g. print $fh subr() */
5291 else if (isDIGIT(*s))
5292 PL_expect = XTERM; /* e.g. print $fh 3 */
5293 else if (*s == '.' && isDIGIT(s[1]))
5294 PL_expect = XTERM; /* e.g. print $fh .3 */
5295 else if ((*s == '?' || *s == '-' || *s == '+')
5296 && !isSPACE(s[1]) && s[1] != '=')
5297 PL_expect = XTERM; /* e.g. print $fh -1 */
5298 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5300 PL_expect = XTERM; /* e.g. print $fh /.../
5301 XXX except DORDOR operator
5303 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5305 PL_expect = XTERM; /* print $fh <<"EOF" */
5308 force_ident_maybe_lex('$');
5309 TOKEN(PERLY_DOLLAR);
5313 yyl_sub(pTHX_ char *s, const int key)
5315 char * const tmpbuf = PL_tokenbuf + 1;
5316 bool have_name, have_proto;
5318 SV *format_name = NULL;
5319 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5321 SSize_t off = s-SvPVX(PL_linestr);
5324 s = skipspace(s); /* can move PL_linestr */
5326 d = SvPVX(PL_linestr)+off;
5328 SAVEBOOL(PL_parser->sig_seen);
5329 PL_parser->sig_seen = FALSE;
5331 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5333 || (*s == ':' && s[1] == ':'))
5336 PL_expect = XATTRBLOCK;
5337 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5339 if (key == KEY_format)
5340 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5342 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5344 PL_tokenbuf, len + 1, 0
5346 sv_setpvn(PL_subname, tmpbuf, len);
5348 sv_setsv(PL_subname,PL_curstname);
5349 sv_catpvs(PL_subname,"::");
5350 sv_catpvn(PL_subname,tmpbuf,len);
5352 if (SvUTF8(PL_linestr))
5353 SvUTF8_on(PL_subname);
5359 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5361 /* diag_listed_as: Missing name in "%s sub" */
5363 "Missing name in \"%s\"", PL_bufptr);
5365 PL_expect = XATTRTERM;
5366 sv_setpvs(PL_subname,"?");
5370 if (key == KEY_format) {
5372 NEXTVAL_NEXTTOKE.opval
5373 = newSVOP(OP_CONST,0, format_name);
5374 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5375 force_next(BAREWORD);
5380 /* Look for a prototype */
5381 if (*s == '(' && !is_sigsub) {
5382 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5384 Perl_croak(aTHX_ "Prototype not terminated");
5385 COPLINE_SET_FROM_MULTI_END;
5386 (void)validate_proto(PL_subname, PL_lex_stuff,
5387 ckWARN(WARN_ILLEGALPROTO), 0);
5395 if ( !(*s == ':' && s[1] != ':')
5396 && (*s != '{' && *s != '(') && key != KEY_format)
5398 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5399 key == KEY_DESTROY || key == KEY_BEGIN ||
5400 key == KEY_UNITCHECK || key == KEY_CHECK ||
5401 key == KEY_INIT || key == KEY_END ||
5402 key == KEY_my || key == KEY_state ||
5405 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5406 else if (*s != ';' && *s != '}')
5407 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5411 NEXTVAL_NEXTTOKE.opval =
5412 newSVOP(OP_CONST, 0, PL_lex_stuff);
5413 PL_lex_stuff = NULL;
5418 sv_setpvs(PL_subname, "__ANON__");
5420 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5426 force_ident_maybe_lex('&');
5434 yyl_interpcasemod(pTHX_ char *s)
5437 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5439 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5440 PL_bufptr, PL_bufend, *PL_bufptr);
5443 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5445 if (PL_lex_casemods) {
5446 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5447 PL_lex_casestack[PL_lex_casemods] = '\0';
5449 if (PL_bufptr != PL_bufend
5450 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5451 || oldmod == 'F')) {
5453 PL_lex_state = LEX_INTERPCONCAT;
5455 PL_lex_allbrackets--;
5456 return REPORT(PERLY_PAREN_CLOSE);
5458 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5459 /* Got an unpaired \E */
5460 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5461 "Useless use of \\E");
5463 if (PL_bufptr != PL_bufend)
5465 PL_lex_state = LEX_INTERPCONCAT;
5470 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5473 if (s[1] == '\\' && s[2] == 'E') {
5475 PL_lex_state = LEX_INTERPCONCAT;
5480 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5481 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5483 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5485 if ((*s == 'L' || *s == 'U' || *s == 'F')
5486 && (strpbrk(PL_lex_casestack, "LUF")))
5488 PL_lex_casestack[--PL_lex_casemods] = '\0';
5489 PL_lex_allbrackets--;
5490 return REPORT(PERLY_PAREN_CLOSE);
5492 if (PL_lex_casemods > 10)
5493 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5494 PL_lex_casestack[PL_lex_casemods++] = *s;
5495 PL_lex_casestack[PL_lex_casemods] = '\0';
5496 PL_lex_state = LEX_INTERPCONCAT;
5497 NEXTVAL_NEXTTOKE.ival = 0;
5498 force_next((2<<24)|PERLY_PAREN_OPEN);
5500 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5502 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5504 NEXTVAL_NEXTTOKE.ival = OP_LC;
5506 NEXTVAL_NEXTTOKE.ival = OP_UC;
5508 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5510 NEXTVAL_NEXTTOKE.ival = OP_FC;
5512 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5516 if (PL_lex_starts) {
5519 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5520 if (PL_lex_casemods == 1 && PL_lex_inpat)
5523 AopNOASSIGN(OP_CONCAT);
5531 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5532 GV **pgv, GV ***pgvp)
5534 GV *ogv = NULL; /* override (winner) */
5535 GV *hgv = NULL; /* hidden (loser) */
5538 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5540 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5541 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5543 && (cv = GvCVu(gv)))
5545 if (GvIMPORTED_CV(gv))
5547 else if (! CvMETHOD(cv))
5551 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5553 && (isGV_with_GP(gv)
5554 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5555 : SvPCS_IMPORTED(gv)
5556 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5566 *orig_keyword = key;
5567 return 0; /* overridden by import or by GLOBAL */
5569 else if (gv && !*pgvp
5570 && -key==KEY_lock /* XXX generalizable kludge */
5573 return 0; /* any sub overrides "weak" keyword */
5575 else { /* no override */
5577 if (key == KEY_dump) {
5578 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5582 if (hgv && key != KEY_x) /* never ambiguous */
5583 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5584 "Ambiguous call resolved as CORE::%s(), "
5585 "qualify as such or use &",
5592 yyl_qw(pTHX_ char *s, STRLEN len)
5596 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5598 missingterm(NULL, 0);
5600 COPLINE_SET_FROM_MULTI_END;
5601 PL_expect = XOPERATOR;
5602 if (SvCUR(PL_lex_stuff)) {
5603 int warned_comma = !ckWARN(WARN_QW);
5604 int warned_comment = warned_comma;
5605 char *d = SvPV_force(PL_lex_stuff, len);
5607 for (; isSPACE(*d) && len; --len, ++d)
5612 if (!warned_comma || !warned_comment) {
5613 for (; !isSPACE(*d) && len; --len, ++d) {
5614 if (!warned_comma && *d == ',') {
5615 Perl_warner(aTHX_ packWARN(WARN_QW),
5616 "Possible attempt to separate words with commas");
5619 else if (!warned_comment && *d == '#') {
5620 Perl_warner(aTHX_ packWARN(WARN_QW),
5621 "Possible attempt to put comments in qw() list");
5627 for (; !isSPACE(*d) && len; --len, ++d)
5630 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5631 words = op_append_elem(OP_LIST, words,
5632 newSVOP(OP_CONST, 0, tokeq(sv)));
5637 words = newNULLLIST();
5638 SvREFCNT_dec_NN(PL_lex_stuff);
5639 PL_lex_stuff = NULL;
5640 PL_expect = XOPERATOR;
5641 pl_yylval.opval = sawparens(words);
5646 yyl_hyphen(pTHX_ char *s)
5648 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5656 while (s < PL_bufend && SPACE_OR_TAB(*s))
5659 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5660 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5661 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5662 OPERATOR(PERLY_MINUS); /* unary minus */
5665 case 'r': ftst = OP_FTEREAD; break;
5666 case 'w': ftst = OP_FTEWRITE; break;
5667 case 'x': ftst = OP_FTEEXEC; break;
5668 case 'o': ftst = OP_FTEOWNED; break;
5669 case 'R': ftst = OP_FTRREAD; break;
5670 case 'W': ftst = OP_FTRWRITE; break;
5671 case 'X': ftst = OP_FTREXEC; break;
5672 case 'O': ftst = OP_FTROWNED; break;
5673 case 'e': ftst = OP_FTIS; break;
5674 case 'z': ftst = OP_FTZERO; break;
5675 case 's': ftst = OP_FTSIZE; break;
5676 case 'f': ftst = OP_FTFILE; break;
5677 case 'd': ftst = OP_FTDIR; break;
5678 case 'l': ftst = OP_FTLINK; break;
5679 case 'p': ftst = OP_FTPIPE; break;
5680 case 'S': ftst = OP_FTSOCK; break;
5681 case 'u': ftst = OP_FTSUID; break;
5682 case 'g': ftst = OP_FTSGID; break;
5683 case 'k': ftst = OP_FTSVTX; break;
5684 case 'b': ftst = OP_FTBLK; break;
5685 case 'c': ftst = OP_FTCHR; break;
5686 case 't': ftst = OP_FTTTY; break;
5687 case 'T': ftst = OP_FTTEXT; break;
5688 case 'B': ftst = OP_FTBINARY; break;
5689 case 'M': case 'A': case 'C':
5690 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5692 case 'M': ftst = OP_FTMTIME; break;
5693 case 'A': ftst = OP_FTATIME; break;
5694 case 'C': ftst = OP_FTCTIME; break;
5702 PL_last_uni = PL_oldbufptr;
5703 PL_last_lop_op = (OPCODE)ftst;
5705 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5710 /* Assume it was a minus followed by a one-letter named
5711 * subroutine call (or a -bareword), then. */
5713 PerlIO_printf(Perl_debug_log,
5714 "### '-%c' looked like a file test but was not\n",
5721 const char tmp = *s++;
5724 if (PL_expect == XOPERATOR)
5729 else if (*s == '>') {
5732 if (((*s == '$' || *s == '&') && s[1] == '*')
5733 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5734 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5735 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5738 PL_expect = XPOSTDEREF;
5741 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5742 s = force_word(s,METHOD,FALSE,TRUE);
5750 if (PL_expect == XOPERATOR) {
5752 && !PL_lex_allbrackets
5753 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5761 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5763 OPERATOR(PERLY_MINUS); /* unary minus */
5769 yyl_plus(pTHX_ char *s)
5771 const char tmp = *s++;
5774 if (PL_expect == XOPERATOR)
5779 if (PL_expect == XOPERATOR) {
5781 && !PL_lex_allbrackets
5782 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5790 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5792 OPERATOR(PERLY_PLUS);
5797 yyl_star(pTHX_ char *s)
5799 if (PL_expect == XPOSTDEREF)
5800 POSTDEREF(PERLY_STAR);
5802 if (PL_expect != XOPERATOR) {
5803 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5804 PL_expect = XOPERATOR;
5805 force_ident(PL_tokenbuf, PERLY_STAR);
5814 if (*s == '=' && !PL_lex_allbrackets
5815 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5824 && !PL_lex_allbrackets
5825 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5835 yyl_percent(pTHX_ char *s)
5837 if (PL_expect == XOPERATOR) {
5839 && !PL_lex_allbrackets
5840 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5847 else if (PL_expect == XPOSTDEREF)
5848 POSTDEREF(PERLY_PERCENT_SIGN);
5850 PL_tokenbuf[0] = '%';
5851 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5853 if (!PL_tokenbuf[1]) {
5854 PREREF(PERLY_PERCENT_SIGN);
5856 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5857 && intuit_more(s, PL_bufend)) {
5859 PL_tokenbuf[0] = '@';
5861 PL_expect = XOPERATOR;
5862 force_ident_maybe_lex('%');
5863 TERM(PERLY_PERCENT_SIGN);
5867 yyl_caret(pTHX_ char *s)
5870 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5871 if (bof && s[1] == '.')
5873 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5874 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5880 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5884 yyl_colon(pTHX_ char *s)
5888 switch (PL_expect) {
5890 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5892 PL_bufptr = s; /* update in case we back off */
5895 "Use of := for an empty attribute list is not allowed");
5902 PL_expect = XTERMBLOCK;
5904 /* NB: as well as parsing normal attributes, we also end up
5905 * here if there is something looking like attributes
5906 * following a signature (which is illegal, but used to be
5907 * legal in 5.20..5.26). If the latter, we still parse the
5908 * attributes so that error messages(s) are less confusing,
5909 * but ignore them (parser->sig_seen).
5913 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5914 bool sig = PL_parser->sig_seen;
5918 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5919 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5920 if (tmp < 0) tmp = -tmp;
5935 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5937 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5942 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5944 COPLINE_SET_FROM_MULTI_END;
5947 sv_catsv(sv, PL_lex_stuff);
5948 attrs = op_append_elem(OP_LIST, attrs,
5949 newSVOP(OP_CONST, 0, sv));
5950 SvREFCNT_dec_NN(PL_lex_stuff);
5951 PL_lex_stuff = NULL;
5954 /* NOTE: any CV attrs applied here need to be part of
5955 the CVf_BUILTIN_ATTRS define in cv.h! */
5956 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5959 CvLVALUE_on(PL_compcv);
5961 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5964 CvMETHOD_on(PL_compcv);
5966 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5969 Perl_ck_warner_d(aTHX_
5970 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5971 ":const is experimental"
5973 CvANONCONST_on(PL_compcv);
5974 if (!CvANON(PL_compcv))
5975 yyerror(":const is not permitted on named "
5979 /* After we've set the flags, it could be argued that
5980 we don't need to do the attributes.pm-based setting
5981 process, and shouldn't bother appending recognized
5982 flags. To experiment with that, uncomment the
5983 following "else". (Note that's already been
5984 uncommented. That keeps the above-applied built-in
5985 attributes from being intercepted (and possibly
5986 rejected) by a package's attribute routines, but is
5987 justified by the performance win for the common case
5988 of applying only built-in attributes.) */
5990 attrs = op_append_elem(OP_LIST, attrs,
5991 newSVOP(OP_CONST, 0,
5995 if (*s == ':' && s[1] != ':')
5998 break; /* require real whitespace or :'s */
5999 /* XXX losing whitespace on sequential attributes here */
6004 && !(PL_expect == XOPERATOR
6005 ? (*s == '=' || *s == ')')
6006 : (*s == '{' || *s == '(')))
6008 const char q = ((*s == '\'') ? '"' : '\'');
6009 /* If here for an expression, and parsed no attrs, back off. */
6010 if (PL_expect == XOPERATOR && !attrs) {
6014 /* MUST advance bufptr here to avoid bogus "at end of line"
6015 context messages from yyerror().
6018 yyerror( (const char *)
6020 ? Perl_form(aTHX_ "Invalid separator character "
6021 "%c%c%c in attribute list", q, *s, q)
6022 : "Unterminated attribute list" ) );
6025 OPERATOR(PERLY_COLON);
6029 if (PL_parser->sig_seen) {
6030 /* see comment about about sig_seen and parser error
6034 Perl_croak(aTHX_ "Subroutine attributes must come "
6035 "before the signature");
6038 NEXTVAL_NEXTTOKE.opval = attrs;
6044 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6049 PL_lex_allbrackets--;
6050 OPERATOR(PERLY_COLON);
6054 yyl_subproto(pTHX_ char *s, CV *cv)
6056 STRLEN protolen = CvPROTOLEN(cv);
6057 const char *proto = CvPROTO(cv);
6060 proto = S_strip_spaces(aTHX_ proto, &protolen);
6063 if ((optional = *proto == ';')) {
6066 } while (*proto == ';');
6072 *proto == '$' || *proto == '_'
6073 || *proto == '*' || *proto == '+'
6078 *proto == '\\' && proto[1] && proto[2] == '\0'
6081 UNIPROTO(UNIOPSUB,optional);
6084 if (*proto == '\\' && proto[1] == '[') {
6085 const char *p = proto + 2;
6086 while(*p && *p != ']')
6088 if(*p == ']' && !p[1])
6089 UNIPROTO(UNIOPSUB,optional);
6092 if (*proto == '&' && *s == '{') {
6094 sv_setpvs(PL_subname, "__ANON__");
6096 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6097 if (!PL_lex_allbrackets
6098 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6100 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6109 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6112 if (PL_lex_brackets > 100) {
6113 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6116 switch (PL_expect) {
6119 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6120 PL_lex_allbrackets++;
6121 OPERATOR(HASHBRACK);
6123 while (s < PL_bufend && SPACE_OR_TAB(*s))
6126 PL_tokenbuf[0] = '\0';
6127 if (d < PL_bufend && *d == '-') {
6128 PL_tokenbuf[0] = '-';
6130 while (d < PL_bufend && SPACE_OR_TAB(*d))
6133 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6135 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6137 while (d < PL_bufend && SPACE_OR_TAB(*d))
6140 const char minus = (PL_tokenbuf[0] == '-');
6141 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6143 force_next(PERLY_MINUS);
6149 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6150 PL_lex_allbrackets++;
6155 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6156 PL_lex_allbrackets++;
6160 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6161 PL_lex_allbrackets++;
6166 if (PL_oldoldbufptr == PL_last_lop)
6167 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6169 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6170 PL_lex_allbrackets++;
6173 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6175 /* This hack is to get the ${} in the message. */
6177 yyerror("syntax error");
6180 OPERATOR(HASHBRACK);
6182 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6183 /* ${...} or @{...} etc., but not print {...}
6184 * Skip the disambiguation and treat this as a block.
6186 goto block_expectation;
6188 /* This hack serves to disambiguate a pair of curlies
6189 * as being a block or an anon hash. Normally, expectation
6190 * determines that, but in cases where we're not in a
6191 * position to expect anything in particular (like inside
6192 * eval"") we have to resolve the ambiguity. This code
6193 * covers the case where the first term in the curlies is a
6194 * quoted string. Most other cases need to be explicitly
6195 * disambiguated by prepending a "+" before the opening
6196 * curly in order to force resolution as an anon hash.
6198 * XXX should probably propagate the outer expectation
6199 * into eval"" to rely less on this hack, but that could
6200 * potentially break current behavior of eval"".
6204 if (*s == '\'' || *s == '"' || *s == '`') {
6205 /* common case: get past first string, handling escapes */
6206 for (t++; t < PL_bufend && *t != *s;)
6211 else if (*s == 'q') {
6214 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6215 && !isWORDCHAR(*t))))
6217 /* skip q//-like construct */
6219 char open, close, term;
6222 while (t < PL_bufend && isSPACE(*t))
6224 /* check for q => */
6225 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6226 OPERATOR(HASHBRACK);
6230 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6234 for (t++; t < PL_bufend; t++) {
6235 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6237 else if (*t == open)
6241 for (t++; t < PL_bufend; t++) {
6242 if (*t == '\\' && t+1 < PL_bufend)
6244 else if (*t == close && --brackets <= 0)
6246 else if (*t == open)
6253 /* skip plain q word */
6254 while ( t < PL_bufend
6255 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6257 t += UTF ? UTF8SKIP(t) : 1;
6260 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6261 t += UTF ? UTF8SKIP(t) : 1;
6262 while ( t < PL_bufend
6263 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6265 t += UTF ? UTF8SKIP(t) : 1;
6268 while (t < PL_bufend && isSPACE(*t))
6270 /* if comma follows first term, call it an anon hash */
6271 /* XXX it could be a comma expression with loop modifiers */
6272 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6273 || (*t == '=' && t[1] == '>')))
6274 OPERATOR(HASHBRACK);
6275 if (PL_expect == XREF) {
6277 /* If there is an opening brace or 'sub:', treat it
6278 as a term to make ${{...}}{k} and &{sub:attr...}
6279 dwim. Otherwise, treat it as a statement, so
6280 map {no strict; ...} works.
6287 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6300 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6307 pl_yylval.ival = CopLINE(PL_curcop);
6308 PL_copline = NOLINE; /* invalidate current command line number */
6309 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6313 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6315 assert(s != PL_bufend);
6318 if (PL_lex_brackets <= 0)
6319 /* diag_listed_as: Unmatched right %s bracket */
6320 yyerror("Unmatched right curly bracket");
6322 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6324 PL_lex_allbrackets--;
6326 if (PL_lex_state == LEX_INTERPNORMAL) {
6327 if (PL_lex_brackets == 0) {
6328 if (PL_expect & XFAKEBRACK) {
6329 PL_expect &= XENUMMASK;
6330 PL_lex_state = LEX_INTERPEND;
6332 return yylex(); /* ignore fake brackets */
6334 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6335 && SvEVALED(PL_lex_repl))
6336 PL_lex_state = LEX_INTERPEND;
6337 else if (*s == '-' && s[1] == '>')
6338 PL_lex_state = LEX_INTERPENDMAYBE;
6339 else if (*s != '[' && *s != '{')
6340 PL_lex_state = LEX_INTERPEND;
6344 if (PL_expect & XFAKEBRACK) {
6345 PL_expect &= XENUMMASK;
6347 return yylex(); /* ignore fake brackets */
6350 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6351 if (formbrack) LEAVE_with_name("lex_format");
6352 if (formbrack == 2) { /* means . where arguments were expected */
6353 force_next(PERLY_SEMICOLON);
6357 TOKEN(PERLY_SEMICOLON);
6361 yyl_ampersand(pTHX_ char *s)
6363 if (PL_expect == XPOSTDEREF)
6364 POSTDEREF(PERLY_AMPERSAND);
6368 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6369 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6377 if (PL_expect == XOPERATOR) {
6380 if ( PL_bufptr == PL_linestart
6381 && ckWARN(WARN_SEMICOLON)
6382 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6384 CopLINE_dec(PL_curcop);
6385 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6386 CopLINE_inc(PL_curcop);
6389 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6391 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6392 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6398 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6403 PL_tokenbuf[0] = '&';
6404 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6405 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6408 force_ident_maybe_lex('&');
6410 PREREF(PERLY_AMPERSAND);
6412 TERM(PERLY_AMPERSAND);
6416 yyl_verticalbar(pTHX_ char *s)
6423 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6424 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6433 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6436 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6437 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6442 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6446 yyl_bang(pTHX_ char *s)
6448 const char tmp = *s++;
6450 /* was this !=~ where !~ was meant?
6451 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6453 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6454 const char *t = s+1;
6456 while (t < PL_bufend && isSPACE(*t))
6459 if (*t == '/' || *t == '?'
6460 || ((*t == 'm' || *t == 's' || *t == 'y')
6461 && !isWORDCHAR(t[1]))
6462 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6463 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6464 "!=~ should be !~");
6467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6479 OPERATOR(PERLY_EXCLAMATION_MARK);
6483 yyl_snail(pTHX_ char *s)
6485 if (PL_expect == XPOSTDEREF)
6486 POSTDEREF(PERLY_SNAIL);
6487 PL_tokenbuf[0] = '@';
6488 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6489 if (PL_expect == XOPERATOR) {
6491 if (PL_bufptr > s) {
6493 PL_bufptr = PL_oldbufptr;
6498 if (!PL_tokenbuf[1]) {
6499 PREREF(PERLY_SNAIL);
6501 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6503 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6504 && intuit_more(s, PL_bufend))
6507 PL_tokenbuf[0] = '%';
6509 /* Warn about @ where they meant $. */
6510 if (*s == '[' || *s == '{') {
6511 if (ckWARN(WARN_SYNTAX)) {
6512 S_check_scalar_slice(aTHX_ s);
6516 PL_expect = XOPERATOR;
6517 force_ident_maybe_lex('@');
6522 yyl_slash(pTHX_ char *s)
6524 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6525 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6526 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6531 else if (PL_expect == XOPERATOR) {
6533 if (*s == '=' && !PL_lex_allbrackets
6534 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6542 /* Disable warning on "study /blah/" */
6543 if ( PL_oldoldbufptr == PL_last_uni
6544 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6545 || memNE(PL_last_uni, "study", 5)
6546 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6549 s = scan_pat(s,OP_MATCH);
6550 TERM(sublex_start());
6555 yyl_leftsquare(pTHX_ char *s)
6557 if (PL_lex_brackets > 100)
6558 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6559 PL_lex_brackstack[PL_lex_brackets++] = 0;
6560 PL_lex_allbrackets++;
6562 OPERATOR(PERLY_BRACKET_OPEN);
6566 yyl_rightsquare(pTHX_ char *s)
6568 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6571 if (PL_lex_brackets <= 0)
6572 /* diag_listed_as: Unmatched right %s bracket */
6573 yyerror("Unmatched right square bracket");
6576 PL_lex_allbrackets--;
6577 if (PL_lex_state == LEX_INTERPNORMAL) {
6578 if (PL_lex_brackets == 0) {
6579 if (*s == '-' && s[1] == '>')
6580 PL_lex_state = LEX_INTERPENDMAYBE;
6581 else if (*s != '[' && *s != '{')
6582 PL_lex_state = LEX_INTERPEND;
6585 TERM(PERLY_BRACKET_CLOSE);
6589 yyl_tilde(pTHX_ char *s)
6592 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6593 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6596 Perl_ck_warner_d(aTHX_
6597 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6598 "Smartmatch is experimental");
6599 NCEop(OP_SMARTMATCH);
6602 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6604 BCop(OP_SCOMPLEMENT);
6606 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6610 yyl_leftparen(pTHX_ char *s)
6612 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6613 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6617 PL_lex_allbrackets++;
6618 TOKEN(PERLY_PAREN_OPEN);
6622 yyl_rightparen(pTHX_ char *s)
6624 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6627 PL_lex_allbrackets--;
6630 PREBLOCK(PERLY_PAREN_CLOSE);
6631 TERM(PERLY_PAREN_CLOSE);
6635 yyl_leftpointy(pTHX_ char *s)
6639 if (PL_expect != XOPERATOR) {
6640 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6642 if (s[1] == '<' && s[2] != '>')
6643 s = scan_heredoc(s);
6645 s = scan_inputsymbol(s);
6646 PL_expect = XOPERATOR;
6647 TOKEN(sublex_start());
6654 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6658 SHop(OP_LEFT_SHIFT);
6663 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6670 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6678 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6687 yyl_rightpointy(pTHX_ char *s)
6689 const char tmp = *s++;
6692 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6696 SHop(OP_RIGHT_SHIFT);
6698 else if (tmp == '=') {
6699 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6707 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6716 yyl_sglquote(pTHX_ char *s)
6718 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6720 missingterm(NULL, 0);
6721 COPLINE_SET_FROM_MULTI_END;
6722 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6723 if (PL_expect == XOPERATOR) {
6726 pl_yylval.ival = OP_CONST;
6727 TERM(sublex_start());
6731 yyl_dblquote(pTHX_ char *s)
6735 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6738 printbuf("### Saw string before %s\n", s);
6740 PerlIO_printf(Perl_debug_log,
6741 "### Saw unterminated string\n");
6743 if (PL_expect == XOPERATOR) {
6747 missingterm(NULL, 0);
6748 pl_yylval.ival = OP_CONST;
6749 /* FIXME. I think that this can be const if char *d is replaced by
6750 more localised variables. */
6751 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6752 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6753 pl_yylval.ival = OP_STRINGIFY;
6757 if (pl_yylval.ival == OP_CONST)
6758 COPLINE_SET_FROM_MULTI_END;
6759 TERM(sublex_start());
6763 yyl_backtick(pTHX_ char *s)
6765 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6768 printbuf("### Saw backtick string before %s\n", s);
6770 PerlIO_printf(Perl_debug_log,
6771 "### Saw unterminated backtick string\n");
6773 if (PL_expect == XOPERATOR)
6774 no_op("Backticks",s);
6776 missingterm(NULL, 0);
6777 pl_yylval.ival = OP_BACKTICK;
6778 TERM(sublex_start());
6782 yyl_backslash(pTHX_ char *s)
6784 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6785 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6787 if (PL_expect == XOPERATOR)
6788 no_op("Backslash",s);
6793 yyl_data_handle(pTHX)
6795 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6798 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6801 gv_init(gv,stash,"DATA",4,0);
6805 GvIOp(gv) = newIO();
6806 IoIFP(GvIOp(gv)) = PL_rsfp;
6808 /* Mark this internal pseudo-handle as clean */
6809 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6810 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6811 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6813 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6815 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6816 /* if the script was opened in binmode, we need to revert
6817 * it to text mode for compatibility; but only iff it has CRs
6818 * XXX this is a questionable hack at best. */
6819 if (PL_bufend-PL_bufptr > 2
6820 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6823 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6824 loc = PerlIO_tell(PL_rsfp);
6825 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6827 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6829 PerlIO_seek(PL_rsfp, loc, 0);
6834 #ifdef PERLIO_LAYERS
6837 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6844 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6845 __attribute__noreturn__;
6847 PERL_STATIC_NO_RET void
6848 yyl_croak_unrecognised(pTHX_ char *s)
6850 SV *dsv = newSVpvs_flags("", SVs_TEMP);
6856 STRLEN skiplen = UTF8SKIP(s);
6857 STRLEN stravail = PL_bufend - s;
6858 c = sv_uni_display(dsv, newSVpvn_flags(s,
6859 skiplen > stravail ? stravail : skiplen,
6860 SVs_TEMP | SVf_UTF8),
6861 10, UNI_DISPLAY_ISPRINT);
6864 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6867 if (s >= PL_linestart) {
6871 /* somehow (probably due to a parse failure), PL_linestart has advanced
6872 * pass PL_bufptr, get a reasonable beginning of line
6875 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6878 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6879 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6880 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6883 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6884 UTF8fARG(UTF, (s - d), d),
6889 yyl_require(pTHX_ char *s, I32 orig_keyword)
6893 s = force_version(s, FALSE);
6895 else if (*s != 'v' || !isDIGIT(s[1])
6896 || (s = force_version(s, TRUE), *s == 'v'))
6898 *PL_tokenbuf = '\0';
6899 s = force_word(s,BAREWORD,TRUE,TRUE);
6900 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6901 PL_tokenbuf + sizeof(PL_tokenbuf),
6904 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6905 GV_ADD | (UTF ? SVf_UTF8 : 0));
6908 yyerror("<> at require-statement should be quotes");
6911 if (orig_keyword == KEY_require)
6916 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6918 PL_last_uni = PL_oldbufptr;
6919 PL_last_lop_op = OP_REQUIRE;
6921 return REPORT( (int)REQUIRE );
6925 yyl_foreach(pTHX_ char *s)
6927 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6929 pl_yylval.ival = CopLINE(PL_curcop);
6931 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6933 SSize_t s_off = s - SvPVX(PL_linestr);
6934 bool paren_is_valid = FALSE;
6935 bool maybe_package = FALSE;
6936 bool saw_core = FALSE;
6937 bool core_valid = FALSE;
6939 if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6943 if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6945 paren_is_valid = TRUE;
6946 if (isSPACE(p[2])) {
6947 p = skipspace(p + 3);
6948 maybe_package = TRUE;
6954 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6956 if (isSPACE(p[3])) {
6957 p = skipspace(p + 4);
6958 maybe_package = TRUE;
6964 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
6966 if (isSPACE(p[5])) {
6967 p = skipspace(p + 6);
6973 if (saw_core && !core_valid) {
6974 Perl_croak(aTHX_ "Missing $ on loop variable");
6977 if (maybe_package && !saw_core) {
6978 /* skip optional package name, as in "for my abc $x (..)" */
6979 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
6981 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6983 paren_is_valid = FALSE;
6987 if (UNLIKELY(paren_is_valid && *p == '(')) {
6988 Perl_ck_warner_d(aTHX_
6989 packWARN(WARN_EXPERIMENTAL__FOR_LIST),
6990 "for my (...) is experimental");
6992 else if (UNLIKELY(*p != '$' && *p != '\\')) {
6993 /* "for myfoo (" will end up here, but with p pointing at the 'f' */
6994 Perl_croak(aTHX_ "Missing $ on loop variable");
6996 /* The buffer may have been reallocated, update s */
6997 s = SvPVX(PL_linestr) + s_off;
7003 yyl_do(pTHX_ char *s, I32 orig_keyword)
7012 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7014 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7015 && !keyword(PL_tokenbuf + 1, len, 0)) {
7016 SSize_t off = s-SvPVX(PL_linestr);
7018 s = SvPVX(PL_linestr)+off;
7020 force_ident_maybe_lex('&');
7025 if (orig_keyword == KEY_do)
7033 yyl_my(pTHX_ char *s, I32 my)
7037 yyerror(Perl_form(aTHX_
7038 "Can't redeclare \"%s\" in \"%s\"",
7039 my == KEY_my ? "my" :
7040 my == KEY_state ? "state" : "our",
7041 PL_in_my == KEY_my ? "my" :
7042 PL_in_my == KEY_state ? "state" : "our"));
7046 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7048 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7049 if (memEQs(PL_tokenbuf, len, "sub"))
7050 return yyl_sub(aTHX_ s, my);
7051 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7052 if (!PL_in_my_stash) {
7056 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7057 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7058 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7061 else if (*s == '\\') {
7062 if (!FEATURE_MYREF_IS_ENABLED)
7063 Perl_croak(aTHX_ "The experimental declared_refs "
7064 "feature is not enabled");
7065 Perl_ck_warner_d(aTHX_
7066 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7067 "Declaring references is experimental");
7072 static int yyl_try(pTHX_ char*);
7075 yyl_eol_needs_semicolon(pTHX_ char **ps)
7078 if (PL_lex_state != LEX_NORMAL
7079 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7081 const bool in_comment = *s == '#';
7083 if (*s == '#' && s == PL_linestart && PL_in_eval
7084 && !PL_rsfp && !PL_parser->filtered) {
7085 /* handle eval qq[#line 1 "foo"\n ...] */
7086 CopLINE_dec(PL_curcop);
7087 incline(s, PL_bufend);
7090 while (d < PL_bufend && *d != '\n')
7095 if (in_comment && d == PL_bufend
7096 && PL_lex_state == LEX_INTERPNORMAL
7097 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7098 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7100 incline(s, PL_bufend);
7101 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7102 PL_lex_state = LEX_FORMLINE;
7103 force_next(FORMRBRACK);
7109 while (s < PL_bufend && *s != '\n')
7111 if (s < PL_bufend) {
7114 incline(s, PL_bufend);
7122 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7130 bof = cBOOL(PL_rsfp);
7133 PL_bufptr = PL_bufend;
7134 COPLINE_INC_WITH_HERELINES;
7135 if (!lex_next_chunk(fake_eof)) {
7136 CopLINE_dec(PL_curcop);
7138 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */
7140 CopLINE_dec(PL_curcop);
7142 /* If it looks like the start of a BOM or raw UTF-16,
7143 * check if it in fact is. */
7146 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7150 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7151 bof = (offset == (Off_t)SvCUR(PL_linestr));
7152 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7153 /* offset may include swallowed CR */
7155 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7158 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7159 s = swallow_bom((U8*)s);
7162 if (PL_parser->in_pod) {
7163 /* Incest with pod. */
7164 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7167 SvPVCLEAR(PL_linestr);
7168 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7169 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7170 PL_last_lop = PL_last_uni = NULL;
7171 PL_parser->in_pod = 0;
7174 if (PL_rsfp || PL_parser->filtered)
7175 incline(s, PL_bufend);
7176 } while (PL_parser->in_pod);
7178 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7179 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7180 PL_last_lop = PL_last_uni = NULL;
7181 if (CopLINE(PL_curcop) == 1) {
7182 while (s < PL_bufend && isSPACE(*s))
7184 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7188 if (*s == '#' && *(s+1) == '!')
7190 #ifdef ALTERNATE_SHEBANG
7192 static char const as[] = ALTERNATE_SHEBANG;
7193 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7194 d = s + (sizeof(as) - 1);
7196 #endif /* ALTERNATE_SHEBANG */
7205 while (*d && !isSPACE(*d))
7209 #ifdef ARG_ZERO_IS_SCRIPT
7210 if (ipathend > ipath) {
7212 * HP-UX (at least) sets argv[0] to the script name,
7213 * which makes $^X incorrect. And Digital UNIX and Linux,
7214 * at least, set argv[0] to the basename of the Perl
7215 * interpreter. So, having found "#!", we'll set it right.
7217 SV* copfilesv = CopFILESV(PL_curcop);
7220 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7222 assert(SvPOK(x) || SvGMAGICAL(x));
7223 if (sv_eq(x, copfilesv)) {
7224 sv_setpvn(x, ipath, ipathend - ipath);
7230 const char *bstart = SvPV_const(copfilesv, blen);
7231 const char * const lstart = SvPV_const(x, llen);
7233 bstart += blen - llen;
7234 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7235 sv_setpvn(x, ipath, ipathend - ipath);
7242 /* Anything to do if no copfilesv? */
7244 TAINT_NOT; /* $^X is always tainted, but that's OK */
7246 #endif /* ARG_ZERO_IS_SCRIPT */
7251 d = instr(s,"perl -");
7253 d = instr(s,"perl");
7255 /* avoid getting into infinite loops when shebang
7256 * line contains "Perl" rather than "perl" */
7258 for (d = ipathend-4; d >= ipath; --d) {
7259 if (isALPHA_FOLD_EQ(*d, 'p')
7260 && !ibcmp(d, "perl", 4))
7270 #ifdef ALTERNATE_SHEBANG
7272 * If the ALTERNATE_SHEBANG on this system starts with a
7273 * character that can be part of a Perl expression, then if
7274 * we see it but not "perl", we're probably looking at the
7275 * start of Perl code, not a request to hand off to some
7276 * other interpreter. Similarly, if "perl" is there, but
7277 * not in the first 'word' of the line, we assume the line
7278 * contains the start of the Perl program.
7280 if (d && *s != '#') {
7281 const char *c = ipath;
7282 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7285 d = NULL; /* "perl" not in first word; ignore */
7287 *s = '#'; /* Don't try to parse shebang line */
7289 #endif /* ALTERNATE_SHEBANG */
7294 && !instr(s,"indir")
7295 && instr(PL_origargv[0],"perl"))
7301 while (s < PL_bufend && isSPACE(*s))
7303 if (s < PL_bufend) {
7304 Newx(newargv,PL_origargc+3,char*);
7306 while (s < PL_bufend && !isSPACE(*s))
7309 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7312 newargv = PL_origargv;
7315 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7317 Perl_croak(aTHX_ "Can't exec %s", ipath);
7320 while (*d && !isSPACE(*d))
7322 while (SPACE_OR_TAB(*d))
7326 const bool switches_done = PL_doswitches;
7327 const U32 oldpdb = PL_perldb;
7328 const bool oldn = PL_minus_n;
7329 const bool oldp = PL_minus_p;
7333 bool baduni = FALSE;
7335 const char *d2 = d1 + 1;
7336 if (parse_unicode_opts((const char **)&d2)
7340 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7341 const char * const m = d1;
7342 while (*d1 && !isSPACE(*d1))
7344 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7347 d1 = moreswitches(d1);
7349 if (PL_doswitches && !switches_done) {
7350 int argc = PL_origargc;
7351 char **argv = PL_origargv;
7354 } while (argc && argv[0][0] == '-' && argv[0][1]);
7355 init_argv_symbols(argc,argv);
7357 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7358 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7359 /* if we have already added "LINE: while (<>) {",
7360 we must not do it again */
7362 SvPVCLEAR(PL_linestr);
7363 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7364 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7365 PL_last_lop = PL_last_uni = NULL;
7366 PL_preambled = FALSE;
7367 if (PERLDB_LINE_OR_SAVESRC)
7368 (void)gv_fetchfile(PL_origfilename);
7376 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7377 PL_lex_state = LEX_FORMLINE;
7378 force_next(FORMRBRACK);
7379 TOKEN(PERLY_SEMICOLON);
7387 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7391 = newSVOP(OP_CONST, 0,
7392 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7393 pl_yylval.opval->op_private = OPpCONST_BARE;
7398 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7400 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7401 && PL_parser->saw_infix_sigil)
7403 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7404 "Operator or semicolon missing before %c%" UTF8f,
7406 UTF8fARG(UTF, strlen(PL_tokenbuf),
7408 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7409 "Ambiguous use of %c resolved as operator %c",
7410 lastchar, lastchar);
7416 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7420 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7421 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7422 if (SvTYPE(sv) == SVt_PVAV)
7423 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7426 pl_yylval.opval->op_private = 0;
7427 pl_yylval.opval->op_folded = 1;
7428 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7433 op_free(pl_yylval.opval);
7435 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7436 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7437 PL_last_lop = PL_oldbufptr;
7438 PL_last_lop_op = OP_ENTERSUB;
7440 /* Is there a prototype? */
7442 int k = yyl_subproto(aTHX_ s, cv);
7447 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7449 force_next(off ? PRIVATEREF : BAREWORD);
7450 if (!PL_lex_allbrackets
7451 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7453 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7459 /* Honour "reserved word" warnings, and enforce strict subs */
7461 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7463 /* after "print" and similar functions (corresponding to
7464 * "F? L" in opcode.pl), whatever wasn't already parsed as
7465 * a filehandle should be subject to "strict subs".
7466 * Likewise for the optional indirect-object argument to system
7467 * or exec, which can't be a bareword */
7468 if ((PL_last_lop_op == OP_PRINT
7469 || PL_last_lop_op == OP_PRTF
7470 || PL_last_lop_op == OP_SAY
7471 || PL_last_lop_op == OP_SYSTEM
7472 || PL_last_lop_op == OP_EXEC)
7473 && (PL_hints & HINT_STRICT_SUBS))
7475 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7478 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7479 char *d = PL_tokenbuf;
7482 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7483 /* PL_warn_reserved is constant */
7484 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7485 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7487 GCC_DIAG_RESTORE_STMT;
7493 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7496 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7498 bool no_op_error = FALSE;
7499 /* Use this var to track whether intuit_method has been
7500 called. intuit_method returns 0 or > 255. */
7503 if (PL_expect == XOPERATOR) {
7504 if (PL_bufptr == PL_linestart) {
7505 CopLINE_dec(PL_curcop);
7506 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7507 CopLINE_inc(PL_curcop);
7510 /* We want to call no_op with s pointing after the
7511 bareword, so defer it. But we want it to come
7512 before the Bad name croak. */
7516 /* Get the rest if it looks like a package qualifier */
7518 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7520 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7523 no_op("Bareword",s);
7524 no_op_error = FALSE;
7527 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7528 UTF8fARG(UTF, len, PL_tokenbuf),
7529 *s == '\'' ? "'" : "::");
7535 no_op("Bareword",s);
7537 /* See if the name is "Foo::",
7538 in which case Foo is a bareword
7539 (and a package name). */
7541 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7542 if (ckWARN(WARN_BAREWORD)
7543 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7544 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7545 "Bareword \"%" UTF8f
7546 "\" refers to nonexistent package",
7547 UTF8fARG(UTF, len, PL_tokenbuf));
7549 PL_tokenbuf[len] = '\0';
7558 /* if we saw a global override before, get the right name */
7561 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7563 SV *sv = newSVpvs("CORE::GLOBAL::");
7569 /* Presume this is going to be a bareword of some sort. */
7571 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7572 pl_yylval.opval->op_private = OPpCONST_BARE;
7574 /* And if "Foo::", then that's what it certainly is. */
7576 return yyl_safe_bareword(aTHX_ s, lastchar);
7579 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7580 const_op->op_private = OPpCONST_BARE;
7581 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7585 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7588 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7591 /* See if it's the indirect object for a list operator. */
7594 && PL_oldoldbufptr < PL_bufptr
7595 && (PL_oldoldbufptr == PL_last_lop
7596 || PL_oldoldbufptr == PL_last_uni)
7597 && /* NO SKIPSPACE BEFORE HERE! */
7599 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7602 bool immediate_paren = *s == '(';
7605 /* (Now we can afford to cross potential line boundary.) */
7608 /* intuit_method() can indirectly call lex_next_chunk(),
7611 s_off = s - SvPVX(PL_linestr);
7612 /* Two barewords in a row may indicate method call. */
7613 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7615 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7617 /* the code at method: doesn't use s */
7620 s = SvPVX(PL_linestr) + s_off;
7622 if (((PL_opargs[PL_last_lop_op] >> OASHIFT) & 7) == OA_FILEREF
7623 && !immediate_paren && !c.cv
7624 && !FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
7625 no_bareword_filehandle(PL_tokenbuf);
7628 /* If not a declared subroutine, it's an indirect object. */
7629 /* (But it's an indir obj regardless for sort.) */
7630 /* Also, if "_" follows a filetest operator, it's a bareword */
7633 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7635 && (PL_last_lop_op != OP_MAPSTART
7636 && PL_last_lop_op != OP_GREPSTART))))
7637 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7638 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7642 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7643 yyl_strictwarn_bareword(aTHX_ lastchar);
7644 op_free(c.rv2cv_op);
7645 return yyl_safe_bareword(aTHX_ s, lastchar);
7649 PL_expect = XOPERATOR;
7652 /* Is this a word before a => operator? */
7653 if (*s == '=' && s[1] == '>' && !pkgname) {
7654 op_free(c.rv2cv_op);
7656 if (c.gvp || (c.lex && !c.off)) {
7657 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7658 /* This is our own scalar, created a few lines
7659 above, so this is safe. */
7660 SvREADONLY_off(c.sv);
7661 sv_setpv(c.sv, PL_tokenbuf);
7662 if (UTF && !IN_BYTES
7663 && is_utf8_string((U8*)PL_tokenbuf, len))
7665 SvREADONLY_on(c.sv);
7670 /* If followed by a paren, it's certainly a subroutine. */
7675 while (SPACE_OR_TAB(*d))
7677 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7678 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7680 NEXTVAL_NEXTTOKE.opval =
7681 c.off ? c.rv2cv_op : pl_yylval.opval;
7683 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7684 else op_free(c.rv2cv_op), force_next(BAREWORD);
7686 TOKEN(PERLY_AMPERSAND);
7689 /* If followed by var or block, call it a method (unless sub) */
7691 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7692 op_free(c.rv2cv_op);
7693 PL_last_lop = PL_oldbufptr;
7694 PL_last_lop_op = OP_METHOD;
7695 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7696 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7697 PL_expect = XBLOCKTERM;
7699 return REPORT(METHOD);
7702 /* If followed by a bareword, see if it looks like indir obj. */
7706 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7707 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7710 if (c.lex && !c.off) {
7711 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7712 SvREADONLY_off(c.sv);
7713 sv_setpvn(c.sv, PL_tokenbuf, len);
7714 if (UTF && !IN_BYTES
7715 && is_utf8_string((U8*)PL_tokenbuf, len))
7717 else SvUTF8_off(c.sv);
7719 op_free(c.rv2cv_op);
7720 if (key == METHOD && !PL_lex_allbrackets
7721 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7723 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7728 /* Not a method, so call it a subroutine (if defined) */
7731 /* Check for a constant sub */
7732 c.sv = cv_const_sv_or_av(c.cv);
7733 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7736 /* Call it a bare word */
7738 if (PL_hints & HINT_STRICT_SUBS)
7739 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7741 yyl_strictwarn_bareword(aTHX_ lastchar);
7743 op_free(c.rv2cv_op);
7745 return yyl_safe_bareword(aTHX_ s, lastchar);
7749 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7752 default: /* not a keyword */
7753 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7756 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7760 newSVOP(OP_CONST, 0,
7761 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7764 case KEY___PACKAGE__:
7766 newSVOP(OP_CONST, 0, (PL_curstash
7767 ? newSVhek(HvNAME_HEK(PL_curstash))
7773 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7774 yyl_data_handle(aTHX);
7775 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7778 FUN0OP(CvCLONE(PL_compcv)
7779 ? newOP(OP_RUNCV, 0)
7780 : newPVOP(OP_RUNCV,0,NULL));
7789 if (PL_expect == XSTATE)
7790 return yyl_sub(aTHX_ PL_bufptr, key);
7791 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7800 LOP(OP_ACCEPT,XTERM);
7803 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7808 LOP(OP_ATAN2,XTERM);
7814 LOP(OP_BINMODE,XTERM);
7817 LOP(OP_BLESS,XTERM);
7823 Perl_ck_warner_d(aTHX_
7824 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7831 /* We have to disambiguate the two senses of
7832 "continue". If the next token is a '{' then
7833 treat it as the start of a continue block;
7834 otherwise treat it as a control operator.
7844 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7854 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7863 LOP(OP_CRYPT,XTERM);
7866 LOP(OP_CHMOD,XTERM);
7869 LOP(OP_CHOWN,XTERM);
7872 LOP(OP_CONNECT,XTERM);
7887 Perl_ck_warner_d(aTHX_
7888 packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7892 return yyl_do(aTHX_ s, orig_keyword);
7895 PL_hints |= HINT_BLOCK_SCOPE;
7905 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7906 STR_WITH_LEN("NDBM_File::"),
7907 STR_WITH_LEN("DB_File::"),
7908 STR_WITH_LEN("GDBM_File::"),
7909 STR_WITH_LEN("SDBM_File::"),
7910 STR_WITH_LEN("ODBM_File::"),
7912 LOP(OP_DBMOPEN,XTERM);
7924 pl_yylval.ival = CopLINE(PL_curcop);
7928 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7940 if (*s == '{') { /* block eval */
7941 PL_expect = XTERMBLOCK;
7942 UNIBRACK(OP_ENTERTRY);
7944 else { /* string eval */
7946 UNIBRACK(OP_ENTEREVAL);
7951 UNIBRACK(-OP_ENTEREVAL);
7965 case KEY_endhostent:
7971 case KEY_endservent:
7974 case KEY_endprotoent:
7984 Perl_ck_warner_d(aTHX_
7985 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
7990 return yyl_foreach(aTHX_ s);
7993 LOP(OP_FORMLINE,XTERM);
8002 LOP(OP_FCNTL,XTERM);
8008 LOP(OP_FLOCK,XTERM);
8011 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8016 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8021 LOP(OP_GREPSTART, XREF);
8038 case KEY_getpriority:
8039 LOP(OP_GETPRIORITY,XTERM);
8041 case KEY_getprotobyname:
8044 case KEY_getprotobynumber:
8045 LOP(OP_GPBYNUMBER,XTERM);
8047 case KEY_getprotoent:
8059 case KEY_getpeername:
8060 UNI(OP_GETPEERNAME);
8062 case KEY_gethostbyname:
8065 case KEY_gethostbyaddr:
8066 LOP(OP_GHBYADDR,XTERM);
8068 case KEY_gethostent:
8071 case KEY_getnetbyname:
8074 case KEY_getnetbyaddr:
8075 LOP(OP_GNBYADDR,XTERM);
8080 case KEY_getservbyname:
8081 LOP(OP_GSBYNAME,XTERM);
8083 case KEY_getservbyport:
8084 LOP(OP_GSBYPORT,XTERM);
8086 case KEY_getservent:
8089 case KEY_getsockname:
8090 UNI(OP_GETSOCKNAME);
8092 case KEY_getsockopt:
8093 LOP(OP_GSOCKOPT,XTERM);
8108 pl_yylval.ival = CopLINE(PL_curcop);
8109 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8110 "given is experimental");
8114 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8120 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8122 pl_yylval.ival = CopLINE(PL_curcop);
8126 LOP(OP_INDEX,XTERM);
8132 LOP(OP_IOCTL,XTERM);
8162 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8167 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8181 LOP(OP_LISTEN,XTERM);
8190 s = scan_pat(s,OP_MATCH);
8191 TERM(sublex_start());
8194 LOP(OP_MAPSTART, XREF);
8197 LOP(OP_MKDIR,XTERM);
8200 LOP(OP_MSGCTL,XTERM);
8203 LOP(OP_MSGGET,XTERM);
8206 LOP(OP_MSGRCV,XTERM);
8209 LOP(OP_MSGSND,XTERM);
8214 return yyl_my(aTHX_ s, key);
8220 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8225 s = tokenize_use(0, s);
8229 if (*s == '(' || (s = skipspace(s), *s == '('))
8232 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8233 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8239 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8241 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8242 for (t=d; isSPACE(*t);)
8244 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8246 && !(t[0] == '=' && t[1] == '>')
8247 && !(t[0] == ':' && t[1] == ':')
8248 && !keyword(s, d-s, 0)
8250 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8251 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8252 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8258 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8260 pl_yylval.ival = OP_OR;
8270 LOP(OP_OPEN_DIR,XTERM);
8273 checkcomma(s,PL_tokenbuf,"filehandle");
8277 checkcomma(s,PL_tokenbuf,"filehandle");
8296 s = force_word(s,BAREWORD,FALSE,TRUE);
8298 s = force_strict_version(s);
8302 LOP(OP_PIPE_OP,XTERM);
8305 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8307 missingterm(NULL, 0);
8308 COPLINE_SET_FROM_MULTI_END;
8309 pl_yylval.ival = OP_CONST;
8310 TERM(sublex_start());
8316 return yyl_qw(aTHX_ s, len);
8319 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8321 missingterm(NULL, 0);
8322 pl_yylval.ival = OP_STRINGIFY;
8323 if (SvIVX(PL_lex_stuff) == '\'')
8324 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8325 TERM(sublex_start());
8328 s = scan_pat(s,OP_QR);
8329 TERM(sublex_start());
8332 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8334 missingterm(NULL, 0);
8335 pl_yylval.ival = OP_BACKTICK;
8336 TERM(sublex_start());
8342 return yyl_require(aTHX_ s, orig_keyword);
8351 LOP(OP_RENAME,XTERM);
8360 LOP(OP_RINDEX,XTERM);
8369 UNIDOR(OP_READLINE);
8372 UNIDOR(OP_BACKTICK);
8381 LOP(OP_REVERSE,XTERM);
8384 UNIDOR(OP_READLINK);
8391 if (pl_yylval.opval)
8392 TERM(sublex_start());
8394 TOKEN(1); /* force error */
8397 checkcomma(s,PL_tokenbuf,"filehandle");
8407 LOP(OP_SELECT,XTERM);
8413 LOP(OP_SEMCTL,XTERM);
8416 LOP(OP_SEMGET,XTERM);
8419 LOP(OP_SEMOP,XTERM);
8425 LOP(OP_SETPGRP,XTERM);
8427 case KEY_setpriority:
8428 LOP(OP_SETPRIORITY,XTERM);
8430 case KEY_sethostent:
8436 case KEY_setservent:
8439 case KEY_setprotoent:
8449 LOP(OP_SEEKDIR,XTERM);
8451 case KEY_setsockopt:
8452 LOP(OP_SSOCKOPT,XTERM);
8458 LOP(OP_SHMCTL,XTERM);
8461 LOP(OP_SHMGET,XTERM);
8464 LOP(OP_SHMREAD,XTERM);
8467 LOP(OP_SHMWRITE,XTERM);
8470 LOP(OP_SHUTDOWN,XTERM);
8479 LOP(OP_SOCKET,XTERM);
8481 case KEY_socketpair:
8482 LOP(OP_SOCKPAIR,XTERM);
8485 checkcomma(s,PL_tokenbuf,"subroutine name");
8488 s = force_word(s,BAREWORD,TRUE,TRUE);
8492 LOP(OP_SPLIT,XTERM);
8495 LOP(OP_SPRINTF,XTERM);
8498 LOP(OP_SPLICE,XTERM);
8513 LOP(OP_SUBSTR,XTERM);
8517 return yyl_sub(aTHX_ s, key);
8520 LOP(OP_SYSTEM,XREF);
8523 LOP(OP_SYMLINK,XTERM);
8526 LOP(OP_SYSCALL,XTERM);
8529 LOP(OP_SYSOPEN,XTERM);
8532 LOP(OP_SYSSEEK,XTERM);
8535 LOP(OP_SYSREAD,XTERM);
8538 LOP(OP_SYSWRITE,XTERM);
8543 TERM(sublex_start());
8564 LOP(OP_TRUNCATE,XTERM);
8567 pl_yylval.ival = CopLINE(PL_curcop);
8568 Perl_ck_warner_d(aTHX_
8569 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8582 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8584 pl_yylval.ival = CopLINE(PL_curcop);
8588 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8590 pl_yylval.ival = CopLINE(PL_curcop);
8594 LOP(OP_UNLINK,XTERM);
8600 LOP(OP_UNPACK,XTERM);
8603 LOP(OP_UTIME,XTERM);
8609 LOP(OP_UNSHIFT,XTERM);
8612 s = tokenize_use(1, s);
8622 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8624 pl_yylval.ival = CopLINE(PL_curcop);
8625 Perl_ck_warner_d(aTHX_
8626 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8627 "when is experimental");
8631 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8633 pl_yylval.ival = CopLINE(PL_curcop);
8637 PL_hints |= HINT_BLOCK_SCOPE;
8644 LOP(OP_WAITPID,XTERM);
8650 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8651 * we use the same number on EBCDIC */
8652 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8656 if (PL_expect == XOPERATOR) {
8657 if (*s == '=' && !PL_lex_allbrackets
8658 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8665 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8668 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8670 pl_yylval.ival = OP_XOR;
8676 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8679 I32 orig_keyword = 0;
8683 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8684 if ((*s == ':' && s[1] == ':')
8685 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8687 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8688 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8691 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8692 UTF8fARG(UTF, len, PL_tokenbuf));
8695 else if (key == KEY_require || key == KEY_do
8697 /* that's a way to remember we saw "CORE::" */
8700 /* Known to be a reserved word at this point */
8701 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8705 yyl_keylookup(pTHX_ char *s, GV *gv)
8710 struct code c = no_code;
8711 I32 orig_keyword = 0;
8717 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8719 /* Some keywords can be followed by any delimiter, including ':' */
8720 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8722 /* x::* is just a word, unless x is "CORE" */
8723 if (!anydelim && *s == ':' && s[1] == ':') {
8724 if (memEQs(PL_tokenbuf, len, "CORE"))
8725 return yyl_key_core(aTHX_ s, len, c);
8726 return yyl_just_a_word(aTHX_ s, len, 0, c);
8730 while (d < PL_bufend && isSPACE(*d))
8731 d++; /* no comments skipped here, or s### is misparsed */
8733 /* Is this a word before a => operator? */
8734 if (*d == '=' && d[1] == '>') {
8735 return yyl_fatcomma(aTHX_ s, len);
8738 /* Check for plugged-in keyword */
8742 char *saved_bufptr = PL_bufptr;
8744 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8746 if (result == KEYWORD_PLUGIN_DECLINE) {
8747 /* not a plugged-in keyword */
8748 PL_bufptr = saved_bufptr;
8749 } else if (result == KEYWORD_PLUGIN_STMT) {
8750 pl_yylval.opval = o;
8752 if (!PL_nexttoke) PL_expect = XSTATE;
8753 return REPORT(PLUGSTMT);
8754 } else if (result == KEYWORD_PLUGIN_EXPR) {
8755 pl_yylval.opval = o;
8757 if (!PL_nexttoke) PL_expect = XOPERATOR;
8758 return REPORT(PLUGEXPR);
8760 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8764 /* Is this a label? */
8765 if (!anydelim && PL_expect == XSTATE
8766 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8769 newSVOP(OP_CONST, 0,
8770 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8775 /* Check for lexical sub */
8776 if (PL_expect != XOPERATOR) {
8777 char tmpbuf[sizeof PL_tokenbuf + 1];
8779 Copy(PL_tokenbuf, tmpbuf+1, len, char);
8780 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8781 if (c.off != NOT_IN_PAD) {
8782 assert(c.off); /* we assume this is boolean-true below */
8783 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8784 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
8785 HEK * const stashname = HvNAME_HEK(stash);
8786 c.sv = newSVhek(stashname);
8787 sv_catpvs(c.sv, "::");
8788 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8789 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8790 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8796 return yyl_just_a_word(aTHX_ s, len, 0, c);
8800 c.rv2cv_op = newOP(OP_PADANY, 0);
8801 c.rv2cv_op->op_targ = c.off;
8802 c.cv = find_lexical_cv(c.off);
8805 return yyl_just_a_word(aTHX_ s, len, 0, c);
8810 /* Check for built-in keyword */
8811 key = keyword(PL_tokenbuf, len, 0);
8814 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8816 if (key && key != KEY___DATA__ && key != KEY___END__
8817 && (!anydelim || *s != '#')) {
8818 /* no override, and not s### either; skipspace is safe here
8819 * check for => on following line */
8821 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8822 STRLEN soff = s - SvPVX(PL_linestr);
8824 arrow = *s == '=' && s[1] == '>';
8825 PL_bufptr = SvPVX(PL_linestr) + bufoff;
8826 s = SvPVX(PL_linestr) + soff;
8828 return yyl_fatcomma(aTHX_ s, len);
8831 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8835 yyl_try(pTHX_ char *s)
8844 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8845 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8849 yyl_croak_unrecognised(aTHX_ s);
8853 /* emulate EOF on ^D or ^Z */
8854 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8861 if ((!PL_rsfp || PL_lex_inwhat)
8862 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8866 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8868 yyerror((const char *)
8870 ? "Format not terminated"
8871 : "Missing right curly or square bracket"));
8874 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8878 if (s++ < PL_bufend)
8879 goto retry; /* ignore stray nulls */
8882 if (!PL_in_eval && !PL_preambled) {
8883 PL_preambled = TRUE;
8885 /* Generate a string of Perl code to load the debugger.
8886 * If PERL5DB is set, it will return the contents of that,
8887 * otherwise a compile-time require of perl5db.pl. */
8889 const char * const pdb = PerlEnv_getenv("PERL5DB");
8892 sv_setpv(PL_linestr, pdb);
8893 sv_catpvs(PL_linestr,";");
8895 SETERRNO(0,SS_NORMAL);
8896 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8898 PL_parser->preambling = CopLINE(PL_curcop);
8900 SvPVCLEAR(PL_linestr);
8901 if (PL_preambleav) {
8902 SV **svp = AvARRAY(PL_preambleav);
8903 SV **const end = svp + AvFILLp(PL_preambleav);
8905 sv_catsv(PL_linestr, *svp);
8907 sv_catpvs(PL_linestr, ";");
8909 sv_free(MUTABLE_SV(PL_preambleav));
8910 PL_preambleav = NULL;
8913 sv_catpvs(PL_linestr,
8914 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8915 if (PL_minus_n || PL_minus_p) {
8916 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8918 sv_catpvs(PL_linestr,"chomp;");
8921 if ( ( *PL_splitstr == '/'
8922 || *PL_splitstr == '\''
8923 || *PL_splitstr == '"')
8924 && strchr(PL_splitstr + 1, *PL_splitstr))
8926 /* strchr is ok, because -F pattern can't contain
8928 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8931 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8932 bytes can be used as quoting characters. :-) */
8933 const char *splits = PL_splitstr;
8934 sv_catpvs(PL_linestr, "our @F=split(q\0");
8937 if (*splits == '\\')
8938 sv_catpvn(PL_linestr, splits, 1);
8939 sv_catpvn(PL_linestr, splits, 1);
8940 } while (*splits++);
8941 /* This loop will embed the trailing NUL of
8942 PL_linestr as the last thing it does before
8944 sv_catpvs(PL_linestr, ");");
8948 sv_catpvs(PL_linestr,"our @F=split(' ');");
8951 sv_catpvs(PL_linestr, "\n");
8952 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8953 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8954 PL_last_lop = PL_last_uni = NULL;
8955 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8956 update_debugger_info(PL_linestr, NULL, 0);
8959 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8964 #ifdef PERL_STRICT_CR
8965 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8967 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8969 case ' ': case '\t': case '\f': case '\v':
8975 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8976 if (needs_semicolon)
8977 TOKEN(PERLY_SEMICOLON);
8983 return yyl_hyphen(aTHX_ s);
8986 return yyl_plus(aTHX_ s);
8989 return yyl_star(aTHX_ s);
8992 return yyl_percent(aTHX_ s);
8995 return yyl_caret(aTHX_ s);
8998 return yyl_leftsquare(aTHX_ s);
9001 return yyl_tilde(aTHX_ s);
9004 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9007 OPERATOR(PERLY_COMMA);
9010 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9011 return yyl_colon(aTHX_ s + 1);
9014 return yyl_leftparen(aTHX_ s + 1);
9017 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9022 TOKEN(PERLY_SEMICOLON);
9025 return yyl_rightparen(aTHX_ s);
9028 return yyl_rightsquare(aTHX_ s);
9031 return yyl_leftcurly(aTHX_ s + 1, 0);
9034 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9036 return yyl_rightcurly(aTHX_ s, 0);
9039 return yyl_ampersand(aTHX_ s);
9042 return yyl_verticalbar(aTHX_ s);
9045 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9046 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9048 s = vcs_conflict_marker(s + 7);
9054 const char tmp = *s++;
9056 if (!PL_lex_allbrackets
9057 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9065 if (!PL_lex_allbrackets
9066 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9071 OPERATOR(PERLY_COMMA);
9075 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9076 && memCHRs("+-*/%.^&|<",tmp))
9077 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9078 "Reversed %c= operator",(int)tmp);
9080 if (PL_expect == XSTATE
9082 && (s == PL_linestart+1 || s[-2] == '\n') )
9084 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9085 || PL_lex_state != LEX_NORMAL)
9090 incline(s, PL_bufend);
9091 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9093 s = (char *) memchr(s,'\n', d - s);
9098 incline(s, PL_bufend);
9106 PL_parser->in_pod = 1;
9110 if (PL_expect == XBLOCK) {
9112 #ifdef PERL_STRICT_CR
9113 while (SPACE_OR_TAB(*t))
9115 while (SPACE_OR_TAB(*t) || *t == '\r')
9118 if (*t == '\n' || *t == '#') {
9119 ENTER_with_name("lex_format");
9120 SAVEI8(PL_parser->form_lex_state);
9121 SAVEI32(PL_lex_formbrack);
9122 PL_parser->form_lex_state = PL_lex_state;
9123 PL_lex_formbrack = PL_lex_brackets + 1;
9124 PL_parser->sub_error_count = PL_error_count;
9125 return yyl_leftcurly(aTHX_ s, 1);
9128 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9136 return yyl_bang(aTHX_ s + 1);
9139 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9140 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9142 s = vcs_conflict_marker(s + 7);
9145 return yyl_leftpointy(aTHX_ s);
9148 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9149 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9151 s = vcs_conflict_marker(s + 7);
9154 return yyl_rightpointy(aTHX_ s + 1);
9157 return yyl_dollar(aTHX_ s);
9160 return yyl_snail(aTHX_ s);
9162 case '/': /* may be division, defined-or, or pattern */
9163 return yyl_slash(aTHX_ s);
9165 case '?': /* conditional */
9167 if (!PL_lex_allbrackets
9168 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9173 PL_lex_allbrackets++;
9174 OPERATOR(PERLY_QUESTION_MARK);
9177 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9178 #ifdef PERL_STRICT_CR
9181 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9183 && (s == PL_linestart || s[-1] == '\n') )
9186 /* formbrack==2 means dot seen where arguments expected */
9187 return yyl_rightcurly(aTHX_ s, 2);
9189 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9193 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9196 if (!PL_lex_allbrackets
9197 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9205 pl_yylval.ival = OPf_SPECIAL;
9211 if (*s == '=' && !PL_lex_allbrackets
9212 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9220 case '0': case '1': case '2': case '3': case '4':
9221 case '5': case '6': case '7': case '8': case '9':
9222 s = scan_num(s, &pl_yylval);
9223 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9224 if (PL_expect == XOPERATOR)
9229 return yyl_sglquote(aTHX_ s);
9232 return yyl_dblquote(aTHX_ s);
9235 return yyl_backtick(aTHX_ s);
9238 return yyl_backslash(aTHX_ s + 1);
9241 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9242 char *start = s + 2;
9243 while (isDIGIT(*start) || *start == '_')
9245 if (*start == '.' && isDIGIT(start[1])) {
9246 s = scan_num(s, &pl_yylval);
9249 else if ((*start == ':' && start[1] == ':')
9250 || (PL_expect == XSTATE && *start == ':')) {
9251 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9255 else if (PL_expect == XSTATE) {
9257 while (d < PL_bufend && isSPACE(*d)) d++;
9259 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9264 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9265 if (!isALPHA(*start) && (PL_expect == XTERM
9266 || PL_expect == XREF || PL_expect == XSTATE
9267 || PL_expect == XTERMORDORDOR)) {
9268 GV *const gv = gv_fetchpvn_flags(s, start - s,
9269 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9271 s = scan_num(s, &pl_yylval);
9276 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9281 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9285 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9316 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9326 Works out what to call the token just pulled out of the input
9327 stream. The yacc parser takes care of taking the ops we return and
9328 stitching them into a tree.
9331 The type of the next token
9334 Check if we have already built the token; if so, use it.
9335 Switch based on the current state:
9336 - if we have a case modifier in a string, deal with that
9337 - handle other cases of interpolation inside a string
9338 - scan the next line if we are inside a format
9339 In the normal state, switch on the next character:
9341 if alphabetic, go to key lookup
9342 unrecognized character - croak
9343 - 0/4/26: handle end-of-line or EOF
9344 - cases for whitespace
9345 - \n and #: handle comments and line numbers
9346 - various operators, brackets and sigils
9349 - 'v': vstrings (or go to key lookup)
9350 - 'x' repetition operator (or go to key lookup)
9351 - other ASCII alphanumerics (key lookup begins here):
9354 scan built-in keyword (but do nothing with it yet)
9355 check for statement label
9356 check for lexical subs
9357 return yyl_just_a_word if there is one
9358 see whether built-in keyword is overridden
9359 switch on keyword number:
9360 - default: return yyl_just_a_word:
9361 not a built-in keyword; handle bareword lookup
9362 disambiguate between method and sub call
9363 fall back to bareword
9364 - cases for built-in keywords
9370 char *s = PL_bufptr;
9372 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9373 const U8* first_bad_char_loc;
9374 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9375 PL_bufend - PL_bufptr,
9376 &first_bad_char_loc)))
9378 _force_out_malformed_utf8_message(first_bad_char_loc,
9381 1 /* 1 means die */ );
9382 NOT_REACHED; /* NOTREACHED */
9384 PL_parser->recheck_utf8_validity = FALSE;
9387 SV* tmp = newSVpvs("");
9388 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9389 (IV)CopLINE(PL_curcop),
9390 lex_state_names[PL_lex_state],
9391 exp_name[PL_expect],
9392 pv_display(tmp, s, strlen(s), 0, 60));
9396 /* when we've already built the next token, just pull it out of the queue */
9399 pl_yylval = PL_nextval[PL_nexttoke];
9402 next_type = PL_nexttype[PL_nexttoke];
9403 if (next_type & (7<<24)) {
9404 if (next_type & (1<<24)) {
9405 if (PL_lex_brackets > 100)
9406 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9407 PL_lex_brackstack[PL_lex_brackets++] =
9408 (char) ((U8) (next_type >> 16));
9410 if (next_type & (2<<24))
9411 PL_lex_allbrackets++;
9412 if (next_type & (4<<24))
9413 PL_lex_allbrackets--;
9414 next_type &= 0xffff;
9416 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9420 switch (PL_lex_state) {
9422 case LEX_INTERPNORMAL:
9425 /* interpolated case modifiers like \L \U, including \Q and \E.
9426 when we get here, PL_bufptr is at the \
9428 case LEX_INTERPCASEMOD:
9429 /* handle \E or end of string */
9430 return yyl_interpcasemod(aTHX_ s);
9432 case LEX_INTERPPUSH:
9433 return REPORT(sublex_push());
9435 case LEX_INTERPSTART:
9436 if (PL_bufptr == PL_bufend)
9437 return REPORT(sublex_done());
9439 if(*PL_bufptr != '(')
9440 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9443 /* for /@a/, we leave the joining for the regex engine to do
9444 * (unless we're within \Q etc) */
9445 PL_lex_dojoin = (*PL_bufptr == '@'
9446 && (!PL_lex_inpat || PL_lex_casemods));
9447 PL_lex_state = LEX_INTERPNORMAL;
9448 if (PL_lex_dojoin) {
9449 NEXTVAL_NEXTTOKE.ival = 0;
9450 force_next(PERLY_COMMA);
9451 force_ident("\"", PERLY_DOLLAR);
9452 NEXTVAL_NEXTTOKE.ival = 0;
9453 force_next(PERLY_DOLLAR);
9454 NEXTVAL_NEXTTOKE.ival = 0;
9455 force_next((2<<24)|PERLY_PAREN_OPEN);
9456 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9459 /* Convert (?{...}) and friends to 'do {...}' */
9460 if (PL_lex_inpat && *PL_bufptr == '(') {
9461 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9463 if (*PL_bufptr != '{')
9465 PL_expect = XTERMBLOCK;
9469 if (PL_lex_starts++) {
9471 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9472 if (!PL_lex_casemods && PL_lex_inpat)
9475 AopNOASSIGN(OP_CONCAT);
9479 case LEX_INTERPENDMAYBE:
9480 if (intuit_more(PL_bufptr, PL_bufend)) {
9481 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9487 if (PL_lex_dojoin) {
9488 const U8 dojoin_was = PL_lex_dojoin;
9489 PL_lex_dojoin = FALSE;
9490 PL_lex_state = LEX_INTERPCONCAT;
9491 PL_lex_allbrackets--;
9492 return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9494 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9495 && SvEVALED(PL_lex_repl))
9497 if (PL_bufptr != PL_bufend)
9498 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9501 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9502 re_eval_str. If the here-doc body’s length equals the previous
9503 value of re_eval_start, re_eval_start will now be null. So
9504 check re_eval_str as well. */
9505 if (PL_parser->lex_shared->re_eval_start
9506 || PL_parser->lex_shared->re_eval_str) {
9508 if (*PL_bufptr != ')')
9509 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9511 /* having compiled a (?{..}) expression, return the original
9512 * text too, as a const */
9513 if (PL_parser->lex_shared->re_eval_str) {
9514 sv = PL_parser->lex_shared->re_eval_str;
9515 PL_parser->lex_shared->re_eval_str = NULL;
9517 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9518 SvPV_shrink_to_cur(sv);
9520 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9521 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9522 NEXTVAL_NEXTTOKE.opval =
9523 newSVOP(OP_CONST, 0,
9526 PL_parser->lex_shared->re_eval_start = NULL;
9528 return REPORT(PERLY_COMMA);
9532 case LEX_INTERPCONCAT:
9534 if (PL_lex_brackets)
9535 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9536 (long) PL_lex_brackets);
9538 if (PL_bufptr == PL_bufend)
9539 return REPORT(sublex_done());
9541 /* m'foo' still needs to be parsed for possible (?{...}) */
9542 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9543 SV *sv = newSVsv(PL_linestr);
9545 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9549 int save_error_count = PL_error_count;
9551 s = scan_const(PL_bufptr);
9553 /* Set flag if this was a pattern and there were errors. op.c will
9554 * refuse to compile a pattern with this flag set. Otherwise, we
9555 * could get segfaults, etc. */
9556 if (PL_lex_inpat && PL_error_count > save_error_count) {
9557 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9560 PL_lex_state = LEX_INTERPCASEMOD;
9562 PL_lex_state = LEX_INTERPSTART;
9565 if (s != PL_bufptr) {
9566 NEXTVAL_NEXTTOKE = pl_yylval;
9569 if (PL_lex_starts++) {
9570 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9571 if (!PL_lex_casemods && PL_lex_inpat)
9574 AopNOASSIGN(OP_CONCAT);
9584 if (PL_parser->sub_error_count != PL_error_count) {
9585 /* There was an error parsing a formline, which tends to
9587 Unlike interpolated sub-parsing, we can't treat any of
9588 these as recoverable, so no need to check sub_no_recover.
9592 assert(PL_lex_formbrack);
9593 s = scan_formline(PL_bufptr);
9594 if (!PL_lex_formbrack)
9595 return yyl_rightcurly(aTHX_ s, 1);
9600 /* We really do *not* want PL_linestr ever becoming a COW. */
9601 assert (!SvIsCOW(PL_linestr));
9603 PL_oldoldbufptr = PL_oldbufptr;
9606 if (PL_in_my == KEY_sigvar) {
9607 PL_parser->saw_infix_sigil = 0;
9608 return yyl_sigvar(aTHX_ s);
9612 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9613 On its return, we then need to set it to indicate whether the token
9614 we just encountered was an infix operator that (if we hadn't been
9615 expecting an operator) have been a sigil.
9617 bool expected_operator = (PL_expect == XOPERATOR);
9618 int ret = yyl_try(aTHX_ s);
9619 switch (pl_yylval.ival) {
9624 if (expected_operator) {
9625 PL_parser->saw_infix_sigil = 1;
9630 PL_parser->saw_infix_sigil = 0;
9640 Looks up an identifier in the pad or in a package
9642 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9643 rather than a plain pad var.
9646 PRIVATEREF if this is a lexical name.
9647 BAREWORD if this belongs to a package.
9650 if we're in a my declaration
9651 croak if they tried to say my($foo::bar)
9652 build the ops for a my() declaration
9653 if it's an access to a my() variable
9654 build ops for access to a my() variable
9655 if in a dq string, and they've said @foo and we can't find @foo
9657 build ops for a bareword
9661 S_pending_ident(pTHX)
9664 const char pit = (char)pl_yylval.ival;
9665 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9666 /* All routes through this function want to know if there is a colon. */
9667 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9669 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9670 "### Pending identifier '%s'\n", PL_tokenbuf); });
9671 assert(tokenbuf_len >= 2);
9673 /* if we're in a my(), we can't allow dynamics here.
9674 $foo'bar has already been turned into $foo::bar, so
9675 just check for colons.
9677 if it's a legal name, the OP is a PADANY.
9680 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9682 /* diag_listed_as: No package name allowed for variable %s
9684 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9686 *PL_tokenbuf=='&' ? "subroutine" : "variable",
9687 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9688 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9693 /* "my" variable %s can't be in a package */
9694 /* PL_no_myglob is constant */
9695 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9696 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9697 PL_in_my == KEY_my ? "my" : "state",
9698 *PL_tokenbuf == '&' ? "subroutine" : "variable",
9700 UTF ? SVf_UTF8 : 0);
9701 GCC_DIAG_RESTORE_STMT;
9704 if (PL_in_my == KEY_sigvar) {
9705 /* A signature 'padop' needs in addition, an op_first to
9706 * point to a child sigdefelem, and an extra field to hold
9707 * the signature index. We can achieve both by using an
9708 * UNOP_AUX and (ab)using the op_aux field to hold the
9709 * index. If we ever need more fields, use a real malloced
9710 * aux strut instead.
9712 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9713 INT2PTR(UNOP_AUX_item *,
9714 (PL_parser->sig_elems)));
9715 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9716 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9720 o = newOP(OP_PADANY, 0);
9721 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9722 UTF ? SVf_UTF8 : 0);
9723 if (PL_in_my == KEY_sigvar)
9726 pl_yylval.opval = o;
9732 build the ops for accesses to a my() variable.
9737 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9739 if (tmp != NOT_IN_PAD) {
9740 /* might be an "our" variable" */
9741 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9742 /* build ops for a bareword */
9743 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9744 HEK * const stashname = HvNAME_HEK(stash);
9745 SV * const sym = newSVhek(stashname);
9746 sv_catpvs(sym, "::");
9747 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9748 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9749 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9753 ((PL_tokenbuf[0] == '$') ? SVt_PV
9754 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9759 pl_yylval.opval = newOP(OP_PADANY, 0);
9760 pl_yylval.opval->op_targ = tmp;
9766 Whine if they've said @foo or @foo{key} in a doublequoted string,
9767 and @foo (or %foo) isn't a variable we can find in the symbol
9770 if (ckWARN(WARN_AMBIGUOUS)
9772 && PL_lex_state != LEX_NORMAL
9773 && !PL_lex_brackets)
9775 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9776 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9778 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9781 /* Downgraded from fatal to warning 20000522 mjd */
9782 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9783 "Possible unintended interpolation of %" UTF8f
9785 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9789 /* build ops for a bareword */
9790 pl_yylval.opval = newSVOP(OP_CONST, 0,
9791 newSVpvn_flags(PL_tokenbuf + 1,
9792 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9793 UTF ? SVf_UTF8 : 0 ));
9794 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9796 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9797 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9798 | ( UTF ? SVf_UTF8 : 0 ),
9799 ((PL_tokenbuf[0] == '$') ? SVt_PV
9800 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9806 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9808 PERL_ARGS_ASSERT_CHECKCOMMA;
9810 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9811 if (ckWARN(WARN_SYNTAX)) {
9814 for (w = s+2; *w && level; w++) {
9822 /* the list of chars below is for end of statements or
9823 * block / parens, boolean operators (&&, ||, //) and branch
9824 * constructs (or, and, if, until, unless, while, err, for).
9825 * Not a very solid hack... */
9826 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9827 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9828 "%s (...) interpreted as function",name);
9831 while (s < PL_bufend && isSPACE(*s))
9835 while (s < PL_bufend && isSPACE(*s))
9837 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9838 const char * const w = s;
9839 s += UTF ? UTF8SKIP(s) : 1;
9840 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9841 s += UTF ? UTF8SKIP(s) : 1;
9842 while (s < PL_bufend && isSPACE(*s))
9846 if (keyword(w, s - w, 0))
9849 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9850 if (gv && GvCVu(gv))
9855 Copy(w, tmpbuf+1, s - w, char);
9857 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9858 if (off != NOT_IN_PAD) return;
9860 Perl_croak(aTHX_ "No comma allowed after %s", what);
9865 /* S_new_constant(): do any overload::constant lookup.
9867 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9868 Best used as sv=new_constant(..., sv, ...).
9869 If s, pv are NULL, calls subroutine with one argument,
9870 and <type> is used with error messages only.
9871 <type> is assumed to be well formed UTF-8.
9873 If error_msg is not NULL, *error_msg will be set to any error encountered.
9874 Otherwise yyerror() will be used to output it */
9877 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9878 SV *sv, SV *pv, const char *type, STRLEN typelen,
9879 const char ** error_msg)
9882 HV * table = GvHV(PL_hintgv); /* ^H */
9887 const char *why1 = "", *why2 = "", *why3 = "";
9888 const char * optional_colon = ":"; /* Only some messages have a colon */
9891 PERL_ARGS_ASSERT_NEW_CONSTANT;
9892 /* We assume that this is true: */
9895 sv_2mortal(sv); /* Parent created it permanently */
9898 || ! (PL_hints & HINT_LOCALIZE_HH))
9901 optional_colon = "";
9905 cvp = hv_fetch(table, key, keylen, FALSE);
9906 if (!cvp || !SvOK(*cvp)) {
9909 why3 = "} is not defined";
9915 pv = newSVpvn_flags(s, len, SVs_TEMP);
9917 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9919 typesv = &PL_sv_undef;
9921 PUSHSTACKi(PERLSI_OVERLOAD);
9933 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9937 /* Check the eval first */
9938 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9940 const char * errstr;
9941 sv_catpvs(errsv, "Propagated");
9942 errstr = SvPV_const(errsv, errlen);
9943 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9945 res = SvREFCNT_inc_simple_NN(sv);
9949 SvREFCNT_inc_simple_void_NN(res);
9962 (void)sv_2mortal(sv);
9964 why1 = "Call to &{$^H{";
9966 why3 = "}} did not return a defined value";
9970 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9971 (int)(type ? typelen : len),
9979 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9981 return SvREFCNT_inc_simple_NN(sv);
9984 PERL_STATIC_INLINE void
9985 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9986 bool is_utf8, bool check_dollar, bool tick_warn)
9989 const char *olds = *s;
9990 PERL_ARGS_ASSERT_PARSE_IDENT;
9992 while (*s < PL_bufend) {
9994 Perl_croak(aTHX_ "%s", ident_too_long);
9995 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9996 /* The UTF-8 case must come first, otherwise things
9997 * like c\N{COMBINING TILDE} would start failing, as the
9998 * isWORDCHAR_A case below would gobble the 'c' up.
10001 char *t = *s + UTF8SKIP(*s);
10002 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10005 if (*d + (t - *s) > e)
10006 Perl_croak(aTHX_ "%s", ident_too_long);
10007 Copy(*s, *d, t - *s, char);
10011 else if ( isWORDCHAR_A(**s) ) {
10014 } while (isWORDCHAR_A(**s) && *d < e);
10016 else if ( allow_package
10018 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10025 else if (allow_package && **s == ':' && (*s)[1] == ':'
10026 /* Disallow things like Foo::$bar. For the curious, this is
10027 * the code path that triggers the "Bad name after" warning
10028 * when looking for barewords.
10030 && !(check_dollar && (*s)[2] == '$')) {
10037 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
10038 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
10041 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10043 SAVEFREEPV(this_d);
10044 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10045 "Old package separator used in string");
10046 if (olds[-1] == '#')
10049 while (olds < *s) {
10050 if (*olds == '\'') {
10057 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10058 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10059 UTF8fARG(is_utf8, d2-this_d, this_d));
10064 /* Returns a NUL terminated string, with the length of the string written to
10068 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10071 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10072 bool is_utf8 = cBOOL(UTF);
10074 PERL_ARGS_ASSERT_SCAN_WORD;
10076 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
10082 /* Is the byte 'd' a legal single character identifier name? 'u' is true
10083 * iff Unicode semantics are to be used. The legal ones are any of:
10084 * a) all ASCII characters except:
10085 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10087 * The final case currently doesn't get this far in the program, so we
10088 * don't test for it. If that were to change, it would be ok to allow it.
10089 * b) When not under Unicode rules, any upper Latin1 character
10090 * c) Otherwise, when unicode rules are used, all XIDS characters.
10092 * Because all ASCII characters have the same representation whether
10093 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10094 * '{' without knowing if is UTF-8 or not. */
10095 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
10096 (isGRAPH_A(*(s)) || ((is_utf8) \
10097 ? isIDFIRST_utf8_safe(s, e) \
10098 : (isGRAPH_L1(*s) \
10099 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
10102 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10104 I32 herelines = PL_parser->herelines;
10105 SSize_t bracket = -1;
10108 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10109 bool is_utf8 = cBOOL(UTF);
10110 I32 orig_copline = 0, tmp_copline = 0;
10112 PERL_ARGS_ASSERT_SCAN_IDENT;
10114 if (isSPACE(*s) || !*s)
10116 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10117 bool is_zero= *s == '0' ? TRUE : FALSE;
10118 char *digit_start= d;
10120 while (s < PL_bufend && isDIGIT(*s)) {
10122 Perl_croak(aTHX_ "%s", ident_too_long);
10125 if (is_zero && d - digit_start > 1)
10126 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10128 else { /* See if it is a "normal" identifier */
10129 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10134 /* Either a digit variable, or parse_ident() found an identifier
10135 (anything valid as a bareword), so job done and return. */
10136 if (PL_lex_state != LEX_NORMAL)
10137 PL_lex_state = LEX_INTERPENDMAYBE;
10141 /* Here, it is not a run-of-the-mill identifier name */
10143 if (*s == '$' && s[1]
10144 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10145 || isDIGIT_A((U8)s[1])
10148 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10150 /* Dereferencing a value in a scalar variable.
10151 The alternatives are different syntaxes for a scalar variable.
10152 Using ' as a leading package separator isn't allowed. :: is. */
10155 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
10157 bracket = s - SvPVX(PL_linestr);
10159 orig_copline = CopLINE(PL_curcop);
10160 if (s < PL_bufend && isSPACE(*s)) {
10164 if ((s <= PL_bufend - ((is_utf8)
10167 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
10170 const STRLEN skip = UTF8SKIP(s);
10173 for ( i = 0; i < skip; i++ )
10178 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10180 bool is_zero= *d == '0' ? TRUE : FALSE;
10181 char *digit_start= d;
10182 while (s < PL_bufend && isDIGIT(*s)) {
10185 Perl_croak(aTHX_ "%s", ident_too_long);
10188 if (is_zero && d - digit_start > 1)
10189 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10194 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10195 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10199 /* Warn about ambiguous code after unary operators if {...} notation isn't
10200 used. There's no difference in ambiguity; it's merely a heuristic
10201 about when not to warn. */
10202 else if (ck_uni && bracket == -1)
10204 if (bracket != -1) {
10207 /* If we were processing {...} notation then... */
10208 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10209 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10212 /* note we have to check for a normal identifier first,
10213 * as it handles utf8 symbols, and only after that has
10214 * been ruled out can we look at the caret words */
10215 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10216 /* if it starts as a valid identifier, assume that it is one.
10217 (the later check for } being at the expected point will trap
10218 cases where this doesn't pan out.) */
10219 d += is_utf8 ? UTF8SKIP(d) : 1;
10220 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10223 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10225 while (isWORDCHAR(*s) && d < e) {
10229 Perl_croak(aTHX_ "%s", ident_too_long);
10232 tmp_copline = CopLINE(PL_curcop);
10233 if (s < PL_bufend && isSPACE(*s)) {
10236 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10237 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10238 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10239 const char * const brack =
10241 ((*s == '[') ? "[...]" : "{...}");
10242 orig_copline = CopLINE(PL_curcop);
10243 CopLINE_set(PL_curcop, tmp_copline);
10244 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10245 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10246 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10247 funny, dest, brack, funny, dest, brack);
10248 CopLINE_set(PL_curcop, orig_copline);
10251 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10252 PL_lex_allbrackets++;
10257 if ( !tmp_copline )
10258 tmp_copline = CopLINE(PL_curcop);
10259 if ((skip = s < PL_bufend && isSPACE(*s))) {
10260 /* Avoid incrementing line numbers or resetting PL_linestart,
10261 in case we have to back up. */
10262 STRLEN s_off = s - SvPVX(PL_linestr);
10264 s = SvPVX(PL_linestr) + s_off;
10269 /* Expect to find a closing } after consuming any trailing whitespace.
10272 /* Now increment line numbers if applicable. */
10276 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10277 PL_lex_state = LEX_INTERPEND;
10280 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10281 if (ckWARN(WARN_AMBIGUOUS)
10282 && (keyword(dest, d - dest, 0)
10283 || get_cvn_flags(dest, d - dest, is_utf8
10287 SV *tmp = newSVpvn_flags( dest, d - dest,
10288 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10291 orig_copline = CopLINE(PL_curcop);
10292 CopLINE_set(PL_curcop, tmp_copline);
10293 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10294 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10295 funny, SVfARG(tmp), funny, SVfARG(tmp));
10296 CopLINE_set(PL_curcop, orig_copline);
10301 /* Didn't find the closing } at the point we expected, so restore
10302 state such that the next thing to process is the opening { and */
10303 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10304 CopLINE_set(PL_curcop, orig_copline);
10305 PL_parser->herelines = herelines;
10307 PL_parser->sub_no_recover = TRUE;
10310 else if ( PL_lex_state == LEX_INTERPNORMAL
10311 && !PL_lex_brackets
10312 && !intuit_more(s, PL_bufend))
10313 PL_lex_state = LEX_INTERPEND;
10318 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10320 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10321 * found in the parse starting at 's', based on the subset that are valid
10322 * in this context input to this routine in 'valid_flags'. Advances s.
10323 * Returns TRUE if the input should be treated as a valid flag, so the next
10324 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10325 * upon first call on the current regex. This routine will set it to any
10326 * charset modifier found. The caller shouldn't change it. This way,
10327 * another charset modifier encountered in the parse can be detected as an
10328 * error, as we have decided to allow only one */
10330 const char c = **s;
10331 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10333 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10334 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10335 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10336 UTF ? SVf_UTF8 : 0);
10338 /* Pretend that it worked, so will continue processing before
10347 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10348 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10349 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10350 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10351 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10352 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10353 case LOCALE_PAT_MOD:
10355 goto multiple_charsets;
10357 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10360 case UNICODE_PAT_MOD:
10362 goto multiple_charsets;
10364 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10367 case ASCII_RESTRICT_PAT_MOD:
10369 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10373 /* Error if previous modifier wasn't an 'a', but if it was, see
10374 * if, and accept, a second occurrence (only) */
10375 if (*charset != 'a'
10376 || get_regex_charset(*pmfl)
10377 != REGEX_ASCII_RESTRICTED_CHARSET)
10379 goto multiple_charsets;
10381 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10385 case DEPENDS_PAT_MOD:
10387 goto multiple_charsets;
10389 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10398 if (*charset != c) {
10399 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10401 else if (c == 'a') {
10402 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10403 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10406 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10409 /* Pretend that it worked, so will continue processing before dieing */
10415 S_scan_pat(pTHX_ char *start, I32 type)
10419 const char * const valid_flags =
10420 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10421 char charset = '\0'; /* character set modifier */
10422 unsigned int x_mod_count = 0;
10424 PERL_ARGS_ASSERT_SCAN_PAT;
10426 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10428 Perl_croak(aTHX_ "Search pattern not terminated");
10430 pm = (PMOP*)newPMOP(type, 0);
10431 if (PL_multi_open == '?') {
10432 /* This is the only point in the code that sets PMf_ONCE: */
10433 pm->op_pmflags |= PMf_ONCE;
10435 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10436 allows us to restrict the list needed by reset to just the ??
10438 assert(type != OP_TRANS);
10440 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10443 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10446 elements = mg->mg_len / sizeof(PMOP**);
10447 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10448 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10449 mg->mg_len = elements * sizeof(PMOP**);
10450 PmopSTASH_set(pm,PL_curstash);
10454 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10455 * anon CV. False positives like qr/[(?{]/ are harmless */
10457 if (type == OP_QR) {
10459 char *e, *p = SvPV(PL_lex_stuff, len);
10461 for (; p < e; p++) {
10462 if (p[0] == '(' && p[1] == '?'
10463 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10465 pm->op_pmflags |= PMf_HAS_CV;
10469 pm->op_pmflags |= PMf_IS_QR;
10472 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10473 &s, &charset, &x_mod_count))
10475 /* issue a warning if /c is specified,but /g is not */
10476 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10478 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10479 "Use of /c modifier is meaningless without /g" );
10482 PL_lex_op = (OP*)pm;
10483 pl_yylval.ival = OP_MATCH;
10488 S_scan_subst(pTHX_ char *start)
10494 line_t linediff = 0;
10496 char charset = '\0'; /* character set modifier */
10497 unsigned int x_mod_count = 0;
10500 PERL_ARGS_ASSERT_SCAN_SUBST;
10502 pl_yylval.ival = OP_NULL;
10504 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10507 Perl_croak(aTHX_ "Substitution pattern not terminated");
10511 first_start = PL_multi_start;
10512 first_line = CopLINE(PL_curcop);
10513 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10515 SvREFCNT_dec_NN(PL_lex_stuff);
10516 PL_lex_stuff = NULL;
10517 Perl_croak(aTHX_ "Substitution replacement not terminated");
10519 PL_multi_start = first_start; /* so whole substitution is taken together */
10521 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10525 if (*s == EXEC_PAT_MOD) {
10529 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10530 &s, &charset, &x_mod_count))
10536 if ((pm->op_pmflags & PMf_CONTINUE)) {
10537 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10541 SV * const repl = newSVpvs("");
10544 pm->op_pmflags |= PMf_EVAL;
10545 for (; es > 1; es--) {
10546 sv_catpvs(repl, "eval ");
10548 sv_catpvs(repl, "do {");
10549 sv_catsv(repl, PL_parser->lex_sub_repl);
10550 sv_catpvs(repl, "}");
10551 SvREFCNT_dec(PL_parser->lex_sub_repl);
10552 PL_parser->lex_sub_repl = repl;
10556 linediff = CopLINE(PL_curcop) - first_line;
10558 CopLINE_set(PL_curcop, first_line);
10560 if (linediff || es) {
10561 /* the IVX field indicates that the replacement string is a s///e;
10562 * the NVX field indicates how many src code lines the replacement
10564 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10565 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10566 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10570 PL_lex_op = (OP*)pm;
10571 pl_yylval.ival = OP_SUBST;
10576 S_scan_trans(pTHX_ char *start)
10583 bool nondestruct = 0;
10586 PERL_ARGS_ASSERT_SCAN_TRANS;
10588 pl_yylval.ival = OP_NULL;
10590 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10592 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10596 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10598 SvREFCNT_dec_NN(PL_lex_stuff);
10599 PL_lex_stuff = NULL;
10600 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10603 complement = del = squash = 0;
10607 complement = OPpTRANS_COMPLEMENT;
10610 del = OPpTRANS_DELETE;
10613 squash = OPpTRANS_SQUASH;
10625 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10626 o->op_private &= ~OPpTRANS_ALL;
10627 o->op_private |= del|squash|complement;
10630 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10637 Takes a pointer to the first < in <<FOO.
10638 Returns a pointer to the byte following <<FOO.
10640 This function scans a heredoc, which involves different methods
10641 depending on whether we are in a string eval, quoted construct, etc.
10642 This is because PL_linestr could containing a single line of input, or
10643 a whole string being evalled, or the contents of the current quote-
10646 The two basic methods are:
10647 - Steal lines from the input stream
10648 - Scan the heredoc in PL_linestr and remove it therefrom
10650 In a file scope or filtered eval, the first method is used; in a
10651 string eval, the second.
10653 In a quote-like operator, we have to choose between the two,
10654 depending on where we can find a newline. We peek into outer lex-
10655 ing scopes until we find one with a newline in it. If we reach the
10656 outermost lexing scope and it is a file, we use the stream method.
10657 Otherwise it is treated as an eval.
10661 S_scan_heredoc(pTHX_ char *s)
10663 I32 op_type = OP_SCALAR;
10671 I32 indent_len = 0;
10672 bool indented = FALSE;
10673 const bool infile = PL_rsfp || PL_parser->filtered;
10674 const line_t origline = CopLINE(PL_curcop);
10675 LEXSHARED *shared = PL_parser->lex_shared;
10677 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10680 d = PL_tokenbuf + 1;
10681 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10682 *PL_tokenbuf = '\n';
10685 if (*peek == '~') {
10690 while (SPACE_OR_TAB(*peek))
10693 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10696 s = delimcpy(d, e, s, PL_bufend, term, &len);
10697 if (s == PL_bufend)
10698 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10704 /* <<\FOO is equivalent to <<'FOO' */
10709 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10710 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10714 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10715 peek += UTF ? UTF8SKIP(peek) : 1;
10718 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10719 Copy(s, d, len, char);
10724 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10725 Perl_croak(aTHX_ "Delimiter for here document is too long");
10729 len = d - PL_tokenbuf;
10731 #ifndef PERL_STRICT_CR
10732 d = (char *) memchr(s, '\r', PL_bufend - s);
10734 char * const olds = s;
10736 while (s < PL_bufend) {
10742 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10751 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10756 tmpstr = newSV_type(SVt_PVIV);
10757 SvGROW(tmpstr, 80);
10758 if (term == '\'') {
10759 op_type = OP_CONST;
10760 SvIV_set(tmpstr, -1);
10762 else if (term == '`') {
10763 op_type = OP_BACKTICK;
10764 SvIV_set(tmpstr, '\\');
10767 PL_multi_start = origline + 1 + PL_parser->herelines;
10768 PL_multi_open = PL_multi_close = '<';
10770 /* inside a string eval or quote-like operator */
10771 if (!infile || PL_lex_inwhat) {
10774 char * const olds = s;
10775 PERL_CONTEXT * const cx = CX_CUR();
10776 /* These two fields are not set until an inner lexing scope is
10777 entered. But we need them set here. */
10778 shared->ls_bufptr = s;
10779 shared->ls_linestr = PL_linestr;
10781 if (PL_lex_inwhat) {
10782 /* Look for a newline. If the current buffer does not have one,
10783 peek into the line buffer of the parent lexing scope, going
10784 up as many levels as necessary to find one with a newline
10787 while (!(s = (char *)memchr(
10788 (void *)shared->ls_bufptr, '\n',
10789 SvEND(shared->ls_linestr)-shared->ls_bufptr
10792 shared = shared->ls_prev;
10793 /* shared is only null if we have gone beyond the outermost
10794 lexing scope. In a file, we will have broken out of the
10795 loop in the previous iteration. In an eval, the string buf-
10796 fer ends with "\n;", so the while condition above will have
10797 evaluated to false. So shared can never be null. Or so you
10798 might think. Odd syntax errors like s;@{<<; can gobble up
10799 the implicit semicolon at the end of a flie, causing the
10800 file handle to be closed even when we are not in a string
10801 eval. So shared may be null in that case.
10802 (Closing '>>}' here to balance the earlier open brace for
10803 editors that look for matched pairs.) */
10804 if (UNLIKELY(!shared))
10806 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10807 most lexing scope. In a file, shared->ls_linestr at that
10808 level is just one line, so there is no body to steal. */
10809 if (infile && !shared->ls_prev) {
10815 else { /* eval or we've already hit EOF */
10816 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10821 linestr = shared->ls_linestr;
10822 bufend = SvEND(linestr);
10827 while (s < bufend - len + 1) {
10829 ++PL_parser->herelines;
10831 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10835 /* Only valid if it's preceded by whitespace only */
10836 while (backup != myolds && --backup >= myolds) {
10837 if (! SPACE_OR_TAB(*backup)) {
10843 /* No whitespace or all! */
10844 if (backup == s || *backup == '\n') {
10845 Newx(indent, indent_len + 1, char);
10846 memcpy(indent, backup + 1, indent_len);
10847 indent[indent_len] = 0;
10848 s--; /* before our delimiter */
10849 PL_parser->herelines--; /* this line doesn't count */
10856 while (s < bufend - len + 1
10857 && memNE(s,PL_tokenbuf,len) )
10860 ++PL_parser->herelines;
10864 if (s >= bufend - len + 1) {
10868 sv_setpvn(tmpstr,d+1,s-d);
10870 /* the preceding stmt passes a newline */
10871 PL_parser->herelines++;
10873 /* s now points to the newline after the heredoc terminator.
10874 d points to the newline before the body of the heredoc.
10877 /* We are going to modify linestr in place here, so set
10878 aside copies of the string if necessary for re-evals or
10880 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10881 check shared->re_eval_str. */
10882 if (shared->re_eval_start || shared->re_eval_str) {
10883 /* Set aside the rest of the regexp */
10884 if (!shared->re_eval_str)
10885 shared->re_eval_str =
10886 newSVpvn(shared->re_eval_start,
10887 bufend - shared->re_eval_start);
10888 shared->re_eval_start -= s-d;
10891 if (cxstack_ix >= 0
10892 && CxTYPE(cx) == CXt_EVAL
10893 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10894 && cx->blk_eval.cur_text == linestr)
10896 cx->blk_eval.cur_text = newSVsv(linestr);
10897 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10900 /* Copy everything from s onwards back to d. */
10901 Move(s,d,bufend-s + 1,char);
10902 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10903 /* Setting PL_bufend only applies when we have not dug deeper
10904 into other scopes, because sublex_done sets PL_bufend to
10905 SvEND(PL_linestr). */
10906 if (shared == PL_parser->lex_shared)
10907 PL_bufend = SvEND(linestr);
10912 char *oldbufptr_save;
10913 char *oldoldbufptr_save;
10915 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10916 term = PL_tokenbuf[1];
10918 linestr_save = PL_linestr; /* must restore this afterwards */
10919 d = s; /* and this */
10920 oldbufptr_save = PL_oldbufptr;
10921 oldoldbufptr_save = PL_oldoldbufptr;
10922 PL_linestr = newSVpvs("");
10923 PL_bufend = SvPVX(PL_linestr);
10926 PL_bufptr = PL_bufend;
10927 CopLINE_set(PL_curcop,
10928 origline + 1 + PL_parser->herelines);
10930 if ( !lex_next_chunk(LEX_NO_TERM)
10931 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10933 /* Simply freeing linestr_save might seem simpler here, as it
10934 does not matter what PL_linestr points to, since we are
10935 about to croak; but in a quote-like op, linestr_save
10936 will have been prospectively freed already, via
10937 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10938 restore PL_linestr. */
10939 SvREFCNT_dec_NN(PL_linestr);
10940 PL_linestr = linestr_save;
10941 PL_oldbufptr = oldbufptr_save;
10942 PL_oldoldbufptr = oldoldbufptr_save;
10946 CopLINE_set(PL_curcop, origline);
10948 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10949 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10950 /* ^That should be enough to avoid this needing to grow: */
10951 sv_catpvs(PL_linestr, "\n\0");
10952 assert(s == SvPVX(PL_linestr));
10953 PL_bufend = SvEND(PL_linestr);
10957 PL_parser->herelines++;
10958 PL_last_lop = PL_last_uni = NULL;
10960 #ifndef PERL_STRICT_CR
10961 if (PL_bufend - PL_linestart >= 2) {
10962 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10963 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10965 PL_bufend[-2] = '\n';
10967 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10969 else if (PL_bufend[-1] == '\r')
10970 PL_bufend[-1] = '\n';
10972 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10973 PL_bufend[-1] = '\n';
10976 if (indented && (PL_bufend-s) >= len) {
10977 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10980 char *backup = found;
10983 /* Only valid if it's preceded by whitespace only */
10984 while (backup != s && --backup >= s) {
10985 if (! SPACE_OR_TAB(*backup)) {
10991 /* All whitespace or none! */
10992 if (backup == found || SPACE_OR_TAB(*backup)) {
10993 Newx(indent, indent_len + 1, char);
10994 memcpy(indent, backup, indent_len);
10995 indent[indent_len] = 0;
10996 SvREFCNT_dec(PL_linestr);
10997 PL_linestr = linestr_save;
10998 PL_linestart = SvPVX(linestr_save);
10999 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11000 PL_oldbufptr = oldbufptr_save;
11001 PL_oldoldbufptr = oldoldbufptr_save;
11007 /* Didn't find it */
11008 sv_catsv(tmpstr,PL_linestr);
11011 if (*s == term && PL_bufend-s >= len
11012 && memEQ(s,PL_tokenbuf + 1,len))
11014 SvREFCNT_dec(PL_linestr);
11015 PL_linestr = linestr_save;
11016 PL_linestart = SvPVX(linestr_save);
11017 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11018 PL_oldbufptr = oldbufptr_save;
11019 PL_oldoldbufptr = oldoldbufptr_save;
11024 sv_catsv(tmpstr,PL_linestr);
11030 PL_multi_end = origline + PL_parser->herelines;
11032 if (indented && indent) {
11033 STRLEN linecount = 1;
11034 STRLEN herelen = SvCUR(tmpstr);
11035 char *ss = SvPVX(tmpstr);
11036 char *se = ss + herelen;
11037 SV *newstr = newSV(herelen+1);
11040 /* Trim leading whitespace */
11042 /* newline only? Copy and move on */
11044 sv_catpvs(newstr,"\n");
11048 /* Found our indentation? Strip it */
11050 else if (se - ss >= indent_len
11051 && memEQ(ss, indent, indent_len))
11056 while ((ss + le) < se && *(ss + le) != '\n')
11059 sv_catpvn(newstr, ss, le);
11062 /* Line doesn't begin with our indentation? Croak */
11067 "Indentation on line %d of here-doc doesn't match delimiter",
11073 /* avoid sv_setsv() as we dont wan't to COW here */
11074 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11076 SvREFCNT_dec_NN(newstr);
11079 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11080 SvPV_shrink_to_cur(tmpstr);
11084 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11088 PL_lex_stuff = tmpstr;
11089 pl_yylval.ival = op_type;
11095 SvREFCNT_dec(tmpstr);
11096 CopLINE_set(PL_curcop, origline);
11097 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11101 /* scan_inputsymbol
11102 takes: position of first '<' in input buffer
11103 returns: position of first char following the matching '>' in
11105 side-effects: pl_yylval and lex_op are set.
11110 <<>> read from ARGV without magic open
11111 <FH> read from filehandle
11112 <pkg::FH> read from package qualified filehandle
11113 <pkg'FH> read from package qualified filehandle
11114 <$fh> read from filehandle in $fh
11115 <*.h> filename glob
11120 S_scan_inputsymbol(pTHX_ char *start)
11122 char *s = start; /* current position in buffer */
11125 bool nomagicopen = FALSE;
11126 char *d = PL_tokenbuf; /* start of temp holding space */
11127 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11129 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11131 end = (char *) memchr(s, '\n', PL_bufend - s);
11134 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11135 nomagicopen = TRUE;
11141 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11143 /* die if we didn't have space for the contents of the <>,
11144 or if it didn't end, or if we see a newline
11147 if (len >= (I32)sizeof PL_tokenbuf)
11148 Perl_croak(aTHX_ "Excessively long <> operator");
11150 Perl_croak(aTHX_ "Unterminated <> operator");
11155 Remember, only scalar variables are interpreted as filehandles by
11156 this code. Anything more complex (e.g., <$fh{$num}>) will be
11157 treated as a glob() call.
11158 This code makes use of the fact that except for the $ at the front,
11159 a scalar variable and a filehandle look the same.
11161 if (*d == '$' && d[1]) d++;
11163 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11164 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11165 d += UTF ? UTF8SKIP(d) : 1;
11168 /* If we've tried to read what we allow filehandles to look like, and
11169 there's still text left, then it must be a glob() and not a getline.
11170 Use scan_str to pull out the stuff between the <> and treat it
11171 as nothing more than a string.
11174 if (d - PL_tokenbuf != len) {
11175 pl_yylval.ival = OP_GLOB;
11176 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11178 Perl_croak(aTHX_ "Glob not terminated");
11182 bool readline_overriden = FALSE;
11184 /* we're in a filehandle read situation */
11187 /* turn <> into <ARGV> */
11189 Copy("ARGV",d,5,char);
11191 /* Check whether readline() is overriden */
11192 if ((gv_readline = gv_override("readline",8)))
11193 readline_overriden = TRUE;
11195 /* if <$fh>, create the ops to turn the variable into a
11199 /* try to find it in the pad for this block, otherwise find
11200 add symbol table ops
11202 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11203 if (tmp != NOT_IN_PAD) {
11204 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11205 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11206 HEK * const stashname = HvNAME_HEK(stash);
11207 SV * const sym = sv_2mortal(newSVhek(stashname));
11208 sv_catpvs(sym, "::");
11209 sv_catpv(sym, d+1);
11214 OP * const o = newOP(OP_PADSV, 0);
11216 PL_lex_op = readline_overriden
11217 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11218 op_append_elem(OP_LIST, o,
11219 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11220 : newUNOP(OP_READLINE, 0, o);
11228 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11230 PL_lex_op = readline_overriden
11231 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11232 op_append_elem(OP_LIST,
11233 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11234 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11235 : newUNOP(OP_READLINE, 0,
11236 newUNOP(OP_RV2SV, 0,
11237 newGVOP(OP_GV, 0, gv)));
11239 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11240 pl_yylval.ival = OP_NULL;
11243 /* If it's none of the above, it must be a literal filehandle
11244 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11246 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11247 PL_lex_op = readline_overriden
11248 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11249 op_append_elem(OP_LIST,
11250 newGVOP(OP_GV, 0, gv),
11251 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11252 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11253 pl_yylval.ival = OP_NULL;
11255 /* leave the token generation above to avoid confusing the parser */
11256 if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11257 no_bareword_filehandle(d);
11268 start position in buffer
11269 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11270 only if they are of the open/close form
11271 keep_delims preserve the delimiters around the string
11272 re_reparse compiling a run-time /(?{})/:
11273 collapse // to /, and skip encoding src
11274 delimp if non-null, this is set to the position of
11275 the closing delimiter, or just after it if
11276 the closing and opening delimiters differ
11277 (i.e., the opening delimiter of a substitu-
11279 returns: position to continue reading from buffer
11280 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11281 updates the read buffer.
11283 This subroutine pulls a string out of the input. It is called for:
11284 q single quotes q(literal text)
11285 ' single quotes 'literal text'
11286 qq double quotes qq(interpolate $here please)
11287 " double quotes "interpolate $here please"
11288 qx backticks qx(/bin/ls -l)
11289 ` backticks `/bin/ls -l`
11290 qw quote words @EXPORT_OK = qw( func() $spam )
11291 m// regexp match m/this/
11292 s/// regexp substitute s/this/that/
11293 tr/// string transliterate tr/this/that/
11294 y/// string transliterate y/this/that/
11295 ($*@) sub prototypes sub foo ($)
11296 (stuff) sub attr parameters sub foo : attr(stuff)
11297 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11299 In most of these cases (all but <>, patterns and transliterate)
11300 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11301 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11302 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11305 It skips whitespace before the string starts, and treats the first
11306 character as the delimiter. If the delimiter is one of ([{< then
11307 the corresponding "close" character )]}> is used as the closing
11308 delimiter. It allows quoting of delimiters, and if the string has
11309 balanced delimiters ([{<>}]) it allows nesting.
11311 On success, the SV with the resulting string is put into lex_stuff or,
11312 if that is already non-NULL, into lex_repl. The second case occurs only
11313 when parsing the RHS of the special constructs s/// and tr/// (y///).
11314 For convenience, the terminating delimiter character is stuffed into
11319 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11323 SV *sv; /* scalar value: string */
11324 const char *tmps; /* temp string, used for delimiter matching */
11325 char *s = start; /* current position in the buffer */
11326 char term; /* terminating character */
11327 char *to; /* current position in the sv's data */
11328 int brackets = 1; /* bracket nesting level */
11329 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11330 IV termcode; /* terminating char. code */
11331 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11332 STRLEN termlen; /* length of terminating string */
11335 /* The delimiters that have a mirror-image closing one */
11336 const char * opening_delims = "([{<";
11337 const char * closing_delims = ")]}>";
11339 /* The only non-UTF character that isn't a stand alone grapheme is
11340 * white-space, hence can't be a delimiter. */
11341 const char * non_grapheme_msg = "Use of unassigned code point or"
11342 " non-standalone grapheme for a delimiter"
11344 PERL_ARGS_ASSERT_SCAN_STR;
11346 /* skip space before the delimiter */
11351 /* mark where we are, in case we need to report errors */
11354 /* after skipping whitespace, the next character is the terminator */
11356 if (!UTF || UTF8_IS_INVARIANT(term)) {
11357 termcode = termstr[0] = term;
11361 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11362 if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11367 yyerror(non_grapheme_msg);
11370 Copy(s, termstr, termlen, U8);
11373 /* mark where we are */
11374 PL_multi_start = CopLINE(PL_curcop);
11375 PL_multi_open = termcode;
11376 herelines = PL_parser->herelines;
11378 /* If the delimiter has a mirror-image closing one, get it */
11379 if (term && (tmps = strchr(opening_delims, term))) {
11380 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11383 PL_multi_close = termcode;
11385 if (PL_multi_open == PL_multi_close) {
11386 keep_bracketed_quoted = FALSE;
11389 /* create a new SV to hold the contents. 79 is the SV's initial length.
11390 What a random number. */
11391 sv = newSV_type(SVt_PVIV);
11393 SvIV_set(sv, termcode);
11394 (void)SvPOK_only(sv); /* validate pointer */
11396 /* move past delimiter and try to read a complete string */
11398 sv_catpvn(sv, s, termlen);
11401 /* extend sv if need be */
11402 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11403 /* set 'to' to the next character in the sv's string */
11404 to = SvPVX(sv)+SvCUR(sv);
11406 /* if open delimiter is the close delimiter read unbridle */
11407 if (PL_multi_open == PL_multi_close) {
11408 for (; s < PL_bufend; s++,to++) {
11409 /* embedded newlines increment the current line number */
11410 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11411 COPLINE_INC_WITH_HERELINES;
11412 /* handle quoted delimiters */
11413 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11414 if (!keep_bracketed_quoted
11416 || (re_reparse && s[1] == '\\'))
11419 else /* any other quotes are simply copied straight through */
11422 /* terminate when run out of buffer (the for() condition), or
11423 have found the terminator */
11424 else if (*s == term) { /* First byte of terminator matches */
11425 if (termlen == 1) /* If is the only byte, are done */
11428 /* If the remainder of the terminator matches, also are
11429 * done, after checking that is a separate grapheme */
11430 if ( s + termlen <= PL_bufend
11431 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11434 && UNLIKELY(! is_grapheme((U8 *) start,
11439 yyerror(non_grapheme_msg);
11444 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11452 /* if the terminator isn't the same as the start character (e.g.,
11453 matched brackets), we have to allow more in the quoting, and
11454 be prepared for nested brackets.
11457 /* read until we run out of string, or we find the terminator */
11458 for (; s < PL_bufend; s++,to++) {
11459 /* embedded newlines increment the line count */
11460 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11461 COPLINE_INC_WITH_HERELINES;
11462 /* backslashes can escape the open or closing characters */
11463 if (*s == '\\' && s+1 < PL_bufend) {
11464 if (!keep_bracketed_quoted
11465 && ( ((UV)s[1] == PL_multi_open)
11466 || ((UV)s[1] == PL_multi_close) ))
11473 /* allow nested opens and closes */
11474 else if (*(U8 *) s == PL_multi_close && --brackets <= 0)
11476 else if (*(U8 *) s == PL_multi_open)
11478 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11483 /* terminate the copied string and update the sv's end-of-string */
11485 SvCUR_set(sv, to - SvPVX_const(sv));
11488 * this next chunk reads more into the buffer if we're not done yet
11492 break; /* handle case where we are done yet :-) */
11494 #ifndef PERL_STRICT_CR
11495 if (to - SvPVX_const(sv) >= 2) {
11496 if ( (to[-2] == '\r' && to[-1] == '\n')
11497 || (to[-2] == '\n' && to[-1] == '\r'))
11501 SvCUR_set(sv, to - SvPVX_const(sv));
11503 else if (to[-1] == '\r')
11506 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11510 /* if we're out of file, or a read fails, bail and reset the current
11511 line marker so we can report where the unterminated string began
11513 COPLINE_INC_WITH_HERELINES;
11514 PL_bufptr = PL_bufend;
11515 if (!lex_next_chunk(0)) {
11517 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11520 s = start = PL_bufptr;
11523 /* at this point, we have successfully read the delimited string */
11526 sv_catpvn(sv, s, termlen);
11532 PL_multi_end = CopLINE(PL_curcop);
11533 CopLINE_set(PL_curcop, PL_multi_start);
11534 PL_parser->herelines = herelines;
11536 /* if we allocated too much space, give some back */
11537 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11538 SvLEN_set(sv, SvCUR(sv) + 1);
11539 SvPV_shrink_to_cur(sv);
11542 /* decide whether this is the first or second quoted string we've read
11547 PL_parser->lex_sub_repl = sv;
11550 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11556 takes: pointer to position in buffer
11557 returns: pointer to new position in buffer
11558 side-effects: builds ops for the constant in pl_yylval.op
11560 Read a number in any of the formats that Perl accepts:
11562 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11563 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11564 0b[01](_?[01])* binary integers
11565 0o?[0-7](_?[0-7])* octal integers
11566 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
11567 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
11569 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11572 If it reads a number without a decimal point or an exponent, it will
11573 try converting the number to an integer and see if it can do so
11574 without loss of precision.
11578 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11580 const char *s = start; /* current position in buffer */
11581 char *d; /* destination in temp buffer */
11582 char *e; /* end of temp buffer */
11583 NV nv; /* number read, as a double */
11584 SV *sv = NULL; /* place to put the converted number */
11585 bool floatit; /* boolean: int or float? */
11586 const char *lastub = NULL; /* position of last underbar */
11587 static const char* const number_too_long = "Number too long";
11588 bool warned_about_underscore = 0;
11589 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11590 #define WARN_ABOUT_UNDERSCORE() \
11592 if (!warned_about_underscore) { \
11593 warned_about_underscore = 1; \
11594 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11595 "Misplaced _ in number"); \
11598 /* Hexadecimal floating point.
11600 * In many places (where we have quads and NV is IEEE 754 double)
11601 * we can fit the mantissa bits of a NV into an unsigned quad.
11602 * (Note that UVs might not be quads even when we have quads.)
11603 * This will not work everywhere, though (either no quads, or
11604 * using long doubles), in which case we have to resort to NV,
11605 * which will probably mean horrible loss of precision due to
11606 * multiple fp operations. */
11607 bool hexfp = FALSE;
11608 int total_bits = 0;
11609 int significant_bits = 0;
11610 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11611 # define HEXFP_UQUAD
11612 Uquad_t hexfp_uquad = 0;
11613 int hexfp_frac_bits = 0;
11618 NV hexfp_mult = 1.0;
11619 UV high_non_zero = 0; /* highest digit */
11620 int non_zero_integer_digits = 0;
11621 bool new_octal = FALSE; /* octal with "0o" prefix */
11623 PERL_ARGS_ASSERT_SCAN_NUM;
11625 /* We use the first character to decide what type of number this is */
11629 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11631 /* if it starts with a 0, it could be an octal number, a decimal in
11632 0.13 disguise, or a hexadecimal number, or a binary number. */
11636 u holds the "number so far"
11637 overflowed was the number more than we can hold?
11639 Shift is used when we add a digit. It also serves as an "are
11640 we in octal/hex/binary?" indicator to disallow hex characters
11641 when in octal mode.
11645 bool overflowed = FALSE;
11646 bool just_zero = TRUE; /* just plain 0 or binary number? */
11647 bool has_digs = FALSE;
11648 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11649 static const char* const bases[5] =
11650 { "", "binary", "", "octal", "hexadecimal" };
11651 static const char* const Bases[5] =
11652 { "", "Binary", "", "Octal", "Hexadecimal" };
11653 static const char* const maxima[5] =
11655 "0b11111111111111111111111111111111",
11660 /* check for hex */
11661 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11665 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11670 /* check for a decimal in disguise */
11671 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11673 /* so it must be octal */
11677 if (isALPHA_FOLD_EQ(*s, 'o')) {
11685 WARN_ABOUT_UNDERSCORE();
11689 /* read the rest of the number */
11691 /* x is used in the overflow test,
11692 b is the digit we're adding on. */
11697 /* if we don't mention it, we're done */
11701 /* _ are ignored -- but warned about if consecutive */
11703 if (lastub && s == lastub + 1)
11704 WARN_ABOUT_UNDERSCORE();
11708 /* 8 and 9 are not octal */
11709 case '8': case '9':
11711 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11715 case '2': case '3': case '4':
11716 case '5': case '6': case '7':
11718 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11721 case '0': case '1':
11722 b = *s++ & 15; /* ASCII digit -> value of digit */
11726 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11727 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11728 /* make sure they said 0x */
11731 b = (*s++ & 7) + 9;
11733 /* Prepare to put the digit we have onto the end
11734 of the number so far. We check for overflows.
11741 assert(shift >= 0);
11742 x = u << shift; /* make room for the digit */
11744 total_bits += shift;
11746 if ((x >> shift) != u
11747 && !(PL_hints & HINT_NEW_BINARY)) {
11750 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11751 "Integer overflow in %s number",
11754 u = x | b; /* add the digit to the end */
11757 n *= nvshift[shift];
11758 /* If an NV has not enough bits in its
11759 * mantissa to represent an UV this summing of
11760 * small low-order numbers is a waste of time
11761 * (because the NV cannot preserve the
11762 * low-order bits anyway): we could just
11763 * remember when did we overflow and in the
11764 * end just multiply n by the right
11769 if (high_non_zero == 0 && b > 0)
11773 non_zero_integer_digits++;
11775 /* this could be hexfp, but peek ahead
11776 * to avoid matching ".." */
11777 if (UNLIKELY(HEXFP_PEEK(s))) {
11785 /* if we get here, we had success: make a scalar value from
11790 /* final misplaced underbar check */
11792 WARN_ABOUT_UNDERSCORE();
11794 if (UNLIKELY(HEXFP_PEEK(s))) {
11795 /* Do sloppy (on the underbars) but quick detection
11796 * (and value construction) for hexfp, the decimal
11797 * detection will shortly be more thorough with the
11798 * underbar checks. */
11800 significant_bits = non_zero_integer_digits * shift;
11803 #else /* HEXFP_NV */
11806 /* Ignore the leading zero bits of
11807 * the high (first) non-zero digit. */
11808 if (high_non_zero) {
11809 if (high_non_zero < 0x8)
11810 significant_bits--;
11811 if (high_non_zero < 0x4)
11812 significant_bits--;
11813 if (high_non_zero < 0x2)
11814 significant_bits--;
11821 bool accumulate = TRUE;
11823 int lim = 1 << shift;
11824 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11826 if (isXDIGIT(*h)) {
11827 significant_bits += shift;
11830 if (significant_bits < NV_MANT_DIG) {
11831 /* We are in the long "run" of xdigits,
11832 * accumulate the full four bits. */
11833 assert(shift >= 0);
11834 hexfp_uquad <<= shift;
11836 hexfp_frac_bits += shift;
11837 } else if (significant_bits - shift < NV_MANT_DIG) {
11838 /* We are at a hexdigit either at,
11839 * or straddling, the edge of mantissa.
11840 * We will try grabbing as many as
11841 * possible bits. */
11843 significant_bits - NV_MANT_DIG;
11847 hexfp_uquad <<= tail;
11848 assert((shift - tail) >= 0);
11849 hexfp_uquad |= b >> (shift - tail);
11850 hexfp_frac_bits += tail;
11852 /* Ignore the trailing zero bits
11853 * of the last non-zero xdigit.
11855 * The assumption here is that if
11856 * one has input of e.g. the xdigit
11857 * eight (0x8), there is only one
11858 * bit being input, not the full
11859 * four bits. Conversely, if one
11860 * specifies a zero xdigit, the
11861 * assumption is that one really
11862 * wants all those bits to be zero. */
11864 if ((b & 0x1) == 0x0) {
11865 significant_bits--;
11866 if ((b & 0x2) == 0x0) {
11867 significant_bits--;
11868 if ((b & 0x4) == 0x0) {
11869 significant_bits--;
11875 accumulate = FALSE;
11878 /* Keep skipping the xdigits, and
11879 * accumulating the significant bits,
11880 * but do not shift the uquad
11881 * (which would catastrophically drop
11882 * high-order bits) or accumulate the
11883 * xdigits anymore. */
11885 #else /* HEXFP_NV */
11887 nv_mult /= nvshift[shift];
11889 hexfp_nv += b * nv_mult;
11891 accumulate = FALSE;
11895 if (significant_bits >= NV_MANT_DIG)
11896 accumulate = FALSE;
11900 if ((total_bits > 0 || significant_bits > 0) &&
11901 isALPHA_FOLD_EQ(*h, 'p')) {
11902 bool negexp = FALSE;
11906 else if (*h == '-') {
11912 while (isDIGIT(*h) || *h == '_') {
11915 hexfp_exp += *h - '0';
11918 && -hexfp_exp < NV_MIN_EXP - 1) {
11919 /* NOTE: this means that the exponent
11920 * underflow warning happens for
11921 * the IEEE 754 subnormals (denormals),
11922 * because DBL_MIN_EXP etc are the lowest
11923 * possible binary (or, rather, DBL_RADIX-base)
11924 * exponent for normals, not subnormals.
11926 * This may or may not be a good thing. */
11927 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11928 "Hexadecimal float: exponent underflow");
11934 && hexfp_exp > NV_MAX_EXP - 1) {
11935 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11936 "Hexadecimal float: exponent overflow");
11944 hexfp_exp = -hexfp_exp;
11946 hexfp_exp -= hexfp_frac_bits;
11948 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11955 if (!just_zero && !has_digs) {
11956 /* 0x, 0o or 0b with no digits, treat it as an error.
11957 Originally this backed up the parse before the b or
11958 x, but that has the potential for silent changes in
11959 behaviour, like for: "0x.3" and "0x+$foo".
11962 char *oldbp = PL_bufptr;
11963 if (*d) ++d; /* so the user sees the bad non-digit */
11964 PL_bufptr = (char *)d; /* so yyerror reports the context */
11965 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11971 if (n > 4294967295.0)
11972 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11973 "%s number > %s non-portable",
11975 new_octal ? "0o37777777777" : maxima[shift]);
11980 if (u > 0xffffffff)
11981 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11982 "%s number > %s non-portable",
11984 new_octal ? "0o37777777777" : maxima[shift]);
11988 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11989 sv = new_constant(start, s - start, "integer",
11990 sv, NULL, NULL, 0, NULL);
11991 else if (PL_hints & HINT_NEW_BINARY)
11992 sv = new_constant(start, s - start, "binary",
11993 sv, NULL, NULL, 0, NULL);
11998 handle decimal numbers.
11999 we're also sent here when we read a 0 as the first digit
12001 case '1': case '2': case '3': case '4': case '5':
12002 case '6': case '7': case '8': case '9': case '.':
12005 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12028 NOT_REACHED; /* NOTREACHED */
12032 /* read next group of digits and _ and copy into d */
12035 || UNLIKELY(hexfp && isXDIGIT(*s)))
12037 /* skip underscores, checking for misplaced ones
12041 if (lastub && s == lastub + 1)
12042 WARN_ABOUT_UNDERSCORE();
12046 /* check for end of fixed-length buffer */
12048 Perl_croak(aTHX_ "%s", number_too_long);
12049 /* if we're ok, copy the character */
12054 /* final misplaced underbar check */
12055 if (lastub && s == lastub + 1)
12056 WARN_ABOUT_UNDERSCORE();
12058 /* read a decimal portion if there is one. avoid
12059 3..5 being interpreted as the number 3. followed
12062 if (*s == '.' && s[1] != '.') {
12067 WARN_ABOUT_UNDERSCORE();
12071 /* copy, ignoring underbars, until we run out of digits.
12075 || UNLIKELY(hexfp && isXDIGIT(*s));
12078 /* fixed length buffer check */
12080 Perl_croak(aTHX_ "%s", number_too_long);
12082 if (lastub && s == lastub + 1)
12083 WARN_ABOUT_UNDERSCORE();
12089 /* fractional part ending in underbar? */
12091 WARN_ABOUT_UNDERSCORE();
12092 if (*s == '.' && isDIGIT(s[1])) {
12093 /* oops, it's really a v-string, but without the "v" */
12099 /* read exponent part, if present */
12100 if ((isALPHA_FOLD_EQ(*s, 'e')
12101 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12102 && memCHRs("+-0123456789_", s[1]))
12104 int exp_digits = 0;
12105 const char *save_s = s;
12108 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12109 ditto for p (hexfloats) */
12110 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12111 /* At least some Mach atof()s don't grok 'E' */
12114 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12121 /* stray preinitial _ */
12123 WARN_ABOUT_UNDERSCORE();
12127 /* allow positive or negative exponent */
12128 if (*s == '+' || *s == '-')
12131 /* stray initial _ */
12133 WARN_ABOUT_UNDERSCORE();
12137 /* read digits of exponent */
12138 while (isDIGIT(*s) || *s == '_') {
12142 Perl_croak(aTHX_ "%s", number_too_long);
12146 if (((lastub && s == lastub + 1)
12147 || (!isDIGIT(s[1]) && s[1] != '_')))
12148 WARN_ABOUT_UNDERSCORE();
12154 /* no exponent digits, the [eEpP] could be for something else,
12155 * though in practice we don't get here for p since that's preparsed
12156 * earlier, and results in only the 0xX being consumed, so behave similarly
12157 * for decimal floats and consume only the D.DD, leaving the [eE] to the
12170 We try to do an integer conversion first if no characters
12171 indicating "float" have been found.
12176 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12178 if (flags == IS_NUMBER_IN_UV) {
12180 sv = newSViv(uv); /* Prefer IVs over UVs. */
12183 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12184 if (uv <= (UV) IV_MIN)
12185 sv = newSViv(-(IV)uv);
12192 /* terminate the string */
12194 if (UNLIKELY(hexfp)) {
12195 # ifdef NV_MANT_DIG
12196 if (significant_bits > NV_MANT_DIG)
12197 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12198 "Hexadecimal float: mantissa overflow");
12201 nv = hexfp_uquad * hexfp_mult;
12202 #else /* HEXFP_NV */
12203 nv = hexfp_nv * hexfp_mult;
12206 nv = Atof(PL_tokenbuf);
12212 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12213 const char *const key = floatit ? "float" : "integer";
12214 const STRLEN keylen = floatit ? 5 : 7;
12215 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12216 key, keylen, sv, NULL, NULL, 0, NULL);
12220 /* if it starts with a v, it could be a v-string */
12223 sv = newSV(5); /* preallocate storage space */
12224 ENTER_with_name("scan_vstring");
12226 s = scan_vstring(s, PL_bufend, sv);
12227 SvREFCNT_inc_simple_void_NN(sv);
12228 LEAVE_with_name("scan_vstring");
12232 /* make the op for the constant and return */
12235 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12237 lvalp->opval = NULL;
12243 S_scan_formline(pTHX_ char *s)
12245 SV * const stuff = newSVpvs("");
12246 bool needargs = FALSE;
12247 bool eofmt = FALSE;
12249 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12251 while (!needargs) {
12255 #ifdef PERL_STRICT_CR
12256 while (SPACE_OR_TAB(*t))
12259 while (SPACE_OR_TAB(*t) || *t == '\r')
12262 if (*t == '\n' || t == PL_bufend) {
12267 eol = (char *) memchr(s,'\n',PL_bufend-s);
12272 for (t = s; t < eol; t++) {
12273 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12275 goto enough; /* ~~ must be first line in formline */
12277 if (*t == '@' || *t == '^')
12281 sv_catpvn(stuff, s, eol-s);
12282 #ifndef PERL_STRICT_CR
12283 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12284 char *end = SvPVX(stuff) + SvCUR(stuff);
12287 SvCUR_set(stuff, SvCUR(stuff) - 1);
12295 if ((PL_rsfp || PL_parser->filtered)
12296 && PL_parser->form_lex_state == LEX_NORMAL) {
12298 PL_bufptr = PL_bufend;
12299 COPLINE_INC_WITH_HERELINES;
12300 got_some = lex_next_chunk(0);
12301 CopLINE_dec(PL_curcop);
12306 incline(s, PL_bufend);
12309 if (!SvCUR(stuff) || needargs)
12310 PL_lex_state = PL_parser->form_lex_state;
12311 if (SvCUR(stuff)) {
12312 PL_expect = XSTATE;
12314 const char *s2 = s;
12315 while (isSPACE(*s2) && *s2 != '\n')
12318 PL_expect = XTERMBLOCK;
12319 NEXTVAL_NEXTTOKE.ival = 0;
12322 NEXTVAL_NEXTTOKE.ival = 0;
12323 force_next(FORMLBRACK);
12326 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12329 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12333 SvREFCNT_dec(stuff);
12335 PL_lex_formbrack = 0;
12341 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12343 const I32 oldsavestack_ix = PL_savestack_ix;
12344 CV* const outsidecv = PL_compcv;
12346 SAVEI32(PL_subline);
12347 save_item(PL_subname);
12348 SAVESPTR(PL_compcv);
12350 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12351 CvFLAGS(PL_compcv) |= flags;
12353 PL_subline = CopLINE(PL_curcop);
12354 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12355 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12356 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12357 if (outsidecv && CvPADLIST(outsidecv))
12358 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12360 return oldsavestack_ix;
12364 /* Do extra initialisation of a CV (typically one just created by
12365 * start_subparse()) if that CV is for a named sub
12369 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12371 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12373 if (nameop->op_type == OP_CONST) {
12374 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12375 if ( strEQ(name, "BEGIN")
12376 || strEQ(name, "END")
12377 || strEQ(name, "INIT")
12378 || strEQ(name, "CHECK")
12379 || strEQ(name, "UNITCHECK")
12384 /* State subs inside anonymous subs need to be
12385 clonable themselves. */
12386 if ( CvANON(CvOUTSIDE(cv))
12387 || CvCLONE(CvOUTSIDE(cv))
12388 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12390 ))[nameop->op_targ])
12397 S_yywarn(pTHX_ const char *const s, U32 flags)
12399 PERL_ARGS_ASSERT_YYWARN;
12401 PL_in_eval |= EVAL_WARNONLY;
12402 yyerror_pv(s, flags);
12407 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12409 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12412 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12415 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12417 NOT_REACHED; /* NOTREACHED */
12423 /* Called, after at least one error has been found, to abort the parse now,
12424 * instead of trying to forge ahead */
12426 yyerror_pvn(NULL, 0, 0);
12430 Perl_yyerror(pTHX_ const char *const s)
12432 PERL_ARGS_ASSERT_YYERROR;
12433 return yyerror_pvn(s, strlen(s), 0);
12437 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12439 PERL_ARGS_ASSERT_YYERROR_PV;
12440 return yyerror_pvn(s, strlen(s), flags);
12444 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12446 const char *context = NULL;
12449 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12450 int yychar = PL_parser->yychar;
12452 /* Output error message 's' with length 'len'. 'flags' are SV flags that
12453 * apply. If the number of errors found is large enough, it abandons
12454 * parsing. If 's' is NULL, there is no message, and it abandons
12455 * processing unconditionally */
12458 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12459 sv_catpvs(where_sv, "at EOF");
12460 else if ( PL_oldoldbufptr
12461 && PL_bufptr > PL_oldoldbufptr
12462 && PL_bufptr - PL_oldoldbufptr < 200
12463 && PL_oldoldbufptr != PL_oldbufptr
12464 && PL_oldbufptr != PL_bufptr)
12466 while (isSPACE(*PL_oldoldbufptr))
12468 context = PL_oldoldbufptr;
12469 contlen = PL_bufptr - PL_oldoldbufptr;
12471 else if ( PL_oldbufptr
12472 && PL_bufptr > PL_oldbufptr
12473 && PL_bufptr - PL_oldbufptr < 200
12474 && PL_oldbufptr != PL_bufptr)
12476 while (isSPACE(*PL_oldbufptr))
12478 context = PL_oldbufptr;
12479 contlen = PL_bufptr - PL_oldbufptr;
12481 else if (yychar > 255)
12482 sv_catpvs(where_sv, "next token ???");
12483 else if (yychar == YYEMPTY) {
12484 if (PL_lex_state == LEX_NORMAL)
12485 sv_catpvs(where_sv, "at end of line");
12486 else if (PL_lex_inpat)
12487 sv_catpvs(where_sv, "within pattern");
12489 sv_catpvs(where_sv, "within string");
12492 sv_catpvs(where_sv, "next char ");
12494 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12495 else if (isPRINT_LC(yychar)) {
12496 const char string = yychar;
12497 sv_catpvn(where_sv, &string, 1);
12500 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12502 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12503 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12504 OutCopFILE(PL_curcop),
12505 (IV)(PL_parser->preambling == NOLINE
12506 ? CopLINE(PL_curcop)
12507 : PL_parser->preambling));
12509 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12510 UTF8fARG(UTF, contlen, context));
12512 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12513 if ( PL_multi_start < PL_multi_end
12514 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12516 Perl_sv_catpvf(aTHX_ msg,
12517 " (Might be a runaway multi-line %c%c string starting on"
12518 " line %" IVdf ")\n",
12519 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12522 if (PL_in_eval & EVAL_WARNONLY) {
12523 PL_in_eval &= ~EVAL_WARNONLY;
12524 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12530 if (s == NULL || PL_error_count >= 10) {
12531 const char * msg = "";
12532 const char * const name = OutCopFILE(PL_curcop);
12535 SV * errsv = ERRSV;
12536 if (SvCUR(errsv)) {
12537 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12542 abort_execution(msg, name);
12545 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12549 PL_in_my_stash = NULL;
12554 S_swallow_bom(pTHX_ U8 *s)
12556 const STRLEN slen = SvCUR(PL_linestr);
12558 PERL_ARGS_ASSERT_SWALLOW_BOM;
12562 if (s[1] == 0xFE) {
12563 /* UTF-16 little-endian? (or UTF-32LE?) */
12564 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12565 /* diag_listed_as: Unsupported script encoding %s */
12566 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12567 #ifndef PERL_NO_UTF16_FILTER
12569 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12572 if (PL_bufend > (char*)s) {
12573 s = add_utf16_textfilter(s, TRUE);
12576 /* diag_listed_as: Unsupported script encoding %s */
12577 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12582 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12583 #ifndef PERL_NO_UTF16_FILTER
12585 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12588 if (PL_bufend > (char *)s) {
12589 s = add_utf16_textfilter(s, FALSE);
12592 /* diag_listed_as: Unsupported script encoding %s */
12593 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12597 case BOM_UTF8_FIRST_BYTE: {
12598 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12600 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12602 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
12609 if (s[2] == 0xFE && s[3] == 0xFF) {
12610 /* UTF-32 big-endian */
12611 /* diag_listed_as: Unsupported script encoding %s */
12612 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12615 else if (s[2] == 0 && s[3] != 0) {
12618 * are a good indicator of UTF-16BE. */
12619 #ifndef PERL_NO_UTF16_FILTER
12621 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12623 s = add_utf16_textfilter(s, FALSE);
12625 /* diag_listed_as: Unsupported script encoding %s */
12626 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12633 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12636 * are a good indicator of UTF-16LE. */
12637 #ifndef PERL_NO_UTF16_FILTER
12639 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12641 s = add_utf16_textfilter(s, TRUE);
12643 /* diag_listed_as: Unsupported script encoding %s */
12644 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12652 #ifndef PERL_NO_UTF16_FILTER
12654 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12656 SV *const filter = FILTER_DATA(idx);
12657 /* We re-use this each time round, throwing the contents away before we
12659 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12660 SV *const utf8_buffer = filter;
12661 IV status = IoPAGE(filter);
12662 const bool reverse = cBOOL(IoLINES(filter));
12665 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12667 /* As we're automatically added, at the lowest level, and hence only called
12668 from this file, we can be sure that we're not called in block mode. Hence
12669 don't bother writing code to deal with block mode. */
12671 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12674 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12676 DEBUG_P(PerlIO_printf(Perl_debug_log,
12677 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12678 FPTR2DPTR(void *, S_utf16_textfilter),
12679 reverse ? 'l' : 'b', idx, maxlen, status,
12680 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12687 /* First, look in our buffer of existing UTF-8 data: */
12688 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12692 } else if (status == 0) {
12694 IoPAGE(filter) = 0;
12695 nl = SvEND(utf8_buffer);
12698 STRLEN got = nl - SvPVX(utf8_buffer);
12699 /* Did we have anything to append? */
12701 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12702 /* Everything else in this code works just fine if SVp_POK isn't
12703 set. This, however, needs it, and we need it to work, else
12704 we loop infinitely because the buffer is never consumed. */
12705 sv_chop(utf8_buffer, nl);
12709 /* OK, not a complete line there, so need to read some more UTF-16.
12710 Read an extra octect if the buffer currently has an odd number. */
12714 if (SvCUR(utf16_buffer) >= 2) {
12715 /* Location of the high octet of the last complete code point.
12716 Gosh, UTF-16 is a pain. All the benefits of variable length,
12717 *coupled* with all the benefits of partial reads and
12719 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12720 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12722 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12726 /* We have the first half of a surrogate. Read more. */
12727 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12730 status = FILTER_READ(idx + 1, utf16_buffer,
12731 160 + (SvCUR(utf16_buffer) & 1));
12732 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12733 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12736 IoPAGE(filter) = status;
12741 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12742 * require 4 bytes per char */
12743 chars = SvCUR(utf16_buffer) >> 1;
12744 have = SvCUR(utf8_buffer);
12746 /* Assume the worst case size as noted by the functions: twice the
12747 * number of input bytes */
12748 SvGROW(utf8_buffer, have + chars * 4 + 1);
12751 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12752 (U8*)SvPVX_const(utf8_buffer) + have,
12753 chars * 2, &newlen);
12755 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12756 (U8*)SvPVX_const(utf8_buffer) + have,
12757 chars * 2, &newlen);
12759 SvCUR_set(utf8_buffer, have + newlen);
12762 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12763 it's private to us, and utf16_to_utf8{,reversed} take a
12764 (pointer,length) pair, rather than a NUL-terminated string. */
12765 if(SvCUR(utf16_buffer) & 1) {
12766 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12767 SvCUR_set(utf16_buffer, 1);
12769 SvCUR_set(utf16_buffer, 0);
12772 DEBUG_P(PerlIO_printf(Perl_debug_log,
12773 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12775 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12776 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12781 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12783 SV *filter = filter_add(S_utf16_textfilter, NULL);
12785 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12787 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12789 IoLINES(filter) = reversed;
12790 IoPAGE(filter) = 1; /* Not EOF */
12792 /* Sadly, we have to return a valid pointer, come what may, so we have to
12793 ignore any error return from this. */
12794 SvCUR_set(PL_linestr, 0);
12795 if (FILTER_READ(0, PL_linestr, 0)) {
12796 SvUTF8_on(PL_linestr);
12798 SvUTF8_on(PL_linestr);
12800 PL_bufend = SvEND(PL_linestr);
12801 return (U8*)SvPVX(PL_linestr);
12806 Returns a pointer to the next character after the parsed
12807 vstring, as well as updating the passed in sv.
12809 Function must be called like
12811 sv = sv_2mortal(newSV(5));
12812 s = scan_vstring(s,e,sv);
12814 where s and e are the start and end of the string.
12815 The sv should already be large enough to store the vstring
12816 passed in, for performance reasons.
12818 This function may croak if fatal warnings are enabled in the
12819 calling scope, hence the sv_2mortal in the example (to prevent
12820 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12826 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12828 const char *pos = s;
12829 const char *start = s;
12831 PERL_ARGS_ASSERT_SCAN_VSTRING;
12833 if (*pos == 'v') pos++; /* get past 'v' */
12834 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12836 if ( *pos != '.') {
12837 /* this may not be a v-string if followed by => */
12838 const char *next = pos;
12839 while (next < e && isSPACE(*next))
12841 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12842 /* return string not v-string */
12843 sv_setpvn(sv,(char *)s,pos-s);
12844 return (char *)pos;
12848 if (!isALPHA(*pos)) {
12849 U8 tmpbuf[UTF8_MAXBYTES+1];
12852 s++; /* get past 'v' */
12857 /* this is atoi() that tolerates underscores */
12860 const char *end = pos;
12862 while (--end >= s) {
12864 const UV orev = rev;
12865 rev += (*end - '0') * mult;
12868 /* diag_listed_as: Integer overflow in %s number */
12869 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12870 "Integer overflow in decimal number");
12874 /* Append native character for the rev point */
12875 tmpend = uvchr_to_utf8(tmpbuf, rev);
12876 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12877 if (!UVCHR_IS_INVARIANT(rev))
12879 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12885 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12889 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12896 Perl_keyword_plugin_standard(pTHX_
12897 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12899 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12900 PERL_UNUSED_CONTEXT;
12901 PERL_UNUSED_ARG(keyword_ptr);
12902 PERL_UNUSED_ARG(keyword_len);
12903 PERL_UNUSED_ARG(op_ptr);
12904 return KEYWORD_PLUGIN_DECLINE;
12908 =for apidoc wrap_keyword_plugin
12910 Puts a C function into the chain of keyword plugins. This is the
12911 preferred way to manipulate the L</PL_keyword_plugin> variable.
12912 C<new_plugin> is a pointer to the C function that is to be added to the
12913 keyword plugin chain, and C<old_plugin_p> points to the storage location
12914 where a pointer to the next function in the chain will be stored. The
12915 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12916 while the value previously stored there is written to C<*old_plugin_p>.
12918 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12919 to hook keyword parsing may find itself invoked more than once per
12920 process, typically in different threads. To handle that situation, this
12921 function is idempotent. The location C<*old_plugin_p> must initially
12922 (once per process) contain a null pointer. A C variable of static
12923 duration (declared at file scope, typically also marked C<static> to give
12924 it internal linkage) will be implicitly initialised appropriately, if it
12925 does not have an explicit initialiser. This function will only actually
12926 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12927 function is also thread safe on the small scale. It uses appropriate
12928 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12930 When this function is called, the function referenced by C<new_plugin>
12931 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12932 In a threading situation, C<new_plugin> may be called immediately, even
12933 before this function has returned. C<*old_plugin_p> will always be
12934 appropriately set before C<new_plugin> is called. If C<new_plugin>
12935 decides not to do anything special with the identifier that it is given
12936 (which is the usual case for most calls to a keyword plugin), it must
12937 chain the plugin function referenced by C<*old_plugin_p>.
12939 Taken all together, XS code to install a keyword plugin should typically
12940 look something like this:
12942 static Perl_keyword_plugin_t next_keyword_plugin;
12943 static OP *my_keyword_plugin(pTHX_
12944 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12946 if (memEQs(keyword_ptr, keyword_len,
12947 "my_new_keyword")) {
12950 return next_keyword_plugin(aTHX_
12951 keyword_ptr, keyword_len, op_ptr);
12955 wrap_keyword_plugin(my_keyword_plugin,
12956 &next_keyword_plugin);
12958 Direct access to L</PL_keyword_plugin> should be avoided.
12964 Perl_wrap_keyword_plugin(pTHX_
12965 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12968 PERL_UNUSED_CONTEXT;
12969 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12970 if (*old_plugin_p) return;
12971 KEYWORD_PLUGIN_MUTEX_LOCK;
12972 if (!*old_plugin_p) {
12973 *old_plugin_p = PL_keyword_plugin;
12974 PL_keyword_plugin = new_plugin;
12976 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12979 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12981 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12983 SAVEI32(PL_lex_brackets);
12984 if (PL_lex_brackets > 100)
12985 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12986 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12987 SAVEI32(PL_lex_allbrackets);
12988 PL_lex_allbrackets = 0;
12989 SAVEI8(PL_lex_fakeeof);
12990 PL_lex_fakeeof = (U8)fakeeof;
12991 if(yyparse(gramtype) && !PL_parser->error_count)
12992 qerror(Perl_mess(aTHX_ "Parse error"));
12995 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12997 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13001 SAVEVPTR(PL_eval_root);
13002 PL_eval_root = NULL;
13003 parse_recdescent(gramtype, fakeeof);
13009 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13011 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13014 if (flags & ~PARSE_OPTIONAL)
13015 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13016 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13017 if (!exprop && !(flags & PARSE_OPTIONAL)) {
13018 if (!PL_parser->error_count)
13019 qerror(Perl_mess(aTHX_ "Parse error"));
13020 exprop = newOP(OP_NULL, 0);
13026 =for apidoc parse_arithexpr
13028 Parse a Perl arithmetic expression. This may contain operators of precedence
13029 down to the bit shift operators. The expression must be followed (and thus
13030 terminated) either by a comparison or lower-precedence operator or by
13031 something that would normally terminate an expression such as semicolon.
13032 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13033 otherwise it is mandatory. It is up to the caller to ensure that the
13034 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13035 the source of the code to be parsed and the lexical context for the
13038 The op tree representing the expression is returned. If an optional
13039 expression is absent, a null pointer is returned, otherwise the pointer
13042 If an error occurs in parsing or compilation, in most cases a valid op
13043 tree is returned anyway. The error is reflected in the parser state,
13044 normally resulting in a single exception at the top level of parsing
13045 which covers all the compilation errors that occurred. Some compilation
13046 errors, however, will throw an exception immediately.
13048 =for apidoc Amnh||PARSE_OPTIONAL
13055 Perl_parse_arithexpr(pTHX_ U32 flags)
13057 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13061 =for apidoc parse_termexpr
13063 Parse a Perl term expression. This may contain operators of precedence
13064 down to the assignment operators. The expression must be followed (and thus
13065 terminated) either by a comma or lower-precedence operator or by
13066 something that would normally terminate an expression such as semicolon.
13067 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13068 otherwise it is mandatory. It is up to the caller to ensure that the
13069 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13070 the source of the code to be parsed and the lexical context for the
13073 The op tree representing the expression is returned. If an optional
13074 expression is absent, a null pointer is returned, otherwise the pointer
13077 If an error occurs in parsing or compilation, in most cases a valid op
13078 tree is returned anyway. The error is reflected in the parser state,
13079 normally resulting in a single exception at the top level of parsing
13080 which covers all the compilation errors that occurred. Some compilation
13081 errors, however, will throw an exception immediately.
13087 Perl_parse_termexpr(pTHX_ U32 flags)
13089 return parse_expr(LEX_FAKEEOF_COMMA, flags);
13093 =for apidoc parse_listexpr
13095 Parse a Perl list expression. This may contain operators of precedence
13096 down to the comma operator. The expression must be followed (and thus
13097 terminated) either by a low-precedence logic operator such as C<or> or by
13098 something that would normally terminate an expression such as semicolon.
13099 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13100 otherwise it is mandatory. It is up to the caller to ensure that the
13101 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13102 the source of the code to be parsed and the lexical context for the
13105 The op tree representing the expression is returned. If an optional
13106 expression is absent, a null pointer is returned, otherwise the pointer
13109 If an error occurs in parsing or compilation, in most cases a valid op
13110 tree is returned anyway. The error is reflected in the parser state,
13111 normally resulting in a single exception at the top level of parsing
13112 which covers all the compilation errors that occurred. Some compilation
13113 errors, however, will throw an exception immediately.
13119 Perl_parse_listexpr(pTHX_ U32 flags)
13121 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13125 =for apidoc parse_fullexpr
13127 Parse a single complete Perl expression. This allows the full
13128 expression grammar, including the lowest-precedence operators such
13129 as C<or>. The expression must be followed (and thus terminated) by a
13130 token that an expression would normally be terminated by: end-of-file,
13131 closing bracketing punctuation, semicolon, or one of the keywords that
13132 signals a postfix expression-statement modifier. If C<flags> has the
13133 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13134 mandatory. It is up to the caller to ensure that the dynamic parser
13135 state (L</PL_parser> et al) is correctly set to reflect the source of
13136 the code to be parsed and the lexical context for the expression.
13138 The op tree representing the expression is returned. If an optional
13139 expression is absent, a null pointer is returned, otherwise the pointer
13142 If an error occurs in parsing or compilation, in most cases a valid op
13143 tree is returned anyway. The error is reflected in the parser state,
13144 normally resulting in a single exception at the top level of parsing
13145 which covers all the compilation errors that occurred. Some compilation
13146 errors, however, will throw an exception immediately.
13152 Perl_parse_fullexpr(pTHX_ U32 flags)
13154 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13158 =for apidoc parse_block
13160 Parse a single complete Perl code block. This consists of an opening
13161 brace, a sequence of statements, and a closing brace. The block
13162 constitutes a lexical scope, so C<my> variables and various compile-time
13163 effects can be contained within it. It is up to the caller to ensure
13164 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13165 reflect the source of the code to be parsed and the lexical context for
13168 The op tree representing the code block is returned. This is always a
13169 real op, never a null pointer. It will normally be a C<lineseq> list,
13170 including C<nextstate> or equivalent ops. No ops to construct any kind
13171 of runtime scope are included by virtue of it being a block.
13173 If an error occurs in parsing or compilation, in most cases a valid op
13174 tree (most likely null) is returned anyway. The error is reflected in
13175 the parser state, normally resulting in a single exception at the top
13176 level of parsing which covers all the compilation errors that occurred.
13177 Some compilation errors, however, will throw an exception immediately.
13179 The C<flags> parameter is reserved for future use, and must always
13186 Perl_parse_block(pTHX_ U32 flags)
13189 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13190 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13194 =for apidoc parse_barestmt
13196 Parse a single unadorned Perl statement. This may be a normal imperative
13197 statement or a declaration that has compile-time effect. It does not
13198 include any label or other affixture. It is up to the caller to ensure
13199 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13200 reflect the source of the code to be parsed and the lexical context for
13203 The op tree representing the statement is returned. This may be a
13204 null pointer if the statement is null, for example if it was actually
13205 a subroutine definition (which has compile-time side effects). If not
13206 null, it will be ops directly implementing the statement, suitable to
13207 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13208 equivalent op (except for those embedded in a scope contained entirely
13209 within the statement).
13211 If an error occurs in parsing or compilation, in most cases a valid op
13212 tree (most likely null) is returned anyway. The error is reflected in
13213 the parser state, normally resulting in a single exception at the top
13214 level of parsing which covers all the compilation errors that occurred.
13215 Some compilation errors, however, will throw an exception immediately.
13217 The C<flags> parameter is reserved for future use, and must always
13224 Perl_parse_barestmt(pTHX_ U32 flags)
13227 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13228 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13232 =for apidoc parse_label
13234 Parse a single label, possibly optional, of the type that may prefix a
13235 Perl statement. It is up to the caller to ensure that the dynamic parser
13236 state (L</PL_parser> et al) is correctly set to reflect the source of
13237 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13238 label is optional, otherwise it is mandatory.
13240 The name of the label is returned in the form of a fresh scalar. If an
13241 optional label is absent, a null pointer is returned.
13243 If an error occurs in parsing, which can only occur if the label is
13244 mandatory, a valid label is returned anyway. The error is reflected in
13245 the parser state, normally resulting in a single exception at the top
13246 level of parsing which covers all the compilation errors that occurred.
13252 Perl_parse_label(pTHX_ U32 flags)
13254 if (flags & ~PARSE_OPTIONAL)
13255 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13257 PL_parser->yychar = yylex();
13258 if (PL_parser->yychar == LABEL) {
13259 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13260 PL_parser->yychar = YYEMPTY;
13261 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13262 op_free(pl_yylval.opval);
13270 STRLEN wlen, bufptr_pos;
13273 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13275 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13276 if (word_takes_any_delimiter(s, wlen))
13278 bufptr_pos = s - SvPVX(PL_linestr);
13280 lex_read_space(LEX_KEEP_PREVIOUS);
13282 s = SvPVX(PL_linestr) + bufptr_pos;
13283 if (t[0] == ':' && t[1] != ':') {
13284 PL_oldoldbufptr = PL_oldbufptr;
13287 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13291 if (flags & PARSE_OPTIONAL) {
13294 qerror(Perl_mess(aTHX_ "Parse error"));
13295 return newSVpvs("x");
13302 =for apidoc parse_fullstmt
13304 Parse a single complete Perl statement. This may be a normal imperative
13305 statement or a declaration that has compile-time effect, and may include
13306 optional labels. It is up to the caller to ensure that the dynamic
13307 parser state (L</PL_parser> et al) is correctly set to reflect the source
13308 of the code to be parsed and the lexical context for the statement.
13310 The op tree representing the statement is returned. This may be a
13311 null pointer if the statement is null, for example if it was actually
13312 a subroutine definition (which has compile-time side effects). If not
13313 null, it will be the result of a L</newSTATEOP> call, normally including
13314 a C<nextstate> or equivalent op.
13316 If an error occurs in parsing or compilation, in most cases a valid op
13317 tree (most likely null) is returned anyway. The error is reflected in
13318 the parser state, normally resulting in a single exception at the top
13319 level of parsing which covers all the compilation errors that occurred.
13320 Some compilation errors, however, will throw an exception immediately.
13322 The C<flags> parameter is reserved for future use, and must always
13329 Perl_parse_fullstmt(pTHX_ U32 flags)
13332 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13333 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13337 =for apidoc parse_stmtseq
13339 Parse a sequence of zero or more Perl statements. These may be normal
13340 imperative statements, including optional labels, or declarations
13341 that have compile-time effect, or any mixture thereof. The statement
13342 sequence ends when a closing brace or end-of-file is encountered in a
13343 place where a new statement could have validly started. It is up to
13344 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13345 is correctly set to reflect the source of the code to be parsed and the
13346 lexical context for the statements.
13348 The op tree representing the statement sequence is returned. This may
13349 be a null pointer if the statements were all null, for example if there
13350 were no statements or if there were only subroutine definitions (which
13351 have compile-time side effects). If not null, it will be a C<lineseq>
13352 list, normally including C<nextstate> or equivalent ops.
13354 If an error occurs in parsing or compilation, in most cases a valid op
13355 tree is returned anyway. The error is reflected in the parser state,
13356 normally resulting in a single exception at the top level of parsing
13357 which covers all the compilation errors that occurred. Some compilation
13358 errors, however, will throw an exception immediately.
13360 The C<flags> parameter is reserved for future use, and must always
13367 Perl_parse_stmtseq(pTHX_ U32 flags)
13372 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13373 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13374 c = lex_peek_unichar(0);
13375 if (c != -1 && c != /*{*/'}')
13376 qerror(Perl_mess(aTHX_ "Parse error"));
13381 =for apidoc parse_subsignature
13383 Parse a subroutine signature declaration. This is the contents of the
13384 parentheses following a named or anonymous subroutine declaration when the
13385 C<signatures> feature is enabled. Note that this function neither expects
13386 nor consumes the opening and closing parentheses around the signature; it
13387 is the caller's job to handle these.
13389 This function must only be called during parsing of a subroutine; after
13390 L</start_subparse> has been called. It might allocate lexical variables on
13391 the pad for the current subroutine.
13393 The op tree to unpack the arguments from the stack at runtime is returned.
13394 This op tree should appear at the beginning of the compiled function. The
13395 caller may wish to use L</op_append_list> to build their function body
13396 after it, or splice it together with the body before calling L</newATTRSUB>.
13398 The C<flags> parameter is reserved for future use, and must always
13405 Perl_parse_subsignature(pTHX_ U32 flags)
13408 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13409 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13413 * ex: set ts=8 sts=4 sw=4 et: