3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmnU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "invlist_inline.h"
43 #define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
96 static const char ident_too_long[] = "Identifier too long";
97 static const char ident_var_zero_multi_digit[] = "Numeric variables with more than one digit may not start with '0'";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155 other token values are 258 or higher (see perly.h), so -1 should be
158 #define YYL_RETRY (-1)
161 static const char* const lex_state_names[] = {
176 #include "keywords.h"
178 /* CLINE is a macro that ensures PL_copline has a sane value */
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
183 * Convenience functions to return different tokens and prime the
184 * lexer for the next token. They all take an argument.
186 * TOKEN : generic token (used for '(', DOLSHARP, etc)
187 * OPERATOR : generic operator
188 * AOPERATOR : assignment operator
189 * PREBLOCK : beginning the block after an if, while, foreach, ...
190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191 * PREREF : *EXPR where EXPR is not a simple identifier
192 * TERM : expression term
193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
194 * LOOPX : loop exiting command (goto, last, dump, etc)
195 * FTST : file test operator
196 * FUN0 : zero-argument function
197 * FUN0OP : zero-argument function, with its op created in this file
198 * FUN1 : not used, except for not, which isn't a UNIOP
199 * BOop : bitwise or or xor
201 * BCop : bitwise complement
202 * SHop : shift operator
203 * PWop : power operator
204 * PMop : pattern-matching operator
205 * Aop : addition-level operator
206 * AopNOASSIGN : addition-level operator that is never part of .=
207 * Mop : multiplication-level operator
208 * ChEop : chaining equality-testing operator
209 * NCEop : non-chaining comparison operator at equality precedence
210 * ChRop : chaining relational operator <= != gt
211 * NCRop : non-chaining relational operator isa
213 * Also see LOP and lop() below.
216 #ifdef DEBUGGING /* Serve -DT. */
217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
219 # define REPORT(retval) (retval)
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
253 /* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
258 #define UNI3(f,x,have_x) { \
259 pl_yylval.ival = f; \
260 if (have_x) PL_expect = x; \
262 PL_last_uni = PL_oldbufptr; \
263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
265 return REPORT( (int)FUNC1 ); \
267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
269 #define UNI(f) UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272 if (optional) PL_last_uni = PL_oldbufptr; \
276 #define UNIBRACK(f) UNI3(f,0,0)
278 /* return has special case parsing.
280 * List operators have low precedence. Functions have high precedence.
281 * Every built in, *except return*, if written with () around its arguments, is
282 * parsed as a function. Hence every other list built in:
284 * $ perl -lwe 'sub foo { join 2,4,6 * 1.5 } print for foo()' # join 2,4,9
286 * $ perl -lwe 'sub foo { join(2,4,6) * 1.5 } print for foo()' # 426 * 1.5
288 * $ perl -lwe 'sub foo { join+(2,4,6) * 1.5 } print for foo()'
289 * Useless use of a constant (2) in void context at -e line 1.
290 * Useless use of a constant (4) in void context at -e line 1.
294 * empty line output because C<(2, 4, 6) * 1.5> is the comma operator, not a
295 * list. * forces scalar context, 6 * 1.5 is 9, and join(9) is the empty string.
299 * $ perl -lwe 'sub foo { return 2,4,6 * 1.5 } print for foo()'
303 * $ perl -lwe 'sub foo { return(2,4,6) * 1.5 } print for foo()'
304 * Useless use of a constant (2) in void context at -e line 1.
305 * Useless use of a constant (4) in void context at -e line 1.
307 * $ perl -lwe 'sub foo { return+(2,4,6) * 1.5 } print for foo()'
308 * Useless use of a constant (2) in void context at -e line 1.
309 * Useless use of a constant (4) in void context at -e line 1.
314 * $ perl -lwe 'sub foo { return(2,4,6) } print for foo()'
319 * This last example is what we expect, but it's clearly inconsistent with how
320 * C<return(2,4,6) * 1.5> *ought* to behave, if the rules were consistently
324 * Perl 3 attempted to be consistent:
326 * The rules are more consistent about where parens are needed and
327 * where they are not. In particular, unary operators and list operators now
328 * behave like functions if they're called like functions.
330 * However, the behaviour for return was reverted to the "old" parsing with
335 * did not do what was expected, since return was swallowing the
336 * parens in order to consider itself a function. The solution,
337 * since return never wants any trailing expression such as
338 * return (1,2,3) + 2;
339 * is to simply make return an exception to the paren-makes-a-function
340 * rule, and treat it the way it always was, so that it doesn't
343 * To demonstrate the special-case parsing, replace OLDLOP(OP_RETURN); with
344 * LOP(OP_RETURN, XTERM);
346 * and constructs such as
348 * return (Internals::V())[2]
350 * turn into syntax errors
355 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
356 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
357 pl_yylval.ival = (f); \
363 #define COPLINE_INC_WITH_HERELINES \
365 CopLINE_inc(PL_curcop); \
366 if (PL_parser->herelines) \
367 CopLINE(PL_curcop) += PL_parser->herelines, \
368 PL_parser->herelines = 0; \
370 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
371 * is no sublex_push to follow. */
372 #define COPLINE_SET_FROM_MULTI_END \
374 CopLINE_set(PL_curcop, PL_multi_end); \
375 if (PL_multi_end != PL_multi_start) \
376 PL_parser->herelines = 0; \
380 /* A file-local structure for passing around information about subroutines and
381 * related definable words */
391 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
395 /* how to interpret the pl_yylval associated with the token */
399 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
404 #define DEBUG_TOKEN(Type, Name) \
405 { Name, TOKENTYPE_##Type, #Name }
407 static struct debug_tokens {
409 enum token_type type;
411 } const debug_tokens[] =
413 DEBUG_TOKEN (OPNUM, ADDOP),
414 DEBUG_TOKEN (NONE, ANDAND),
415 DEBUG_TOKEN (NONE, ANDOP),
416 DEBUG_TOKEN (NONE, ARROW),
417 DEBUG_TOKEN (OPNUM, ASSIGNOP),
418 DEBUG_TOKEN (OPNUM, BITANDOP),
419 DEBUG_TOKEN (OPNUM, BITOROP),
420 DEBUG_TOKEN (OPNUM, CHEQOP),
421 DEBUG_TOKEN (OPNUM, CHRELOP),
422 DEBUG_TOKEN (NONE, COLONATTR),
423 DEBUG_TOKEN (NONE, DOLSHARP),
424 DEBUG_TOKEN (NONE, DORDOR),
425 DEBUG_TOKEN (IVAL, DOTDOT),
426 DEBUG_TOKEN (NONE, FORMLBRACK),
427 DEBUG_TOKEN (NONE, FORMRBRACK),
428 DEBUG_TOKEN (OPNUM, FUNC),
429 DEBUG_TOKEN (OPNUM, FUNC0),
430 DEBUG_TOKEN (OPVAL, FUNC0OP),
431 DEBUG_TOKEN (OPVAL, FUNC0SUB),
432 DEBUG_TOKEN (OPNUM, FUNC1),
433 DEBUG_TOKEN (NONE, HASHBRACK),
434 DEBUG_TOKEN (IVAL, KW_CATCH),
435 DEBUG_TOKEN (IVAL, KW_CONTINUE),
436 DEBUG_TOKEN (IVAL, KW_DEFAULT),
437 DEBUG_TOKEN (IVAL, KW_DO),
438 DEBUG_TOKEN (IVAL, KW_ELSE),
439 DEBUG_TOKEN (IVAL, KW_ELSIF),
440 DEBUG_TOKEN (IVAL, KW_GIVEN),
441 DEBUG_TOKEN (IVAL, KW_FOR),
442 DEBUG_TOKEN (IVAL, KW_FORMAT),
443 DEBUG_TOKEN (IVAL, KW_IF),
444 DEBUG_TOKEN (IVAL, KW_LOCAL),
445 DEBUG_TOKEN (IVAL, KW_MY),
446 DEBUG_TOKEN (IVAL, KW_PACKAGE),
447 DEBUG_TOKEN (IVAL, KW_REQUIRE),
448 DEBUG_TOKEN (IVAL, KW_SUB_anon),
449 DEBUG_TOKEN (IVAL, KW_SUB_anon_sig),
450 DEBUG_TOKEN (IVAL, KW_SUB_named),
451 DEBUG_TOKEN (IVAL, KW_SUB_named_sig),
452 DEBUG_TOKEN (IVAL, KW_TRY),
453 DEBUG_TOKEN (IVAL, KW_USE_or_NO),
454 DEBUG_TOKEN (IVAL, KW_UNLESS),
455 DEBUG_TOKEN (IVAL, KW_UNTIL),
456 DEBUG_TOKEN (IVAL, KW_WHEN),
457 DEBUG_TOKEN (IVAL, KW_WHILE),
458 DEBUG_TOKEN (OPVAL, LABEL),
459 DEBUG_TOKEN (OPNUM, LOOPEX),
460 DEBUG_TOKEN (OPNUM, LSTOP),
461 DEBUG_TOKEN (OPVAL, LSTOPSUB),
462 DEBUG_TOKEN (OPNUM, MATCHOP),
463 DEBUG_TOKEN (OPVAL, METHCALL),
464 DEBUG_TOKEN (OPVAL, METHCALL0),
465 DEBUG_TOKEN (OPNUM, MULOP),
466 DEBUG_TOKEN (OPNUM, NCEQOP),
467 DEBUG_TOKEN (OPNUM, NCRELOP),
468 DEBUG_TOKEN (NONE, NOAMP),
469 DEBUG_TOKEN (NONE, NOTOP),
470 DEBUG_TOKEN (IVAL, OROP),
471 DEBUG_TOKEN (NONE, OROR),
472 DEBUG_TOKEN (IVAL, PERLY_AMPERSAND),
473 DEBUG_TOKEN (IVAL, PERLY_BRACE_CLOSE),
474 DEBUG_TOKEN (IVAL, PERLY_BRACE_OPEN),
475 DEBUG_TOKEN (IVAL, PERLY_BRACKET_CLOSE),
476 DEBUG_TOKEN (IVAL, PERLY_BRACKET_OPEN),
477 DEBUG_TOKEN (IVAL, PERLY_COLON),
478 DEBUG_TOKEN (IVAL, PERLY_COMMA),
479 DEBUG_TOKEN (IVAL, PERLY_DOT),
480 DEBUG_TOKEN (IVAL, PERLY_EQUAL_SIGN),
481 DEBUG_TOKEN (IVAL, PERLY_EXCLAMATION_MARK),
482 DEBUG_TOKEN (IVAL, PERLY_MINUS),
483 DEBUG_TOKEN (IVAL, PERLY_PAREN_OPEN),
484 DEBUG_TOKEN (IVAL, PERLY_PERCENT_SIGN),
485 DEBUG_TOKEN (IVAL, PERLY_PLUS),
486 DEBUG_TOKEN (IVAL, PERLY_QUESTION_MARK),
487 DEBUG_TOKEN (IVAL, PERLY_SEMICOLON),
488 DEBUG_TOKEN (IVAL, PERLY_SLASH),
489 DEBUG_TOKEN (IVAL, PERLY_SNAIL),
490 DEBUG_TOKEN (IVAL, PERLY_STAR),
491 DEBUG_TOKEN (IVAL, PERLY_TILDE),
492 DEBUG_TOKEN (OPVAL, PLUGEXPR),
493 DEBUG_TOKEN (OPVAL, PLUGSTMT),
494 DEBUG_TOKEN (OPVAL, PMFUNC),
495 DEBUG_TOKEN (NONE, POSTJOIN),
496 DEBUG_TOKEN (NONE, POSTDEC),
497 DEBUG_TOKEN (NONE, POSTINC),
498 DEBUG_TOKEN (OPNUM, POWOP),
499 DEBUG_TOKEN (NONE, PREDEC),
500 DEBUG_TOKEN (NONE, PREINC),
501 DEBUG_TOKEN (OPVAL, PRIVATEREF),
502 DEBUG_TOKEN (OPVAL, QWLIST),
503 DEBUG_TOKEN (NONE, REFGEN),
504 DEBUG_TOKEN (OPNUM, SHIFTOP),
505 DEBUG_TOKEN (NONE, SUBLEXEND),
506 DEBUG_TOKEN (NONE, SUBLEXSTART),
507 DEBUG_TOKEN (OPVAL, THING),
508 DEBUG_TOKEN (NONE, UMINUS),
509 DEBUG_TOKEN (OPNUM, UNIOP),
510 DEBUG_TOKEN (OPVAL, UNIOPSUB),
511 DEBUG_TOKEN (OPVAL, BAREWORD),
512 DEBUG_TOKEN (IVAL, YADAYADA),
513 { 0, TOKENTYPE_NONE, NULL }
518 /* dump the returned token in rv, plus any optional arg in pl_yylval */
521 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
523 PERL_ARGS_ASSERT_TOKEREPORT;
526 const char *name = NULL;
527 enum token_type type = TOKENTYPE_NONE;
528 const struct debug_tokens *p;
529 SV* const report = newSVpvs("<== ");
531 for (p = debug_tokens; p->token; p++) {
532 if (p->token == (int)rv) {
539 Perl_sv_catpv(aTHX_ report, name);
540 else if (isGRAPH(rv))
542 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
544 sv_catpvs(report, " (pending identifier)");
547 sv_catpvs(report, "EOF");
549 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
554 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
556 case TOKENTYPE_OPNUM:
557 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
558 PL_op_name[lvalp->ival]);
561 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
563 case TOKENTYPE_OPVAL:
565 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
566 PL_op_name[lvalp->opval->op_type]);
567 if (lvalp->opval->op_type == OP_CONST) {
568 Perl_sv_catpvf(aTHX_ report, " %s",
569 SvPEEK(cSVOPx_sv(lvalp->opval)));
574 sv_catpvs(report, "(opval=null)");
577 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
583 /* print the buffer with suitable escapes */
586 S_printbuf(pTHX_ const char *const fmt, const char *const s)
588 SV* const tmp = newSVpvs("");
590 PERL_ARGS_ASSERT_PRINTBUF;
592 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
593 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
594 GCC_DIAG_RESTORE_STMT;
603 * This subroutine looks for an '=' next to the operator that has just been
604 * parsed and turns it into an ASSIGNOP if it finds one.
608 S_ao(pTHX_ int toketype)
610 if (*PL_bufptr == '=') {
614 case ANDAND: pl_yylval.ival = OP_ANDASSIGN; break;
615 case OROR: pl_yylval.ival = OP_ORASSIGN; break;
616 case DORDOR: pl_yylval.ival = OP_DORASSIGN; break;
621 return REPORT(toketype);
626 * When Perl expects an operator and finds something else, no_op
627 * prints the warning. It always prints "<something> found where
628 * operator expected. It prints "Missing semicolon on previous line?"
629 * if the surprise occurs at the start of the line. "do you need to
630 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
631 * where the compiler doesn't know if foo is a method call or a function.
632 * It prints "Missing operator before end of line" if there's nothing
633 * after the missing operator, or "... before <...>" if there is something
634 * after the missing operator.
636 * PL_bufptr is expected to point to the start of the thing that was found,
637 * and s after the next token or partial token.
641 S_no_op(pTHX_ const char *const what, char *s)
643 char * const oldbp = PL_bufptr;
644 const bool is_first = (PL_oldbufptr == PL_linestart);
646 PERL_ARGS_ASSERT_NO_OP;
652 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
653 if (ckWARN_d(WARN_SYNTAX)) {
655 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
656 "\t(Missing semicolon on previous line?)\n");
657 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
662 for (t = PL_oldoldbufptr;
663 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
664 t += UTF ? UTF8SKIP(t) : 1)
668 if (t < PL_bufptr && isSPACE(*t))
669 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
670 "\t(Do you need to predeclare %" UTF8f "?)\n",
671 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
675 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
676 "\t(Missing operator before %" UTF8f "?)\n",
677 UTF8fARG(UTF, s - oldbp, oldbp));
685 * Complain about missing quote/regexp/heredoc terminator.
686 * If it's called with NULL then it cauterizes the line buffer.
687 * If we're in a delimited string and the delimiter is a control
688 * character, it's reformatted into a two-char sequence like ^C.
693 S_missingterm(pTHX_ char *s, STRLEN len)
695 char tmpbuf[UTF8_MAXBYTES + 1];
699 char * const nl = (char *) my_memrchr(s, '\n', len);
706 else if (PL_multi_close < 32) {
708 tmpbuf[1] = (char)toCTRL(PL_multi_close);
714 if (! UTF && LIKELY(PL_multi_close < 256)) {
715 *tmpbuf = (char)PL_multi_close;
720 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
727 q = memchr(s, '"', len) ? '\'' : '"';
728 Perl_croak(aTHX_ "Can't find string terminator %c%" UTF8f "%c"
729 " anywhere before EOF", q, UTF8fARG(uni, len, s), q);
735 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
736 * utf16-to-utf8-reversed.
739 #ifdef PERL_CR_FILTER
743 const char *s = SvPVX_const(sv);
744 const char * const e = s + SvCUR(sv);
746 PERL_ARGS_ASSERT_STRIP_RETURN;
748 /* outer loop optimized to do nothing if there are no CR-LFs */
750 if (*s++ == '\r' && *s == '\n') {
751 /* hit a CR-LF, need to copy the rest */
755 if (*s == '\r' && s[1] == '\n')
766 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
768 const I32 count = FILTER_READ(idx+1, sv, maxlen);
769 if (count > 0 && !maxlen)
776 =for apidoc lex_start
778 Creates and initialises a new lexer/parser state object, supplying
779 a context in which to lex and parse from a new source of Perl code.
780 A pointer to the new state object is placed in L</PL_parser>. An entry
781 is made on the save stack so that upon unwinding, the new state object
782 will be destroyed and the former value of L</PL_parser> will be restored.
783 Nothing else need be done to clean up the parsing context.
785 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
786 non-null, provides a string (in SV form) containing code to be parsed.
787 A copy of the string is made, so subsequent modification of C<line>
788 does not affect parsing. C<rsfp>, if non-null, provides an input stream
789 from which code will be read to be parsed. If both are non-null, the
790 code in C<line> comes first and must consist of complete lines of input,
791 and C<rsfp> supplies the remainder of the source.
793 The C<flags> parameter is reserved for future use. Currently it is only
794 used by perl internally, so extensions should always pass zero.
799 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
800 can share filters with the current parser.
801 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
802 caller, hence isn't owned by the parser, so shouldn't be closed on parser
803 destruction. This is used to handle the case of defaulting to reading the
804 script from the standard input because no filename was given on the command
805 line (without getting confused by situation where STDIN has been closed, so
806 the script handle is opened on fd 0) */
809 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
811 const char *s = NULL;
812 yy_parser *parser, *oparser;
814 if (flags && flags & ~LEX_START_FLAGS)
815 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
817 /* create and initialise a parser */
819 Newxz(parser, 1, yy_parser);
820 parser->old_parser = oparser = PL_parser;
823 parser->stack = NULL;
824 parser->stack_max1 = NULL;
827 /* on scope exit, free this parser and restore any outer one */
829 parser->saved_curcop = PL_curcop;
831 /* initialise lexer state */
833 parser->nexttoke = 0;
834 parser->error_count = oparser ? oparser->error_count : 0;
835 parser->copline = parser->preambling = NOLINE;
836 parser->lex_state = LEX_NORMAL;
837 parser->expect = XSTATE;
839 parser->recheck_utf8_validity = TRUE;
840 parser->rsfp_filters =
841 !(flags & LEX_START_SAME_FILTER) || !oparser
843 : MUTABLE_AV(SvREFCNT_inc(
844 oparser->rsfp_filters
845 ? oparser->rsfp_filters
846 : (oparser->rsfp_filters = newAV())
849 Newx(parser->lex_brackstack, 120, char);
850 Newx(parser->lex_casestack, 12, char);
851 *parser->lex_casestack = '\0';
852 Newxz(parser->lex_shared, 1, LEXSHARED);
856 const U8* first_bad_char_loc;
858 s = SvPV_const(line, len);
861 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
863 &first_bad_char_loc)))
865 _force_out_malformed_utf8_message(first_bad_char_loc,
866 (U8 *) s + SvCUR(line),
868 1 /* 1 means die */ );
869 NOT_REACHED; /* NOTREACHED */
872 parser->linestr = flags & LEX_START_COPIED
873 ? SvREFCNT_inc_simple_NN(line)
874 : newSVpvn_flags(s, len, SvUTF8(line));
876 sv_catpvs(parser->linestr, "\n;");
878 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
881 parser->oldoldbufptr =
884 parser->linestart = SvPVX(parser->linestr);
885 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
886 parser->last_lop = parser->last_uni = NULL;
888 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
889 |LEX_DONT_CLOSE_RSFP));
890 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
891 |LEX_DONT_CLOSE_RSFP));
893 parser->in_pod = parser->filtered = 0;
897 /* delete a parser object */
900 Perl_parser_free(pTHX_ const yy_parser *parser)
902 PERL_ARGS_ASSERT_PARSER_FREE;
904 PL_curcop = parser->saved_curcop;
905 SvREFCNT_dec(parser->linestr);
907 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
908 PerlIO_clearerr(parser->rsfp);
909 else if (parser->rsfp && (!parser->old_parser
910 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
911 PerlIO_close(parser->rsfp);
912 SvREFCNT_dec(parser->rsfp_filters);
913 SvREFCNT_dec(parser->lex_stuff);
914 SvREFCNT_dec(parser->lex_sub_repl);
916 Safefree(parser->lex_brackstack);
917 Safefree(parser->lex_casestack);
918 Safefree(parser->lex_shared);
919 PL_parser = parser->old_parser;
924 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
926 I32 nexttoke = parser->nexttoke;
927 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
929 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
930 && parser->nextval[nexttoke].opval
931 && parser->nextval[nexttoke].opval->op_slabbed
932 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
933 op_free(parser->nextval[nexttoke].opval);
934 parser->nextval[nexttoke].opval = NULL;
941 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
943 Buffer scalar containing the chunk currently under consideration of the
944 text currently being lexed. This is always a plain string scalar (for
945 which C<SvPOK> is true). It is not intended to be used as a scalar by
946 normal scalar means; instead refer to the buffer directly by the pointer
947 variables described below.
949 The lexer maintains various C<char*> pointers to things in the
950 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
951 reallocated, all of these pointers must be updated. Don't attempt to
952 do this manually, but rather use L</lex_grow_linestr> if you need to
953 reallocate the buffer.
955 The content of the text chunk in the buffer is commonly exactly one
956 complete line of input, up to and including a newline terminator,
957 but there are situations where it is otherwise. The octets of the
958 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
959 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
960 flag on this scalar, which may disagree with it.
962 For direct examination of the buffer, the variable
963 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
964 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
965 of these pointers is usually preferable to examination of the scalar
966 through normal scalar means.
968 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
970 Direct pointer to the end of the chunk of text currently being lexed, the
971 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
972 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
973 always located at the end of the buffer, and does not count as part of
974 the buffer's contents.
976 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
978 Points to the current position of lexing inside the lexer buffer.
979 Characters around this point may be freely examined, within
980 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
981 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
982 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
984 Lexing code (whether in the Perl core or not) moves this pointer past
985 the characters that it consumes. It is also expected to perform some
986 bookkeeping whenever a newline character is consumed. This movement
987 can be more conveniently performed by the function L</lex_read_to>,
988 which handles newlines appropriately.
990 Interpretation of the buffer's octets can be abstracted out by
991 using the slightly higher-level functions L</lex_peek_unichar> and
992 L</lex_read_unichar>.
994 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
996 Points to the start of the current line inside the lexer buffer.
997 This is useful for indicating at which column an error occurred, and
998 not much else. This must be updated by any lexing code that consumes
999 a newline; the function L</lex_read_to> handles this detail.
1005 =for apidoc lex_bufutf8
1007 Indicates whether the octets in the lexer buffer
1008 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
1009 of Unicode characters. If not, they should be interpreted as Latin-1
1010 characters. This is analogous to the C<SvUTF8> flag for scalars.
1012 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
1013 contains valid UTF-8. Lexing code must be robust in the face of invalid
1016 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
1017 is significant, but not the whole story regarding the input character
1018 encoding. Normally, when a file is being read, the scalar contains octets
1019 and its C<SvUTF8> flag is off, but the octets should be interpreted as
1020 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
1021 however, the scalar may have the C<SvUTF8> flag on, and in this case its
1022 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
1023 is in effect. This logic may change in the future; use this function
1024 instead of implementing the logic yourself.
1030 Perl_lex_bufutf8(pTHX)
1036 =for apidoc lex_grow_linestr
1038 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
1039 at least C<len> octets (including terminating C<NUL>). Returns a
1040 pointer to the reallocated buffer. This is necessary before making
1041 any direct modification of the buffer that would increase its length.
1042 L</lex_stuff_pvn> provides a more convenient way to insert text into
1045 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
1046 this function updates all of the lexer's variables that point directly
1053 Perl_lex_grow_linestr(pTHX_ STRLEN len)
1057 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1058 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
1061 linestr = PL_parser->linestr;
1062 buf = SvPVX(linestr);
1063 if (len <= SvLEN(linestr))
1066 /* Is the lex_shared linestr SV the same as the current linestr SV?
1067 * Only in this case does re_eval_start need adjusting, since it
1068 * points within lex_shared->ls_linestr's buffer */
1069 current = ( !PL_parser->lex_shared->ls_linestr
1070 || linestr == PL_parser->lex_shared->ls_linestr);
1072 bufend_pos = PL_parser->bufend - buf;
1073 bufptr_pos = PL_parser->bufptr - buf;
1074 oldbufptr_pos = PL_parser->oldbufptr - buf;
1075 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1076 linestart_pos = PL_parser->linestart - buf;
1077 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1078 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1079 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
1080 PL_parser->lex_shared->re_eval_start - buf : 0;
1082 buf = sv_grow(linestr, len);
1084 PL_parser->bufend = buf + bufend_pos;
1085 PL_parser->bufptr = buf + bufptr_pos;
1086 PL_parser->oldbufptr = buf + oldbufptr_pos;
1087 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1088 PL_parser->linestart = buf + linestart_pos;
1089 if (PL_parser->last_uni)
1090 PL_parser->last_uni = buf + last_uni_pos;
1091 if (PL_parser->last_lop)
1092 PL_parser->last_lop = buf + last_lop_pos;
1093 if (current && PL_parser->lex_shared->re_eval_start)
1094 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
1099 =for apidoc lex_stuff_pvn
1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103 reallocating the buffer if necessary. This means that lexing code that
1104 runs later will see the characters as if they had appeared in the input.
1105 It is not recommended to do this as part of normal parsing, and most
1106 uses of this facility run the risk of the inserted characters being
1107 interpreted in an unintended manner.
1109 The string to be inserted is represented by C<len> octets starting
1110 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1111 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1112 The characters are recoded for the lexer buffer, according to how the
1113 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1114 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1115 function is more convenient.
1117 =for apidoc Amnh||LEX_STUFF_UTF8
1123 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1126 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1127 if (flags & ~(LEX_STUFF_UTF8))
1128 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1130 if (flags & LEX_STUFF_UTF8) {
1133 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1135 const char *p, *e = pv+len;;
1138 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1139 bufptr = PL_parser->bufptr;
1140 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1141 SvCUR_set(PL_parser->linestr,
1142 SvCUR(PL_parser->linestr) + len+highhalf);
1143 PL_parser->bufend += len+highhalf;
1144 for (p = pv; p != e; p++) {
1145 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1149 if (flags & LEX_STUFF_UTF8) {
1150 STRLEN highhalf = 0;
1151 const char *p, *e = pv+len;
1152 for (p = pv; p != e; p++) {
1154 if (UTF8_IS_ABOVE_LATIN1(c)) {
1155 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1156 "non-Latin-1 character into Latin-1 input");
1157 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1160 } else assert(UTF8_IS_INVARIANT(c));
1164 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1165 bufptr = PL_parser->bufptr;
1166 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1167 SvCUR_set(PL_parser->linestr,
1168 SvCUR(PL_parser->linestr) + len-highhalf);
1169 PL_parser->bufend += len-highhalf;
1172 if (UTF8_IS_INVARIANT(*p)) {
1178 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1184 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1185 bufptr = PL_parser->bufptr;
1186 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1187 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1188 PL_parser->bufend += len;
1189 Copy(pv, bufptr, len, char);
1195 =for apidoc lex_stuff_pv
1197 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1198 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1199 reallocating the buffer if necessary. This means that lexing code that
1200 runs later will see the characters as if they had appeared in the input.
1201 It is not recommended to do this as part of normal parsing, and most
1202 uses of this facility run the risk of the inserted characters being
1203 interpreted in an unintended manner.
1205 The string to be inserted is represented by octets starting at C<pv>
1206 and continuing to the first nul. These octets are interpreted as either
1207 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1208 in C<flags>. The characters are recoded for the lexer buffer, according
1209 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1210 If it is not convenient to nul-terminate a string to be inserted, the
1211 L</lex_stuff_pvn> function is more appropriate.
1217 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1219 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1220 lex_stuff_pvn(pv, strlen(pv), flags);
1224 =for apidoc lex_stuff_sv
1226 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1227 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1228 reallocating the buffer if necessary. This means that lexing code that
1229 runs later will see the characters as if they had appeared in the input.
1230 It is not recommended to do this as part of normal parsing, and most
1231 uses of this facility run the risk of the inserted characters being
1232 interpreted in an unintended manner.
1234 The string to be inserted is the string value of C<sv>. The characters
1235 are recoded for the lexer buffer, according to how the buffer is currently
1236 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1237 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1238 need to construct a scalar.
1244 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1248 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1250 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1252 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1256 =for apidoc lex_unstuff
1258 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1259 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1260 This hides the discarded text from any lexing code that runs later,
1261 as if the text had never appeared.
1263 This is not the normal way to consume lexed text. For that, use
1270 Perl_lex_unstuff(pTHX_ char *ptr)
1274 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1275 buf = PL_parser->bufptr;
1277 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1280 bufend = PL_parser->bufend;
1282 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1283 unstuff_len = ptr - buf;
1284 Move(ptr, buf, bufend+1-ptr, char);
1285 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1286 PL_parser->bufend = bufend - unstuff_len;
1290 =for apidoc lex_read_to
1292 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1293 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1294 performing the correct bookkeeping whenever a newline character is passed.
1295 This is the normal way to consume lexed text.
1297 Interpretation of the buffer's octets can be abstracted out by
1298 using the slightly higher-level functions L</lex_peek_unichar> and
1299 L</lex_read_unichar>.
1305 Perl_lex_read_to(pTHX_ char *ptr)
1308 PERL_ARGS_ASSERT_LEX_READ_TO;
1309 s = PL_parser->bufptr;
1310 if (ptr < s || ptr > PL_parser->bufend)
1311 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1312 for (; s != ptr; s++)
1314 COPLINE_INC_WITH_HERELINES;
1315 PL_parser->linestart = s+1;
1317 PL_parser->bufptr = ptr;
1321 =for apidoc lex_discard_to
1323 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1324 up to C<ptr>. The remaining content of the buffer will be moved, and
1325 all pointers into the buffer updated appropriately. C<ptr> must not
1326 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1327 it is not permitted to discard text that has yet to be lexed.
1329 Normally it is not necessarily to do this directly, because it suffices to
1330 use the implicit discarding behaviour of L</lex_next_chunk> and things
1331 based on it. However, if a token stretches across multiple lines,
1332 and the lexing code has kept multiple lines of text in the buffer for
1333 that purpose, then after completion of the token it would be wise to
1334 explicitly discard the now-unneeded earlier lines, to avoid future
1335 multi-line tokens growing the buffer without bound.
1341 Perl_lex_discard_to(pTHX_ char *ptr)
1345 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1346 buf = SvPVX(PL_parser->linestr);
1348 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1351 if (ptr > PL_parser->bufptr)
1352 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1353 discard_len = ptr - buf;
1354 if (PL_parser->oldbufptr < ptr)
1355 PL_parser->oldbufptr = ptr;
1356 if (PL_parser->oldoldbufptr < ptr)
1357 PL_parser->oldoldbufptr = ptr;
1358 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1359 PL_parser->last_uni = NULL;
1360 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1361 PL_parser->last_lop = NULL;
1362 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1363 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1364 PL_parser->bufend -= discard_len;
1365 PL_parser->bufptr -= discard_len;
1366 PL_parser->oldbufptr -= discard_len;
1367 PL_parser->oldoldbufptr -= discard_len;
1368 if (PL_parser->last_uni)
1369 PL_parser->last_uni -= discard_len;
1370 if (PL_parser->last_lop)
1371 PL_parser->last_lop -= discard_len;
1375 Perl_notify_parser_that_changed_to_utf8(pTHX)
1377 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1378 * off to on. At compile time, this has the effect of entering a 'use
1379 * utf8' section. This means that any input was not previously checked for
1380 * UTF-8 (because it was off), but now we do need to check it, or our
1381 * assumptions about the input being sane could be wrong, and we could
1382 * segfault. This routine just sets a flag so that the next time we look
1383 * at the input we do the well-formed UTF-8 check. If we aren't in the
1384 * proper phase, there may not be a parser object, but if there is, setting
1385 * the flag is harmless */
1388 PL_parser->recheck_utf8_validity = TRUE;
1393 =for apidoc lex_next_chunk
1395 Reads in the next chunk of text to be lexed, appending it to
1396 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1397 looked to the end of the current chunk and wants to know more. It is
1398 usual, but not necessary, for lexing to have consumed the entirety of
1399 the current chunk at this time.
1401 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1402 chunk (i.e., the current chunk has been entirely consumed), normally the
1403 current chunk will be discarded at the same time that the new chunk is
1404 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1405 will not be discarded. If the current chunk has not been entirely
1406 consumed, then it will not be discarded regardless of the flag.
1408 Returns true if some new text was added to the buffer, or false if the
1409 buffer has reached the end of the input text.
1411 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1416 #define LEX_FAKE_EOF 0x80000000
1417 #define LEX_NO_TERM 0x40000000 /* here-doc */
1420 Perl_lex_next_chunk(pTHX_ U32 flags)
1424 STRLEN old_bufend_pos, new_bufend_pos;
1425 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1426 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1427 bool got_some_for_debugger = 0;
1430 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1431 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1432 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1434 linestr = PL_parser->linestr;
1435 buf = SvPVX(linestr);
1436 if (!(flags & LEX_KEEP_PREVIOUS)
1437 && PL_parser->bufptr == PL_parser->bufend)
1439 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1441 if (PL_parser->last_uni != PL_parser->bufend)
1442 PL_parser->last_uni = NULL;
1443 if (PL_parser->last_lop != PL_parser->bufend)
1444 PL_parser->last_lop = NULL;
1445 last_uni_pos = last_lop_pos = 0;
1447 SvCUR_set(linestr, 0);
1449 old_bufend_pos = PL_parser->bufend - buf;
1450 bufptr_pos = PL_parser->bufptr - buf;
1451 oldbufptr_pos = PL_parser->oldbufptr - buf;
1452 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1453 linestart_pos = PL_parser->linestart - buf;
1454 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1455 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1457 if (flags & LEX_FAKE_EOF) {
1459 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1461 } else if (filter_gets(linestr, old_bufend_pos)) {
1463 got_some_for_debugger = 1;
1464 } else if (flags & LEX_NO_TERM) {
1467 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1470 /* End of real input. Close filehandle (unless it was STDIN),
1471 * then add implicit termination.
1473 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1474 PerlIO_clearerr(PL_parser->rsfp);
1475 else if (PL_parser->rsfp)
1476 (void)PerlIO_close(PL_parser->rsfp);
1477 PL_parser->rsfp = NULL;
1478 PL_parser->in_pod = PL_parser->filtered = 0;
1479 if (!PL_in_eval && PL_minus_p) {
1481 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1482 PL_minus_n = PL_minus_p = 0;
1483 } else if (!PL_in_eval && PL_minus_n) {
1484 sv_catpvs(linestr, /*{*/";}");
1487 sv_catpvs(linestr, ";");
1490 buf = SvPVX(linestr);
1491 new_bufend_pos = SvCUR(linestr);
1492 PL_parser->bufend = buf + new_bufend_pos;
1493 PL_parser->bufptr = buf + bufptr_pos;
1496 const U8* first_bad_char_loc;
1497 if (UNLIKELY(! is_utf8_string_loc(
1498 (U8 *) PL_parser->bufptr,
1499 PL_parser->bufend - PL_parser->bufptr,
1500 &first_bad_char_loc)))
1502 _force_out_malformed_utf8_message(first_bad_char_loc,
1503 (U8 *) PL_parser->bufend,
1505 1 /* 1 means die */ );
1506 NOT_REACHED; /* NOTREACHED */
1510 PL_parser->oldbufptr = buf + oldbufptr_pos;
1511 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1512 PL_parser->linestart = buf + linestart_pos;
1513 if (PL_parser->last_uni)
1514 PL_parser->last_uni = buf + last_uni_pos;
1515 if (PL_parser->last_lop)
1516 PL_parser->last_lop = buf + last_lop_pos;
1517 if (PL_parser->preambling != NOLINE) {
1518 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1519 PL_parser->preambling = NOLINE;
1521 if ( got_some_for_debugger
1522 && PERLDB_LINE_OR_SAVESRC
1523 && PL_curstash != PL_debstash)
1525 /* debugger active and we're not compiling the debugger code,
1526 * so store the line into the debugger's array of lines
1528 update_debugger_info(NULL, buf+old_bufend_pos,
1529 new_bufend_pos-old_bufend_pos);
1535 =for apidoc lex_peek_unichar
1537 Looks ahead one (Unicode) character in the text currently being lexed.
1538 Returns the codepoint (unsigned integer value) of the next character,
1539 or -1 if lexing has reached the end of the input text. To consume the
1540 peeked character, use L</lex_read_unichar>.
1542 If the next character is in (or extends into) the next chunk of input
1543 text, the next chunk will be read in. Normally the current chunk will be
1544 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1545 bit set, then the current chunk will not be discarded.
1547 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1548 is encountered, an exception is generated.
1554 Perl_lex_peek_unichar(pTHX_ U32 flags)
1557 if (flags & ~(LEX_KEEP_PREVIOUS))
1558 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1559 s = PL_parser->bufptr;
1560 bufend = PL_parser->bufend;
1566 if (!lex_next_chunk(flags))
1568 s = PL_parser->bufptr;
1569 bufend = PL_parser->bufend;
1572 if (UTF8_IS_INVARIANT(head))
1574 if (UTF8_IS_START(head)) {
1575 len = UTF8SKIP(&head);
1576 while ((STRLEN)(bufend-s) < len) {
1577 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1579 s = PL_parser->bufptr;
1580 bufend = PL_parser->bufend;
1583 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1584 if (retlen == (STRLEN)-1) {
1585 _force_out_malformed_utf8_message((U8 *) s,
1588 1 /* 1 means die */ );
1589 NOT_REACHED; /* NOTREACHED */
1594 if (!lex_next_chunk(flags))
1596 s = PL_parser->bufptr;
1603 =for apidoc lex_read_unichar
1605 Reads the next (Unicode) character in the text currently being lexed.
1606 Returns the codepoint (unsigned integer value) of the character read,
1607 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1608 if lexing has reached the end of the input text. To non-destructively
1609 examine the next character, use L</lex_peek_unichar> instead.
1611 If the next character is in (or extends into) the next chunk of input
1612 text, the next chunk will be read in. Normally the current chunk will be
1613 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1614 bit set, then the current chunk will not be discarded.
1616 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1617 is encountered, an exception is generated.
1623 Perl_lex_read_unichar(pTHX_ U32 flags)
1626 if (flags & ~(LEX_KEEP_PREVIOUS))
1627 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1628 c = lex_peek_unichar(flags);
1631 COPLINE_INC_WITH_HERELINES;
1633 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1635 ++(PL_parser->bufptr);
1641 =for apidoc lex_read_space
1643 Reads optional spaces, in Perl style, in the text currently being
1644 lexed. The spaces may include ordinary whitespace characters and
1645 Perl-style comments. C<#line> directives are processed if encountered.
1646 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1647 at a non-space character (or the end of the input text).
1649 If spaces extend into the next chunk of input text, the next chunk will
1650 be read in. Normally the current chunk will be discarded at the same
1651 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1652 chunk will not be discarded.
1657 #define LEX_NO_INCLINE 0x40000000
1658 #define LEX_NO_NEXT_CHUNK 0x80000000
1661 Perl_lex_read_space(pTHX_ U32 flags)
1664 const bool can_incline = !(flags & LEX_NO_INCLINE);
1665 bool need_incline = 0;
1666 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1667 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1668 s = PL_parser->bufptr;
1669 bufend = PL_parser->bufend;
1675 } while (!(c == '\n' || (c == 0 && s == bufend)));
1676 } else if (c == '\n') {
1679 PL_parser->linestart = s;
1685 } else if (isSPACE(c)) {
1687 } else if (c == 0 && s == bufend) {
1690 if (flags & LEX_NO_NEXT_CHUNK)
1692 PL_parser->bufptr = s;
1693 l = CopLINE(PL_curcop);
1694 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1695 got_more = lex_next_chunk(flags);
1696 CopLINE_set(PL_curcop, l);
1697 s = PL_parser->bufptr;
1698 bufend = PL_parser->bufend;
1701 if (can_incline && need_incline && PL_parser->rsfp) {
1711 PL_parser->bufptr = s;
1716 =for apidoc validate_proto
1718 This function performs syntax checking on a prototype, C<proto>.
1719 If C<warn> is true, any illegal characters or mismatched brackets
1720 will trigger illegalproto warnings, declaring that they were
1721 detected in the prototype for C<name>.
1723 The return value is C<true> if this is a valid prototype, and
1724 C<false> if it is not, regardless of whether C<warn> was C<true> or
1727 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1734 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1736 STRLEN len, origlen;
1738 bool bad_proto = FALSE;
1739 bool in_brackets = FALSE;
1740 bool after_slash = FALSE;
1741 char greedy_proto = ' ';
1742 bool proto_after_greedy_proto = FALSE;
1743 bool must_be_last = FALSE;
1744 bool underscore = FALSE;
1745 bool bad_proto_after_underscore = FALSE;
1747 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1752 p = SvPV(proto, len);
1754 for (; len--; p++) {
1757 proto_after_greedy_proto = TRUE;
1759 if (!memCHRs(";@%", *p))
1760 bad_proto_after_underscore = TRUE;
1763 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1770 in_brackets = FALSE;
1771 else if ((*p == '@' || *p == '%')
1775 must_be_last = TRUE;
1784 after_slash = FALSE;
1789 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1792 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1793 origlen, UNI_DISPLAY_ISPRINT)
1794 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1796 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1797 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1798 sv_catpvs(name2, "::");
1799 sv_catsv(name2, (SV *)name);
1803 if (proto_after_greedy_proto)
1804 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1805 "Prototype after '%c' for %" SVf " : %s",
1806 greedy_proto, SVfARG(name), p);
1808 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1809 "Missing ']' in prototype for %" SVf " : %s",
1812 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1813 "Illegal character in prototype for %" SVf " : %s",
1815 if (bad_proto_after_underscore)
1816 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1817 "Illegal character after '_' in prototype for %" SVf " : %s",
1821 return (! (proto_after_greedy_proto || bad_proto) );
1826 * This subroutine has nothing to do with tilting, whether at windmills
1827 * or pinball tables. Its name is short for "increment line". It
1828 * increments the current line number in CopLINE(PL_curcop) and checks
1829 * to see whether the line starts with a comment of the form
1830 * # line 500 "foo.pm"
1831 * If so, it sets the current line number and file to the values in the comment.
1835 S_incline(pTHX_ const char *s, const char *end)
1843 PERL_ARGS_ASSERT_INCLINE;
1847 COPLINE_INC_WITH_HERELINES;
1848 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1849 && s+1 == PL_bufend && *s == ';') {
1850 /* fake newline in string eval */
1851 CopLINE_dec(PL_curcop);
1856 while (SPACE_OR_TAB(*s))
1858 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1859 s += sizeof("line") - 1;
1862 if (SPACE_OR_TAB(*s))
1866 while (SPACE_OR_TAB(*s))
1874 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1876 while (SPACE_OR_TAB(*s))
1878 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1884 while (*t && !isSPACE(*t))
1888 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1890 if (*e != '\n' && *e != '\0')
1891 return; /* false alarm */
1893 if (!grok_atoUV(n, &uv, &e))
1895 line_num = ((line_t)uv) - 1;
1898 const STRLEN len = t - s;
1900 if (!PL_rsfp && !PL_parser->filtered) {
1901 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1902 * to *{"::_<newfilename"} */
1903 /* However, the long form of evals is only turned on by the
1904 debugger - usually they're "(eval %lu)" */
1905 GV * const cfgv = CopFILEGV(PL_curcop);
1908 STRLEN tmplen2 = len;
1912 if (tmplen2 + 2 <= sizeof smallbuf)
1915 Newx(tmpbuf2, tmplen2 + 2, char);
1920 memcpy(tmpbuf2 + 2, s, tmplen2);
1923 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1925 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1926 /* adjust ${"::_<newfilename"} to store the new file name */
1927 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1928 /* The line number may differ. If that is the case,
1929 alias the saved lines that are in the array.
1930 Otherwise alias the whole array. */
1931 if (CopLINE(PL_curcop) == line_num) {
1932 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1933 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1935 else if (GvAV(cfgv)) {
1936 AV * const av = GvAV(cfgv);
1937 const line_t start = CopLINE(PL_curcop)+1;
1938 SSize_t items = AvFILLp(av) - start;
1940 AV * const av2 = GvAVn(gv2);
1941 SV **svp = AvARRAY(av) + start;
1942 Size_t l = line_num+1;
1943 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1944 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1949 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1952 CopFILE_free(PL_curcop);
1953 CopFILE_setn(PL_curcop, s, len);
1955 CopLINE_set(PL_curcop, line_num);
1959 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1961 AV *av = CopFILEAVx(PL_curcop);
1964 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1966 sv = *av_fetch(av, 0, 1);
1967 SvUPGRADE(sv, SVt_PVMG);
1969 if (!SvPOK(sv)) SvPVCLEAR(sv);
1971 sv_catsv(sv, orig_sv);
1973 sv_catpvn(sv, buf, len);
1978 if (PL_parser->preambling == NOLINE)
1979 av_store(av, CopLINE(PL_curcop), sv);
1985 * Called to gobble the appropriate amount and type of whitespace.
1986 * Skips comments as well.
1987 * Returns the next character after the whitespace that is skipped.
1990 * Same thing, but look ahead without incrementing line numbers or
1991 * adjusting PL_linestart.
1994 #define skipspace(s) skipspace_flags(s, 0)
1995 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1998 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
2000 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
2001 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
2002 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
2005 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
2007 lex_read_space(flags | LEX_KEEP_PREVIOUS |
2008 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
2009 LEX_NO_NEXT_CHUNK : 0));
2011 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
2012 if (PL_linestart > PL_bufptr)
2013 PL_bufptr = PL_linestart;
2021 * Check the unary operators to ensure there's no ambiguity in how they're
2022 * used. An ambiguous piece of code would be:
2024 * This doesn't mean rand() + 5. Because rand() is a unary operator,
2025 * the +5 is its argument.
2033 if (PL_oldoldbufptr != PL_last_uni)
2035 while (isSPACE(*PL_last_uni))
2038 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
2039 s += UTF ? UTF8SKIP(s) : 1;
2040 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
2043 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
2044 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
2045 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
2049 * LOP : macro to build a list operator. Its behaviour has been replaced
2050 * with a subroutine, S_lop() for which LOP is just another name.
2053 #define LOP(f,x) return lop(f,x,s)
2057 * Build a list operator (or something that might be one). The rules:
2058 * - if we have a next token, then it's a list operator (no parens) for
2059 * which the next token has already been parsed; e.g.,
2062 * - if the next thing is an opening paren, then it's a function
2063 * - else it's a list operator
2067 S_lop(pTHX_ I32 f, U8 x, char *s)
2069 PERL_ARGS_ASSERT_LOP;
2074 PL_last_lop = PL_oldbufptr;
2075 PL_last_lop_op = (OPCODE)f;
2080 return REPORT(FUNC);
2083 return REPORT(FUNC);
2086 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2087 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2088 return REPORT(LSTOP);
2094 * When the lexer realizes it knows the next token (for instance,
2095 * it is reordering tokens for the parser) then it can call S_force_next
2096 * to know what token to return the next time the lexer is called. Caller
2097 * will need to set PL_nextval[] and possibly PL_expect to ensure
2098 * the lexer handles the token correctly.
2102 S_force_next(pTHX_ I32 type)
2106 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2107 tokereport(type, &NEXTVAL_NEXTTOKE);
2110 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2111 PL_nexttype[PL_nexttoke] = type;
2118 * This subroutine handles postfix deref syntax after the arrow has already
2119 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2120 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2121 * only the first, leaving yylex to find the next.
2125 S_postderef(pTHX_ int const funny, char const next)
2127 assert(funny == DOLSHARP
2128 || funny == PERLY_DOLLAR
2129 || funny == PERLY_SNAIL
2130 || funny == PERLY_PERCENT_SIGN
2131 || funny == PERLY_AMPERSAND
2132 || funny == PERLY_STAR
2135 PL_expect = XOPERATOR;
2136 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2137 assert(PERLY_SNAIL == funny || PERLY_DOLLAR == funny || DOLSHARP == funny);
2138 PL_lex_state = LEX_INTERPEND;
2139 if (PERLY_SNAIL == funny)
2140 force_next(POSTJOIN);
2142 force_next(PERLY_STAR);
2146 if (PERLY_SNAIL == funny && PL_lex_state == LEX_INTERPNORMAL
2147 && !PL_lex_brackets)
2149 PL_expect = XOPERATOR;
2158 int yyc = PL_parser->yychar;
2159 if (yyc != YYEMPTY) {
2161 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2162 if (yyc == PERLY_BRACE_OPEN || yyc == HASHBRACK || yyc == PERLY_BRACKET_OPEN) {
2163 PL_lex_allbrackets--;
2165 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2166 } else if (yyc == PERLY_PAREN_OPEN) {
2167 PL_lex_allbrackets--;
2172 PL_parser->yychar = YYEMPTY;
2177 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2179 SV * const sv = newSVpvn_utf8(start, len,
2183 && is_utf8_non_invariant_string((const U8*)start, len));
2189 * When the lexer knows the next thing is a word (for instance, it has
2190 * just seen -> and it knows that the next char is a word char, then
2191 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2195 * char *start : buffer position (must be within PL_linestr)
2196 * int token : PL_next* will be this type of bare word
2197 * (e.g., METHCALL0,BAREWORD)
2198 * int check_keyword : if true, Perl checks to make sure the word isn't
2199 * a keyword (do this if the word is a label, e.g. goto FOO)
2200 * int allow_pack : if true, : characters will also be allowed (require,
2201 * use, etc. do this)
2205 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2210 PERL_ARGS_ASSERT_FORCE_WORD;
2212 start = skipspace(start);
2214 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2215 || (allow_pack && *s == ':' && s[1] == ':') )
2217 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2218 if (check_keyword) {
2219 char *s2 = PL_tokenbuf;
2221 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2222 s2 += sizeof("CORE::") - 1;
2223 len2 -= sizeof("CORE::") - 1;
2225 if (keyword(s2, len2, 0))
2228 if (token == METHCALL0) {
2233 PL_expect = XOPERATOR;
2236 NEXTVAL_NEXTTOKE.opval
2237 = newSVOP(OP_CONST,0,
2238 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2239 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2247 * Called when the lexer wants $foo *foo &foo etc, but the program
2248 * text only contains the "foo" portion. The first argument is a pointer
2249 * to the "foo", and the second argument is the type symbol to prefix.
2250 * Forces the next token to be a "BAREWORD".
2251 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2255 S_force_ident(pTHX_ const char *s, int kind)
2257 PERL_ARGS_ASSERT_FORCE_IDENT;
2260 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2261 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2262 UTF ? SVf_UTF8 : 0));
2263 NEXTVAL_NEXTTOKE.opval = o;
2264 force_next(BAREWORD);
2266 o->op_private = OPpCONST_ENTERED;
2267 /* XXX see note in pp_entereval() for why we forgo typo
2268 warnings if the symbol must be introduced in an eval.
2270 gv_fetchpvn_flags(s, len,
2271 (PL_in_eval ? GV_ADDMULTI
2272 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2273 kind == PERLY_DOLLAR ? SVt_PV :
2274 kind == PERLY_SNAIL ? SVt_PVAV :
2275 kind == PERLY_PERCENT_SIGN ? SVt_PVHV :
2283 S_force_ident_maybe_lex(pTHX_ char pit)
2285 NEXTVAL_NEXTTOKE.ival = pit;
2290 Perl_str_to_version(pTHX_ SV *sv)
2295 const char *start = SvPV_const(sv,len);
2296 const char * const end = start + len;
2297 const bool utf = cBOOL(SvUTF8(sv));
2299 PERL_ARGS_ASSERT_STR_TO_VERSION;
2301 while (start < end) {
2305 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2310 retval += ((NV)n)/nshift;
2319 * Forces the next token to be a version number.
2320 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2321 * and if "guessing" is TRUE, then no new token is created (and the caller
2322 * must use an alternative parsing method).
2326 S_force_version(pTHX_ char *s, int guessing)
2331 PERL_ARGS_ASSERT_FORCE_VERSION;
2339 while (isDIGIT(*d) || *d == '_' || *d == '.')
2341 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2343 s = scan_num(s, &pl_yylval);
2344 version = pl_yylval.opval;
2345 ver = cSVOPx(version)->op_sv;
2346 if (SvPOK(ver) && !SvNIOK(ver)) {
2347 SvUPGRADE(ver, SVt_PVNV);
2348 SvNV_set(ver, str_to_version(ver));
2349 SvNOK_on(ver); /* hint that it is a version */
2352 else if (guessing) {
2357 /* NOTE: The parser sees the package name and the VERSION swapped */
2358 NEXTVAL_NEXTTOKE.opval = version;
2359 force_next(BAREWORD);
2365 * S_force_strict_version
2366 * Forces the next token to be a version number using strict syntax rules.
2370 S_force_strict_version(pTHX_ char *s)
2373 const char *errstr = NULL;
2375 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2377 while (isSPACE(*s)) /* leading whitespace */
2380 if (is_STRICT_VERSION(s,&errstr)) {
2381 SV *ver = newSV_type(SVt_NULL);
2382 s = (char *)scan_version(s, ver, 0);
2383 version = newSVOP(OP_CONST, 0, ver);
2385 else if ((*s != ';' && *s != '{' && *s != '}' )
2386 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2390 yyerror(errstr); /* version required */
2394 /* NOTE: The parser sees the package name and the VERSION swapped */
2395 NEXTVAL_NEXTTOKE.opval = version;
2396 force_next(BAREWORD);
2403 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2404 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2405 * unchanged, and a new SV containing the modified input is returned.
2409 S_tokeq(pTHX_ SV *sv)
2416 PERL_ARGS_ASSERT_TOKEQ;
2420 assert (!SvIsCOW(sv));
2421 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2425 /* This is relying on the SV being "well formed" with a trailing '\0' */
2426 while (s < send && !(*s == '\\' && s[1] == '\\'))
2431 if ( PL_hints & HINT_NEW_STRING ) {
2432 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2433 SVs_TEMP | SvUTF8(sv));
2437 if (s + 1 < send && (s[1] == '\\'))
2438 s++; /* all that, just for this */
2443 SvCUR_set(sv, d - SvPVX_const(sv));
2445 if ( PL_hints & HINT_NEW_STRING )
2446 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2451 * Now come three functions related to double-quote context,
2452 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2453 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2454 * interact with PL_lex_state, and create fake ( ... ) argument lists
2455 * to handle functions and concatenation.
2459 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2464 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2466 * Pattern matching will set PL_lex_op to the pattern-matching op to
2467 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2469 * OP_CONST is easy--just make the new op and return.
2471 * Everything else becomes a FUNC.
2473 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2474 * had an OP_CONST. This just sets us up for a
2475 * call to S_sublex_push().
2479 S_sublex_start(pTHX)
2481 const I32 op_type = pl_yylval.ival;
2483 if (op_type == OP_NULL) {
2484 pl_yylval.opval = PL_lex_op;
2488 if (op_type == OP_CONST) {
2489 SV *sv = PL_lex_stuff;
2490 PL_lex_stuff = NULL;
2493 if (SvTYPE(sv) == SVt_PVIV) {
2494 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2496 const char * const p = SvPV_const(sv, len);
2497 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2501 pl_yylval.opval = newSVOP(op_type, 0, sv);
2505 PL_parser->lex_super_state = PL_lex_state;
2506 PL_parser->lex_sub_inwhat = (U16)op_type;
2507 PL_parser->lex_sub_op = PL_lex_op;
2508 PL_parser->sub_no_recover = FALSE;
2509 PL_parser->sub_error_count = PL_error_count;
2510 PL_lex_state = LEX_INTERPPUSH;
2514 pl_yylval.opval = PL_lex_op;
2524 * Create a new scope to save the lexing state. The scope will be
2525 * ended in S_sublex_done. Returns a '(', starting the function arguments
2526 * to the uc, lc, etc. found before.
2527 * Sets PL_lex_state to LEX_INTERPCONCAT.
2534 const bool is_heredoc = PL_multi_close == '<';
2537 PL_lex_state = PL_parser->lex_super_state;
2538 SAVEI8(PL_lex_dojoin);
2539 SAVEI32(PL_lex_brackets);
2540 SAVEI32(PL_lex_allbrackets);
2541 SAVEI32(PL_lex_formbrack);
2542 SAVEI8(PL_lex_fakeeof);
2543 SAVEI32(PL_lex_casemods);
2544 SAVEI32(PL_lex_starts);
2545 SAVEI8(PL_lex_state);
2546 SAVESPTR(PL_lex_repl);
2547 SAVEVPTR(PL_lex_inpat);
2548 SAVEI16(PL_lex_inwhat);
2551 SAVECOPLINE(PL_curcop);
2552 SAVEI32(PL_multi_end);
2553 SAVEI32(PL_parser->herelines);
2554 PL_parser->herelines = 0;
2556 SAVEIV(PL_multi_close);
2557 SAVEPPTR(PL_bufptr);
2558 SAVEPPTR(PL_bufend);
2559 SAVEPPTR(PL_oldbufptr);
2560 SAVEPPTR(PL_oldoldbufptr);
2561 SAVEPPTR(PL_last_lop);
2562 SAVEPPTR(PL_last_uni);
2563 SAVEPPTR(PL_linestart);
2564 SAVESPTR(PL_linestr);
2565 SAVEGENERICPV(PL_lex_brackstack);
2566 SAVEGENERICPV(PL_lex_casestack);
2567 SAVEGENERICPV(PL_parser->lex_shared);
2568 SAVEBOOL(PL_parser->lex_re_reparsing);
2569 SAVEI32(PL_copline);
2571 /* The here-doc parser needs to be able to peek into outer lexing
2572 scopes to find the body of the here-doc. So we put PL_linestr and
2573 PL_bufptr into lex_shared, to 'share' those values.
2575 PL_parser->lex_shared->ls_linestr = PL_linestr;
2576 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2578 PL_linestr = PL_lex_stuff;
2579 PL_lex_repl = PL_parser->lex_sub_repl;
2580 PL_lex_stuff = NULL;
2581 PL_parser->lex_sub_repl = NULL;
2583 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2584 set for an inner quote-like operator and then an error causes scope-
2585 popping. We must not have a PL_lex_stuff value left dangling, as
2586 that breaks assumptions elsewhere. See bug #123617. */
2587 SAVEGENERICSV(PL_lex_stuff);
2588 SAVEGENERICSV(PL_parser->lex_sub_repl);
2590 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2591 = SvPVX(PL_linestr);
2592 PL_bufend += SvCUR(PL_linestr);
2593 PL_last_lop = PL_last_uni = NULL;
2594 SAVEFREESV(PL_linestr);
2595 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2597 PL_lex_dojoin = FALSE;
2598 PL_lex_brackets = PL_lex_formbrack = 0;
2599 PL_lex_allbrackets = 0;
2600 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2601 Newx(PL_lex_brackstack, 120, char);
2602 Newx(PL_lex_casestack, 12, char);
2603 PL_lex_casemods = 0;
2604 *PL_lex_casestack = '\0';
2606 PL_lex_state = LEX_INTERPCONCAT;
2608 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2609 PL_copline = NOLINE;
2611 Newxz(shared, 1, LEXSHARED);
2612 shared->ls_prev = PL_parser->lex_shared;
2613 PL_parser->lex_shared = shared;
2615 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2616 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2617 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2618 PL_lex_inpat = PL_parser->lex_sub_op;
2620 PL_lex_inpat = NULL;
2622 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2623 PL_in_eval &= ~EVAL_RE_REPARSING;
2630 * Restores lexer state after a S_sublex_push.
2636 if (!PL_lex_starts++) {
2637 SV * const sv = newSVpvs("");
2638 if (SvUTF8(PL_linestr))
2640 PL_expect = XOPERATOR;
2641 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2645 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2646 PL_lex_state = LEX_INTERPCASEMOD;
2650 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2651 assert(PL_lex_inwhat != OP_TRANSR);
2653 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2654 PL_linestr = PL_lex_repl;
2656 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2657 PL_bufend += SvCUR(PL_linestr);
2658 PL_last_lop = PL_last_uni = NULL;
2659 PL_lex_dojoin = FALSE;
2660 PL_lex_brackets = 0;
2661 PL_lex_allbrackets = 0;
2662 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2663 PL_lex_casemods = 0;
2664 *PL_lex_casestack = '\0';
2666 if (SvEVALED(PL_lex_repl)) {
2667 PL_lex_state = LEX_INTERPNORMAL;
2669 /* we don't clear PL_lex_repl here, so that we can check later
2670 whether this is an evalled subst; that means we rely on the
2671 logic to ensure sublex_done() is called again only via the
2672 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2675 PL_lex_state = LEX_INTERPCONCAT;
2678 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2679 CopLINE(PL_curcop) +=
2680 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2681 + PL_parser->herelines;
2682 PL_parser->herelines = 0;
2687 const line_t l = CopLINE(PL_curcop);
2689 if (PL_parser->sub_error_count != PL_error_count) {
2690 if (PL_parser->sub_no_recover) {
2695 if (PL_multi_close == '<')
2696 PL_parser->herelines += l - PL_multi_end;
2697 PL_bufend = SvPVX(PL_linestr);
2698 PL_bufend += SvCUR(PL_linestr);
2699 PL_expect = XOPERATOR;
2705 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2706 const STRLEN context_len, const char ** error_msg)
2708 /* Load the official _charnames module if not already there. The
2709 * parameters are just to give info for any error messages generated:
2710 * char_name a name to look up which is the reason for loading this
2711 * context 'char_name' in the context in the input in which it appears
2712 * context_len how many bytes 'context' occupies
2713 * error_msg *error_msg will be set to any error
2715 * Returns the ^H table if success; otherwise NULL */
2722 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2724 /* This loop is executed 1 1/2 times. On the first time through, if it
2725 * isn't already loaded, try loading it, and iterate just once to see if it
2727 for (i = 0; i < 2; i++) {
2728 table = GvHV(PL_hintgv); /* ^H */
2731 && (PL_hints & HINT_LOCALIZE_HH)
2732 && (cvp = hv_fetchs(table, "charnames", FALSE))
2735 return table; /* Quit if already loaded */
2739 Perl_load_module(aTHX_
2741 newSVpvs("_charnames"),
2743 /* version parameter; no need to specify it, as if we get too early
2744 * a version, will fail anyway, not being able to find 'charnames'
2753 /* Here, it failed; new_constant will give appropriate error messages */
2755 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2756 context, context_len, error_msg);
2763 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2765 /* This justs wraps get_and_check_backslash_N_name() to output any error
2766 * message it returns. */
2768 const char * error_msg = NULL;
2771 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2773 /* charnames doesn't work well if there have been errors found */
2774 if (PL_error_count > 0) {
2778 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2781 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2788 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2791 const char ** error_msg)
2793 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2794 * interior, hence to the "}". Finds what the name resolves to, returning
2795 * an SV* containing it; NULL if no valid one found.
2797 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2798 * doesn't have to be. */
2808 /* Points to the beginning of the \N{... so that any messages include the
2809 * context of what's failing*/
2810 const char* context = s - 3;
2811 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2814 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2817 assert(s > (char *) 3);
2819 while (s < e && isBLANK(*s)) {
2823 while (s < e && isBLANK(*(e - 1))) {
2827 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2829 if (!SvCUR(char_name)) {
2830 SvREFCNT_dec_NN(char_name);
2831 /* diag_listed_as: Unknown charname '%s' */
2832 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2836 /* Autoload the charnames module */
2838 table = load_charnames(char_name, context, context_len, error_msg);
2839 if (table == NULL) {
2844 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2845 context, context_len, error_msg);
2847 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2853 /* See if the charnames handler is the Perl core's, and if so, we can skip
2854 * the validation needed for a user-supplied one, as Perl's does its own
2856 cvp = hv_fetchs(table, "charnames", FALSE);
2857 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2858 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2860 const char * const name = HvNAME(stash);
2861 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2866 /* Here, it isn't Perl's charname handler. We can't rely on a
2867 * user-supplied handler to validate the input name. For non-ut8 input,
2868 * look to see that the first character is legal. Then loop through the
2869 * rest checking that each is a continuation */
2871 /* This code makes the reasonable assumption that the only Latin1-range
2872 * characters that begin a character name alias are alphabetic, otherwise
2873 * would have to create a isCHARNAME_BEGIN macro */
2876 if (! isALPHAU(*s)) {
2881 if (! isCHARNAME_CONT(*s)) {
2884 if (*s == ' ' && *(s-1) == ' ') {
2891 /* Similarly for utf8. For invariants can check directly; for other
2892 * Latin1, can calculate their code point and check; otherwise use an
2894 if (UTF8_IS_INVARIANT(*s)) {
2895 if (! isALPHAU(*s)) {
2899 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2900 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2906 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2907 utf8_to_uvchr_buf((U8 *) s,
2917 if (UTF8_IS_INVARIANT(*s)) {
2918 if (! isCHARNAME_CONT(*s)) {
2921 if (*s == ' ' && *(s-1) == ' ') {
2926 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2927 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2934 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2935 utf8_to_uvchr_buf((U8 *) s,
2945 if (*(s-1) == ' ') {
2946 /* diag_listed_as: charnames alias definitions may not contain
2947 trailing white-space; marked by <-- HERE in %s
2949 *error_msg = Perl_form(aTHX_
2950 "charnames alias definitions may not contain trailing "
2951 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2952 (int)(s - context + 1), context,
2953 (int)(e - s + 1), s + 1);
2957 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2958 const U8* first_bad_char_loc;
2960 const char* const str = SvPV_const(res, len);
2961 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2962 &first_bad_char_loc)))
2964 _force_out_malformed_utf8_message(first_bad_char_loc,
2965 (U8 *) PL_parser->bufend,
2967 0 /* 0 means don't die */ );
2968 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2969 immediately after '%s' */
2970 *error_msg = Perl_form(aTHX_
2971 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2972 (int) context_len, context,
2973 (int) ((char *) first_bad_char_loc - str), str);
2982 /* The final %.*s makes sure that should the trailing NUL be missing
2983 * that this print won't run off the end of the string */
2984 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2986 *error_msg = Perl_form(aTHX_
2987 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2988 (int)(s - context + 1), context,
2989 (int)(e - s + 1), s + 1);
2994 /* diag_listed_as: charnames alias definitions may not contain a
2995 sequence of multiple spaces; marked by <-- HERE
2997 *error_msg = Perl_form(aTHX_
2998 "charnames alias definitions may not contain a sequence of "
2999 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
3000 (int)(s - context + 1), context,
3001 (int)(e - s + 1), s + 1);
3008 Extracts the next constant part of a pattern, double-quoted string,
3009 or transliteration. This is terrifying code.
3011 For example, in parsing the double-quoted string "ab\x63$d", it would
3012 stop at the '$' and return an OP_CONST containing 'abc'.
3014 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3015 processing a pattern (PL_lex_inpat is true), a transliteration
3016 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
3018 Returns a pointer to the character scanned up to. If this is
3019 advanced from the start pointer supplied (i.e. if anything was
3020 successfully parsed), will leave an OP_CONST for the substring scanned
3021 in pl_yylval. Caller must intuit reason for not parsing further
3022 by looking at the next characters herself.
3026 \N{FOO} => \N{U+hex_for_character_FOO}
3027 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
3030 all other \-char, including \N and \N{ apart from \N{ABC}
3033 @ and $ where it appears to be a var, but not for $ as tail anchor
3037 In transliterations:
3038 characters are VERY literal, except for - not at the start or end
3039 of the string, which indicates a range. However some backslash sequences
3040 are recognized: \r, \n, and the like
3041 \007 \o{}, \x{}, \N{}
3042 If all elements in the transliteration are below 256,
3043 scan_const expands the range to the full set of intermediate
3044 characters. If the range is in utf8, the hyphen is replaced with
3045 a certain range mark which will be handled by pmtrans() in op.c.
3047 In double-quoted strings:
3049 all those recognized in transliterations
3050 deprecated backrefs: \1 (in substitution replacements)
3051 case and quoting: \U \Q \E
3054 scan_const does *not* construct ops to handle interpolated strings.
3055 It stops processing as soon as it finds an embedded $ or @ variable
3056 and leaves it to the caller to work out what's going on.
3058 embedded arrays (whether in pattern or not) could be:
3059 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
3061 $ in double-quoted strings must be the symbol of an embedded scalar.
3063 $ in pattern could be $foo or could be tail anchor. Assumption:
3064 it's a tail anchor if $ is the last thing in the string, or if it's
3065 followed by one of "()| \r\n\t"
3067 \1 (backreferences) are turned into $1 in substitutions
3069 The structure of the code is
3070 while (there's a character to process) {
3071 handle transliteration ranges
3072 skip regexp comments /(?#comment)/ and codes /(?{code})/
3073 skip #-initiated comments in //x patterns
3074 check for embedded arrays
3075 check for embedded scalars
3077 deprecate \1 in substitution replacements
3078 handle string-changing backslashes \l \U \Q \E, etc.
3079 switch (what was escaped) {
3080 handle \- in a transliteration (becomes a literal -)
3081 if a pattern and not \N{, go treat as regular character
3082 handle \132 (octal characters)
3083 handle \x15 and \x{1234} (hex characters)
3084 handle \N{name} (named characters, also \N{3,5} in a pattern)
3085 handle \cV (control characters)
3086 handle printf-style backslashes (\f, \r, \n, etc)
3089 } (end if backslash)
3090 handle regular character
3091 } (end while character to read)
3096 S_scan_const(pTHX_ char *start)
3098 const char * const send = PL_bufend;/* end of the constant */
3099 SV *sv = newSV(send - start); /* sv for the constant. See note below
3101 char *s = start; /* start of the constant */
3102 char *d = SvPVX(sv); /* destination for copies */
3103 bool dorange = FALSE; /* are we in a translit range? */
3104 bool didrange = FALSE; /* did we just finish a range? */
3105 bool in_charclass = FALSE; /* within /[...]/ */
3106 const bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
3107 UTF8? But, this can show as true
3108 when the source isn't utf8, as for
3109 example when it is entirely composed
3111 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3112 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3113 number of characters found so far
3114 that will expand (into 2 bytes)
3115 should we have to convert to
3117 SV *res; /* result from charnames */
3118 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3119 high-end character is temporarily placed */
3121 /* Does something require special handling in tr/// ? This avoids extra
3122 * work in a less likely case. As such, khw didn't feel it was worth
3123 * adding any branches to the more mainline code to handle this, which
3124 * means that this doesn't get set in some circumstances when things like
3125 * \x{100} get expanded out. As a result there needs to be extra testing
3126 * done in the tr code */
3127 bool has_above_latin1 = FALSE;
3129 /* Note on sizing: The scanned constant is placed into sv, which is
3130 * initialized by newSV() assuming one byte of output for every byte of
3131 * input. This routine expects newSV() to allocate an extra byte for a
3132 * trailing NUL, which this routine will append if it gets to the end of
3133 * the input. There may be more bytes of input than output (eg., \N{LATIN
3134 * CAPITAL LETTER A}), or more output than input if the constant ends up
3135 * recoded to utf8, but each time a construct is found that might increase
3136 * the needed size, SvGROW() is called. Its size parameter each time is
3137 * based on the best guess estimate at the time, namely the length used so
3138 * far, plus the length the current construct will occupy, plus room for
3139 * the trailing NUL, plus one byte for every input byte still unscanned */
3141 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3144 int backslash_N = 0; /* ? was the character from \N{} */
3145 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3146 platform-specific like \x65 */
3149 PERL_ARGS_ASSERT_SCAN_CONST;
3151 assert(PL_lex_inwhat != OP_TRANSR);
3153 /* Protect sv from errors and fatal warnings. */
3154 ENTER_with_name("scan_const");
3157 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3158 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3160 assert(*send == '\0');
3163 || dorange /* Handle tr/// range at right edge of input */
3166 /* get transliterations out of the way (they're most literal) */
3167 if (PL_lex_inwhat == OP_TRANS) {
3169 /* But there isn't any special handling necessary unless there is a
3170 * range, so for most cases we just drop down and handle the value
3171 * as any other. There are two exceptions.
3173 * 1. A hyphen indicates that we are actually going to have a
3174 * range. In this case, skip the '-', set a flag, then drop
3175 * down to handle what should be the end range value.
3176 * 2. After we've handled that value, the next time through, that
3177 * flag is set and we fix up the range.
3179 * Ranges entirely within Latin1 are expanded out entirely, in
3180 * order to make the transliteration a simple table look-up.
3181 * Ranges that extend above Latin1 have to be done differently, so
3182 * there is no advantage to expanding them here, so they are
3183 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3184 * a byte that can't occur in legal UTF-8, and hence can signify a
3185 * hyphen without any possible ambiguity. On EBCDIC machines, if
3186 * the range is expressed as Unicode, the Latin1 portion is
3187 * expanded out even if the range extends above Latin1. This is
3188 * because each code point in it has to be processed here
3189 * individually to get its native translation */
3193 /* Here, we don't think we're in a range. If the new character
3194 * is not a hyphen; or if it is a hyphen, but it's too close to
3195 * either edge to indicate a range, or if we haven't output any
3196 * characters yet then it's a regular character. */
3197 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3200 /* A regular character. Process like any other, but first
3201 * clear any flags */
3205 non_portable_endpoint = 0;
3208 /* The tests here for being above Latin1 and similar ones
3209 * in the following 'else' suffice to find all such
3210 * occurences in the constant, except those added by a
3211 * backslash escape sequence, like \x{100}. Mostly, those
3212 * set 'has_above_latin1' as appropriate */
3213 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3214 has_above_latin1 = TRUE;
3217 /* Drops down to generic code to process current byte */
3219 else { /* Is a '-' in the context where it means a range */
3220 if (didrange) { /* Something like y/A-C-Z// */
3221 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3227 s++; /* Skip past the hyphen */
3229 /* d now points to where the end-range character will be
3230 * placed. Drop down to get that character. We'll finish
3231 * processing the range the next time through the loop */
3233 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3234 has_above_latin1 = TRUE;
3237 /* Drops down to generic code to process current byte */
3239 } /* End of not a range */
3241 /* Here we have parsed a range. Now must handle it. At this
3243 * 'sv' is a SV* that contains the output string we are
3244 * constructing. The final two characters in that string
3245 * are the range start and range end, in order.
3246 * 'd' points to just beyond the range end in the 'sv' string,
3247 * where we would next place something
3252 IV range_max; /* last character in range */
3254 Size_t offset_to_min = 0;
3257 bool convert_unicode;
3258 IV real_range_max = 0;
3260 /* Get the code point values of the range ends. */
3261 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3262 offset_to_max = max_ptr - SvPVX_const(sv);
3264 /* We know the utf8 is valid, because we just constructed
3265 * it ourselves in previous loop iterations */
3266 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3267 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3268 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3270 /* This compensates for not all code setting
3271 * 'has_above_latin1', so that we don't skip stuff that
3272 * should be executed */
3273 if (range_max > 255) {
3274 has_above_latin1 = TRUE;
3278 min_ptr = max_ptr - 1;
3279 range_min = * (U8*) min_ptr;
3280 range_max = * (U8*) max_ptr;
3283 /* If the range is just a single code point, like tr/a-a/.../,
3284 * that code point is already in the output, twice. We can
3285 * just back up over the second instance and avoid all the rest
3286 * of the work. But if it is a variant character, it's been
3287 * counted twice, so decrement. (This unlikely scenario is
3288 * special cased, like the one for a range of 2 code points
3289 * below, only because the main-line code below needs a range
3290 * of 3 or more to work without special casing. Might as well
3291 * get it out of the way now.) */
3292 if (UNLIKELY(range_max == range_min)) {
3294 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3295 utf8_variant_count--;
3301 /* On EBCDIC platforms, we may have to deal with portable
3302 * ranges. These happen if at least one range endpoint is a
3303 * Unicode value (\N{...}), or if the range is a subset of
3304 * [A-Z] or [a-z], and both ends are literal characters,
3305 * like 'A', and not like \x{C1} */
3307 cBOOL(backslash_N) /* \N{} forces Unicode,
3308 hence portable range */
3309 || ( ! non_portable_endpoint
3310 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3311 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3312 if (convert_unicode) {
3314 /* Special handling is needed for these portable ranges.
3315 * They are defined to be in Unicode terms, which includes
3316 * all the Unicode code points between the end points.
3317 * Convert to Unicode to get the Unicode range. Later we
3318 * will convert each code point in the range back to
3320 range_min = NATIVE_TO_UNI(range_min);
3321 range_max = NATIVE_TO_UNI(range_max);
3325 if (range_min > range_max) {
3327 if (convert_unicode) {
3328 /* Need to convert back to native for meaningful
3329 * messages for this platform */
3330 range_min = UNI_TO_NATIVE(range_min);
3331 range_max = UNI_TO_NATIVE(range_max);
3334 /* Use the characters themselves for the error message if
3335 * ASCII printables; otherwise some visible representation
3337 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3339 "Invalid range \"%c-%c\" in transliteration operator",
3340 (char)range_min, (char)range_max);
3343 else if (convert_unicode) {
3344 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3346 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3347 UVXf "}\" in transliteration operator",
3348 range_min, range_max);
3352 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3354 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3355 " in transliteration operator",
3356 range_min, range_max);
3360 /* If the range is exactly two code points long, they are
3361 * already both in the output */
3362 if (UNLIKELY(range_min + 1 == range_max)) {
3366 /* Here the range contains at least 3 code points */
3370 /* If everything in the transliteration is below 256, we
3371 * can avoid special handling later. A translation table
3372 * for each of those bytes is created by op.c. So we
3373 * expand out all ranges to their constituent code points.
3374 * But if we've encountered something above 255, the
3375 * expanding won't help, so skip doing that. But if it's
3376 * EBCDIC, we may have to look at each character below 256
3377 * if we have to convert to/from Unicode values */
3378 if ( has_above_latin1
3380 && (range_min > 255 || ! convert_unicode)
3383 const STRLEN off = d - SvPVX(sv);
3384 const STRLEN extra = 1 + (send - s) + 1;
3387 /* Move the high character one byte to the right; then
3388 * insert between it and the range begin, an illegal
3389 * byte which serves to indicate this is a range (using
3390 * a '-' would be ambiguous). */
3392 if (off + extra > SvLEN(sv)) {
3393 d = off + SvGROW(sv, off + extra);
3394 max_ptr = d - off + offset_to_max;
3398 while (e-- > max_ptr) {
3401 *(e + 1) = (char) RANGE_INDICATOR;
3405 /* Here, we're going to expand out the range. For EBCDIC
3406 * the range can extend above 255 (not so in ASCII), so
3407 * for EBCDIC, split it into the parts above and below
3410 if (range_max > 255) {
3411 real_range_max = range_max;
3417 /* Here we need to expand out the string to contain each
3418 * character in the range. Grow the output to handle this.
3419 * For non-UTF8, we need a byte for each code point in the
3420 * range, minus the three that we've already allocated for: the
3421 * hyphen, the min, and the max. For UTF-8, we need this
3422 * plus an extra byte for each code point that occupies two
3423 * bytes (is variant) when in UTF-8 (except we've already
3424 * allocated for the end points, including if they are
3425 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3426 * platforms, it's easy to calculate a precise number. To
3427 * start, we count the variants in the range, which we need
3428 * elsewhere in this function anyway. (For the case where it
3429 * isn't easy to calculate, 'extras' has been initialized to 0,
3430 * and the calculation is done in a loop further down.) */
3432 if (convert_unicode)
3435 /* This is executed unconditionally on ASCII, and for
3436 * Unicode ranges on EBCDIC. Under these conditions, all
3437 * code points above a certain value are variant; and none
3438 * under that value are. We just need to find out how much
3439 * of the range is above that value. We don't count the
3440 * end points here, as they will already have been counted
3441 * as they were parsed. */
3442 if (range_min >= UTF_CONTINUATION_MARK) {
3444 /* The whole range is made up of variants */
3445 extras = (range_max - 1) - (range_min + 1) + 1;
3447 else if (range_max >= UTF_CONTINUATION_MARK) {
3449 /* Only the higher portion of the range is variants */
3450 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3453 utf8_variant_count += extras;
3456 /* The base growth is the number of code points in the range,
3457 * not including the endpoints, which have already been sized
3458 * for (and output). We don't subtract for the hyphen, as it
3459 * has been parsed but not output, and the SvGROW below is
3460 * based only on what's been output plus what's left to parse.
3462 grow = (range_max - 1) - (range_min + 1) + 1;
3466 /* In some cases in EBCDIC, we haven't yet calculated a
3467 * precise amount needed for the UTF-8 variants. Just
3468 * assume the worst case, that everything will expand by a
3470 if (! convert_unicode) {
3476 /* Otherwise we know exactly how many variants there
3477 * are in the range. */
3482 /* Grow, but position the output to overwrite the range min end
3483 * point, because in some cases we overwrite that */
3484 SvCUR_set(sv, d - SvPVX_const(sv));
3485 offset_to_min = min_ptr - SvPVX_const(sv);
3487 /* See Note on sizing above. */
3488 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3491 + 1 /* Trailing NUL */ );
3493 /* Now, we can expand out the range. */
3495 if (convert_unicode) {
3498 /* Recall that the min and max are now in Unicode terms, so
3499 * we have to convert each character to its native
3502 for (i = range_min; i <= range_max; i++) {
3503 append_utf8_from_native_byte(
3504 LATIN1_TO_NATIVE((U8) i),
3509 for (i = range_min; i <= range_max; i++) {
3510 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3516 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3518 /* Here, no conversions are necessary, which means that the
3519 * first character in the range is already in 'd' and
3520 * valid, so we can skip overwriting it */
3524 for (i = range_min + 1; i <= range_max; i++) {
3525 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3531 assert(range_min + 1 <= range_max);
3532 for (i = range_min + 1; i < range_max; i++) {
3534 /* In this case on EBCDIC, we haven't calculated
3535 * the variants. Do it here, as we go along */
3536 if (! UVCHR_IS_INVARIANT(i)) {
3537 utf8_variant_count++;
3543 /* The range_max is done outside the loop so as to
3544 * avoid having to special case not incrementing
3545 * 'utf8_variant_count' on EBCDIC (it's already been
3546 * counted when originally parsed) */
3547 *d++ = (char) range_max;
3552 /* If the original range extended above 255, add in that
3554 if (real_range_max) {
3555 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3556 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3557 if (real_range_max > 0x100) {
3558 if (real_range_max > 0x101) {
3559 *d++ = (char) RANGE_INDICATOR;
3561 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3567 /* mark the range as done, and continue */
3571 non_portable_endpoint = 0;
3575 } /* End of is a range */
3576 } /* End of transliteration. Joins main code after these else's */
3577 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3580 while (s1 >= start && *s1-- == '\\')
3583 in_charclass = TRUE;
3585 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3588 while (s1 >= start && *s1-- == '\\')
3591 in_charclass = FALSE;
3593 /* skip for regexp comments /(?#comment)/, except for the last
3594 * char, which will be done separately. Stop on (?{..}) and
3596 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3599 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3601 while (s + len < send && *s != ')') {
3602 Copy(s, d, len, U8);
3605 len = UTF8_SAFE_SKIP(s, send);
3608 else while (s+1 < send && *s != ')') {
3612 else if (!PL_lex_casemods
3613 && ( s[2] == '{' /* This should match regcomp.c */
3614 || (s[2] == '?' && s[3] == '{')))
3619 /* likewise skip #-initiated comments in //x patterns */
3623 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3625 while (s < send && *s != '\n')
3628 /* no further processing of single-quoted regex */
3629 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3630 goto default_action;
3632 /* check for embedded arrays
3633 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3635 else if (*s == '@' && s[1]) {
3637 ? isIDFIRST_utf8_safe(s+1, send)
3638 : isWORDCHAR_A(s[1]))
3642 if (memCHRs(":'{$", s[1]))
3644 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3645 break; /* in regexp, neither @+ nor @- are interpolated */
3647 /* check for embedded scalars. only stop if we're sure it's a
3649 else if (*s == '$') {
3650 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3652 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3654 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3655 "Possible unintended interpolation of $\\ in regex");
3657 break; /* in regexp, $ might be tail anchor */
3661 /* End of else if chain - OP_TRANS rejoin rest */
3663 if (UNLIKELY(s >= send)) {
3669 if (*s == '\\' && s+1 < send) {
3670 char* bslash = s; /* point to beginning \ */
3671 char* rbrace; /* point to ending '}' */
3672 char* e; /* 1 past the meat (non-blanks) before the
3676 /* warn on \1 - \9 in substitution replacements, but note that \11
3677 * is an octal; and \19 is \1 followed by '9' */
3678 if (PL_lex_inwhat == OP_SUBST
3684 /* diag_listed_as: \%d better written as $%d */
3685 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3691 /* string-change backslash escapes */
3692 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3696 /* In a pattern, process \N, but skip any other backslash escapes.
3697 * This is because we don't want to translate an escape sequence
3698 * into a meta symbol and have the regex compiler use the meta
3699 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3700 * in spite of this, we do have to process \N here while the proper
3701 * charnames handler is in scope. See bugs #56444 and #62056.
3703 * There is a complication because \N in a pattern may also stand
3704 * for 'match a non-nl', and not mean a charname, in which case its
3705 * processing should be deferred to the regex compiler. To be a
3706 * charname it must be followed immediately by a '{', and not look
3707 * like \N followed by a curly quantifier, i.e., not something like
3708 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3710 else if (PL_lex_inpat
3713 || regcurly(s + 1, send, NULL)))
3716 goto default_action;
3722 if ((isALPHANUMERIC(*s)))
3723 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3724 "Unrecognized escape \\%c passed through",
3726 /* default action is to copy the quoted character */
3727 goto default_action;
3730 /* eg. \132 indicates the octal constant 0132 */
3731 case '0': case '1': case '2': case '3':
3732 case '4': case '5': case '6': case '7':
3734 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3735 | PERL_SCAN_NOTIFY_ILLDIGIT;
3737 uv = grok_oct(s, &len, &flags, NULL);
3739 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3741 && isDIGIT(*s) /* like \08, \178 */
3742 && ckWARN(WARN_MISC))
3744 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3745 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3748 goto NUM_ESCAPE_INSERT;
3750 /* eg. \o{24} indicates the octal constant \024 */
3755 if (! grok_bslash_o(&s, send,
3758 FALSE, /* Not strict */
3759 FALSE, /* No illegal cp's */
3763 uv = 0; /* drop through to ensure range ends are set */
3765 goto NUM_ESCAPE_INSERT;
3768 /* eg. \x24 indicates the hex constant 0x24 */
3773 if (! grok_bslash_x(&s, send,
3776 FALSE, /* Not strict */
3777 FALSE, /* No illegal cp's */
3781 uv = 0; /* drop through to ensure range ends are set */
3786 /* Insert oct or hex escaped character. */
3788 /* Here uv is the ordinal of the next character being added */
3789 if (UVCHR_IS_INVARIANT(uv)) {
3793 if (!d_is_utf8 && uv > 255) {
3795 /* Here, 'uv' won't fit unless we convert to UTF-8.
3796 * If we've only seen invariants so far, all we have to
3797 * do is turn on the flag */
3798 if (utf8_variant_count == 0) {
3802 SvCUR_set(sv, d - SvPVX_const(sv));
3806 sv_utf8_upgrade_flags_grow(
3808 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3810 /* Since we're having to grow here,
3811 * make sure we have enough room for
3812 * this escape and a NUL, so the
3813 * code immediately below won't have
3814 * to actually grow again */
3816 + (STRLEN)(send - s) + 1);
3817 d = SvPVX(sv) + SvCUR(sv);
3820 has_above_latin1 = TRUE;
3826 utf8_variant_count++;
3829 /* Usually, there will already be enough room in 'sv'
3830 * since such escapes are likely longer than any UTF-8
3831 * sequence they can end up as. This isn't the case on
3832 * EBCDIC where \x{40000000} contains 12 bytes, and the
3833 * UTF-8 for it contains 14. And, we have to allow for
3834 * a trailing NUL. It probably can't happen on ASCII
3835 * platforms, but be safe. See Note on sizing above. */
3836 const STRLEN needed = d - SvPVX(sv)
3840 if (UNLIKELY(needed > SvLEN(sv))) {
3841 SvCUR_set(sv, d - SvPVX_const(sv));
3842 d = SvCUR(sv) + SvGROW(sv, needed);
3845 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3846 (ckWARN(WARN_PORTABLE))
3847 ? UNICODE_WARN_PERL_EXTENDED
3852 non_portable_endpoint++;
3857 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3858 * named character, like \N{LATIN SMALL LETTER A}, or a named
3859 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3860 * GRAVE} (except y/// can't handle the latter, croaking). For
3861 * convenience all three forms are referred to as "named
3862 * characters" below.
3864 * For patterns, \N also can mean to match a non-newline. Code
3865 * before this 'switch' statement should already have handled
3866 * this situation, and hence this code only has to deal with
3867 * the named character cases.
3869 * For non-patterns, the named characters are converted to
3870 * their string equivalents. In patterns, named characters are
3871 * not converted to their ultimate forms for the same reasons
3872 * that other escapes aren't (mainly that the ultimate
3873 * character could be considered a meta-symbol by the regex
3874 * compiler). Instead, they are converted to the \N{U+...}
3875 * form to get the value from the charnames that is in effect
3876 * right now, while preserving the fact that it was a named
3877 * character, so that the regex compiler knows this.
3879 * The structure of this section of code (besides checking for
3880 * errors and upgrading to utf8) is:
3881 * If the named character is of the form \N{U+...}, pass it
3882 * through if a pattern; otherwise convert the code point
3884 * Otherwise must be some \N{NAME}: convert to
3885 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3887 * Transliteration is an exception. The conversion to utf8 is
3888 * only done if the code point requires it to be representable.
3890 * Here, 's' points to the 'N'; the test below is guaranteed to
3891 * succeed if we are being called on a pattern, as we already
3892 * know from a test above that the next character is a '{'. A
3893 * non-pattern \N must mean 'named character', which requires
3897 yyerror("Missing braces on \\N{}");
3903 /* If there is no matching '}', it is an error. */
3904 if (! (rbrace = (char *) memchr(s, '}', send - s))) {
3905 if (! PL_lex_inpat) {
3906 yyerror("Missing right brace on \\N{}");
3908 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3910 yyquit(); /* Have exhausted the input. */
3913 /* Here it looks like a named character */
3914 while (s < rbrace && isBLANK(*s)) {
3919 while (s < e && isBLANK(*(e - 1))) {
3923 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3924 s += 2; /* Skip to next char after the 'U+' */
3927 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3928 /* Check the syntax. */
3929 if (!isXDIGIT(*s)) {
3932 "Invalid hexadecimal number in \\N{U+...}"
3941 else if ((*s == '.' || *s == '_')
3947 /* Pass everything through unchanged.
3948 * +1 is to include the '}' */
3949 Copy(bslash, d, rbrace - bslash + 1, char);
3950 d += rbrace - bslash + 1;
3952 else { /* Not a pattern: convert the hex to string */
3953 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3954 | PERL_SCAN_SILENT_ILLDIGIT
3955 | PERL_SCAN_SILENT_OVERFLOW
3956 | PERL_SCAN_DISALLOW_PREFIX;
3959 uv = grok_hex(s, &len, &flags, NULL);
3960 if (len == 0 || (len != (STRLEN)(e - s)))
3963 if ( uv > MAX_LEGAL_CP
3964 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3966 yyerror(form_cp_too_large_msg(16, s, len, 0));
3967 uv = 0; /* drop through to ensure range ends are
3971 /* For non-tr///, if the destination is not in utf8,
3972 * unconditionally recode it to be so. This is
3973 * because \N{} implies Unicode semantics, and scalars
3974 * have to be in utf8 to guarantee those semantics.
3975 * tr/// doesn't care about Unicode rules, so no need
3976 * there to upgrade to UTF-8 for small enough code
3978 if (! d_is_utf8 && ( uv > 0xFF
3979 || PL_lex_inwhat != OP_TRANS))
3981 /* See Note on sizing above. */
3982 const STRLEN extra = OFFUNISKIP(uv) + (send - rbrace) + 1;
3984 SvCUR_set(sv, d - SvPVX_const(sv));
3988 if (utf8_variant_count == 0) {
3990 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3993 sv_utf8_upgrade_flags_grow(
3995 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3997 d = SvPVX(sv) + SvCUR(sv);
4001 has_above_latin1 = TRUE;
4004 /* Add the (Unicode) code point to the output. */
4005 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
4006 *d++ = (char) LATIN1_TO_NATIVE(uv);
4009 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
4010 (ckWARN(WARN_PORTABLE))
4011 ? UNICODE_WARN_PERL_EXTENDED
4016 else /* Here is \N{NAME} but not \N{U+...}. */
4017 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
4018 { /* Failed. We should die eventually, but for now use a NUL
4022 else { /* Successfully evaluated the name */
4024 const char *str = SvPV_const(res, len);
4027 if (! len) { /* The name resolved to an empty string */
4028 const char empty_N[] = "\\N{_}";
4029 Copy(empty_N, d, sizeof(empty_N) - 1, char);
4030 d += sizeof(empty_N) - 1;
4033 /* In order to not lose information for the regex
4034 * compiler, pass the result in the specially made
4035 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
4036 * the code points in hex of each character
4037 * returned by charnames */
4039 const char *str_end = str + len;
4040 const STRLEN off = d - SvPVX_const(sv);
4042 if (! SvUTF8(res)) {
4043 /* For the non-UTF-8 case, we can determine the
4044 * exact length needed without having to parse
4045 * through the string. Each character takes up
4046 * 2 hex digits plus either a trailing dot or
4048 const char initial_text[] = "\\N{U+";
4049 const STRLEN initial_len = sizeof(initial_text)
4051 d = off + SvGROW(sv, off
4054 /* +1 for trailing NUL */
4057 + (STRLEN)(send - rbrace));
4058 Copy(initial_text, d, initial_len, char);
4060 while (str < str_end) {
4063 my_snprintf(hex_string,
4067 /* The regex compiler is
4068 * expecting Unicode, not
4070 NATIVE_TO_LATIN1(*str));
4071 PERL_MY_SNPRINTF_POST_GUARD(len,
4072 sizeof(hex_string));
4073 Copy(hex_string, d, 3, char);
4077 d--; /* Below, we will overwrite the final
4078 dot with a right brace */
4081 STRLEN char_length; /* cur char's byte length */
4083 /* and the number of bytes after this is
4084 * translated into hex digits */
4085 STRLEN output_length;
4087 /* 2 hex per byte; 2 chars for '\N'; 2 chars
4088 * for max('U+', '.'); and 1 for NUL */
4089 char hex_string[2 * UTF8_MAXBYTES + 5];
4091 /* Get the first character of the result. */
4092 U32 uv = utf8n_to_uvchr((U8 *) str,
4096 /* Convert first code point to Unicode hex,
4097 * including the boiler plate before it. */
4099 my_snprintf(hex_string, sizeof(hex_string),
4101 (unsigned int) NATIVE_TO_UNI(uv));
4103 /* Make sure there is enough space to hold it */
4104 d = off + SvGROW(sv, off
4106 + (STRLEN)(send - rbrace)
4107 + 2); /* '}' + NUL */
4109 Copy(hex_string, d, output_length, char);
4112 /* For each subsequent character, append dot and
4113 * its Unicode code point in hex */
4114 while ((str += char_length) < str_end) {
4115 const STRLEN off = d - SvPVX_const(sv);
4116 U32 uv = utf8n_to_uvchr((U8 *) str,
4121 my_snprintf(hex_string,
4124 (unsigned int) NATIVE_TO_UNI(uv));
4126 d = off + SvGROW(sv, off
4128 + (STRLEN)(send - rbrace)
4129 + 2); /* '}' + NUL */
4130 Copy(hex_string, d, output_length, char);
4135 *d++ = '}'; /* Done. Add the trailing brace */
4138 else { /* Here, not in a pattern. Convert the name to a
4141 if (PL_lex_inwhat == OP_TRANS) {
4142 str = SvPV_const(res, len);
4143 if (len > ((SvUTF8(res))
4147 yyerror(Perl_form(aTHX_
4148 "%.*s must not be a named sequence"
4149 " in transliteration operator",
4150 /* +1 to include the "}" */
4151 (int) (rbrace + 1 - start), start));
4153 goto end_backslash_N;
4156 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4157 has_above_latin1 = TRUE;
4161 else if (! SvUTF8(res)) {
4162 /* Make sure \N{} return is UTF-8. This is because
4163 * \N{} implies Unicode semantics, and scalars have
4164 * to be in utf8 to guarantee those semantics; but
4165 * not needed in tr/// */
4166 sv_utf8_upgrade_flags(res, 0);
4167 str = SvPV_const(res, len);
4170 /* Upgrade destination to be utf8 if this new
4172 if (! d_is_utf8 && SvUTF8(res)) {
4173 /* See Note on sizing above. */
4174 const STRLEN extra = len + (send - s) + 1;
4176 SvCUR_set(sv, d - SvPVX_const(sv));
4180 if (utf8_variant_count == 0) {
4182 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4185 sv_utf8_upgrade_flags_grow(sv,
4186 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4188 d = SvPVX(sv) + SvCUR(sv);
4191 } else if (len > (STRLEN)(e - s + 4)) { /* +4 is for \N{} */
4193 /* See Note on sizing above. (NOTE: SvCUR() is not
4194 * set correctly here). */
4195 const STRLEN extra = len + (send - rbrace) + 1;
4196 const STRLEN off = d - SvPVX_const(sv);
4197 d = off + SvGROW(sv, off + extra);
4199 Copy(str, d, len, char);
4205 } /* End \N{NAME} */
4209 backslash_N++; /* \N{} is defined to be Unicode */
4211 s = rbrace + 1; /* Point to just after the '}' */
4214 /* \c is a control character */
4218 const char * message;
4220 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4222 yyquit(); /* Have always immediately croaked on
4228 yyerror("Missing control char name in \\c");
4229 yyquit(); /* Are at end of input, no sense continuing */
4232 non_portable_endpoint++;
4236 /* printf-style backslashes, formfeeds, newlines, etc */
4262 } /* end if (backslash) */
4265 /* Just copy the input to the output, though we may have to convert
4268 * If the input has the same representation in UTF-8 as not, it will be
4269 * a single byte, and we don't care about UTF8ness; just copy the byte */
4270 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4273 else if (! s_is_utf8 && ! d_is_utf8) {
4274 /* If neither source nor output is UTF-8, is also a single byte,
4275 * just copy it; but this byte counts should we later have to
4276 * convert to UTF-8 */
4278 utf8_variant_count++;
4280 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4281 const STRLEN len = UTF8SKIP(s);
4283 /* We expect the source to have already been checked for
4285 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4287 Copy(s, d, len, U8);
4291 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4292 STRLEN need = send - s + 1; /* See Note on sizing above. */
4294 SvCUR_set(sv, d - SvPVX_const(sv));
4298 if (utf8_variant_count == 0) {
4300 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4303 sv_utf8_upgrade_flags_grow(sv,
4304 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4306 d = SvPVX(sv) + SvCUR(sv);
4309 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4311 else { /* UTF8ness matters: convert this non-UTF8 source char to
4312 UTF-8 for output. It will occupy 2 bytes, but don't include
4313 the input byte since we haven't incremented 's' yet. See
4314 Note on sizing above. */
4315 const STRLEN off = d - SvPVX(sv);
4316 const STRLEN extra = 2 + (send - s - 1) + 1;
4317 if (off + extra > SvLEN(sv)) {
4318 d = off + SvGROW(sv, off + extra);
4320 *d++ = UTF8_EIGHT_BIT_HI(*s);
4321 *d++ = UTF8_EIGHT_BIT_LO(*s);
4324 } /* while loop to process each character */
4327 const STRLEN off = d - SvPVX(sv);
4329 /* See if room for the terminating NUL */
4330 if (UNLIKELY(off >= SvLEN(sv))) {
4334 if (off > SvLEN(sv))
4336 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4337 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4339 /* Whew! Here we don't have room for the terminating NUL, but
4340 * everything else so far has fit. It's not too late to grow
4341 * to fit the NUL and continue on. But it is a bug, as the code
4342 * above was supposed to have made room for this, so under
4343 * DEBUGGING builds, we panic anyway. */
4344 d = off + SvGROW(sv, off + 1);
4348 /* terminate the string and set up the sv */
4350 SvCUR_set(sv, d - SvPVX_const(sv));
4357 /* shrink the sv if we allocated more than we used */
4358 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4359 SvPV_shrink_to_cur(sv);
4362 /* return the substring (via pl_yylval) only if we parsed anything */
4365 for (; s2 < s; s2++) {
4367 COPLINE_INC_WITH_HERELINES;
4369 SvREFCNT_inc_simple_void_NN(sv);
4370 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4371 && ! PL_parser->lex_re_reparsing)
4373 const char *const key = PL_lex_inpat ? "qr" : "q";
4374 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4378 if (PL_lex_inwhat == OP_TRANS) {
4381 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4384 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4392 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4393 type, typelen, NULL);
4395 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4397 LEAVE_with_name("scan_const");
4402 * Returns TRUE if there's more to the expression (e.g., a subscript),
4405 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4407 * ->[ and ->{ return TRUE
4408 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4409 * { and [ outside a pattern are always subscripts, so return TRUE
4410 * if we're outside a pattern and it's not { or [, then return FALSE
4411 * if we're in a pattern and the first char is a {
4412 * {4,5} (any digits around the comma) returns FALSE
4413 * if we're in a pattern and the first char is a [
4415 * [SOMETHING] has a funky algorithm to decide whether it's a
4416 * character class or not. It has to deal with things like
4417 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4418 * anything else returns TRUE
4421 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4424 S_intuit_more(pTHX_ char *s, char *e)
4426 PERL_ARGS_ASSERT_INTUIT_MORE;
4428 if (PL_lex_brackets)
4430 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4432 if (*s == '-' && s[1] == '>'
4433 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4434 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4435 ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4437 if (*s != '{' && *s != '[')
4439 PL_parser->sub_no_recover = TRUE;
4443 /* In a pattern, so maybe we have {n,m}. */
4445 if (regcurly(s, e, NULL)) {
4451 /* On the other hand, maybe we have a character class */
4454 if (*s == ']' || *s == '^')
4457 /* this is terrifying, and it works */
4460 const char * const send = (char *) memchr(s, ']', e - s);
4461 unsigned char un_char, last_un_char;
4462 char tmpbuf[sizeof PL_tokenbuf * 4];
4464 if (!send) /* has to be an expression */
4466 weight = 2; /* let's weigh the evidence */
4470 else if (isDIGIT(*s)) {
4472 if (isDIGIT(s[1]) && s[2] == ']')
4478 Zero(seen,256,char);
4480 for (; s < send; s++) {
4481 last_un_char = un_char;
4482 un_char = (unsigned char)*s;
4487 weight -= seen[un_char] * 10;
4488 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4490 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4491 len = (int)strlen(tmpbuf);
4492 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4493 UTF ? SVf_UTF8 : 0, SVt_PV))
4500 && memCHRs("[#!%*<>()-=",s[1]))
4502 if (/*{*/ memCHRs("])} =",s[2]))
4511 if (memCHRs("wds]",s[1]))
4513 else if (seen[(U8)'\''] || seen[(U8)'"'])
4515 else if (memCHRs("rnftbxcav",s[1]))
4517 else if (isDIGIT(s[1])) {
4519 while (s[1] && isDIGIT(s[1]))
4529 if (memCHRs("aA01! ",last_un_char))
4531 if (memCHRs("zZ79~",s[1]))
4533 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4534 weight -= 5; /* cope with negative subscript */
4537 if (!isWORDCHAR(last_un_char)
4538 && !(last_un_char == '$' || last_un_char == '@'
4539 || last_un_char == '&')
4540 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4544 if (keyword(d, s - d, 0))
4547 if (un_char == last_un_char + 1)
4549 weight -= seen[un_char];
4554 if (weight >= 0) /* probably a character class */
4564 * Does all the checking to disambiguate
4566 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4567 * METHCALL (bar->foo(args)) or METHCALL0 (bar->foo args).
4569 * First argument is the stuff after the first token, e.g. "bar".
4571 * Not a method if foo is a filehandle.
4572 * Not a method if foo is a subroutine prototyped to take a filehandle.
4573 * Not a method if it's really "Foo $bar"
4574 * Method if it's "foo $bar"
4575 * Not a method if it's really "print foo $bar"
4576 * Method if it's really "foo package::" (interpreted as package->foo)
4577 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4578 * Not a method if bar is a filehandle or package, but is quoted with
4583 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4585 char *s = start + (*start == '$');
4586 char tmpbuf[sizeof PL_tokenbuf];
4589 /* Mustn't actually add anything to a symbol table.
4590 But also don't want to "initialise" any placeholder
4591 constants that might already be there into full
4592 blown PVGVs with attached PVCV. */
4594 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4596 PERL_ARGS_ASSERT_INTUIT_METHOD;
4598 if (!FEATURE_INDIRECT_IS_ENABLED)
4601 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4603 if (cv && SvPOK(cv)) {
4604 const char *proto = CvPROTO(cv);
4606 while (*proto && (isSPACE(*proto) || *proto == ';'))
4613 if (*start == '$') {
4614 SSize_t start_off = start - SvPVX(PL_linestr);
4615 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4616 || isUPPER(*PL_tokenbuf))
4618 /* this could be $# */
4621 PL_bufptr = SvPVX(PL_linestr) + start_off;
4623 return *s == '(' ? METHCALL : METHCALL0;
4626 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4627 /* start is the beginning of the possible filehandle/object,
4628 * and s is the end of it
4629 * tmpbuf is a copy of it (but with single quotes as double colons)
4632 if (!keyword(tmpbuf, len, 0)) {
4633 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4638 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4639 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4641 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4642 && (!isGV(indirgv) || GvCVu(indirgv)))
4644 /* filehandle or package name makes it a method */
4645 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4647 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4648 return 0; /* no assumptions -- "=>" quotes bareword */
4650 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4651 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4652 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4654 force_next(BAREWORD);
4656 return *s == '(' ? METHCALL : METHCALL0;
4662 /* Encoded script support. filter_add() effectively inserts a
4663 * 'pre-processing' function into the current source input stream.
4664 * Note that the filter function only applies to the current source file
4665 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4667 * The datasv parameter (which may be NULL) can be used to pass
4668 * private data to this instance of the filter. The filter function
4669 * can recover the SV using the FILTER_DATA macro and use it to
4670 * store private buffers and state information.
4672 * The supplied datasv parameter is upgraded to a PVIO type
4673 * and the IoDIRP/IoANY field is used to store the function pointer,
4674 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4675 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4676 * private use must be set using malloc'd pointers.
4680 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4688 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4689 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4691 if (!PL_rsfp_filters)
4692 PL_rsfp_filters = newAV();
4695 SvUPGRADE(datasv, SVt_PVIO);
4696 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4697 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4698 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4699 FPTR2DPTR(void *, IoANY(datasv)),
4700 SvPV_nolen(datasv)));
4701 av_unshift(PL_rsfp_filters, 1);
4702 av_store(PL_rsfp_filters, 0, datasv) ;
4704 !PL_parser->filtered
4705 && PL_parser->lex_flags & LEX_EVALBYTES
4706 && PL_bufptr < PL_bufend
4708 const char *s = PL_bufptr;
4709 while (s < PL_bufend) {
4711 SV *linestr = PL_parser->linestr;
4712 char *buf = SvPVX(linestr);
4713 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4714 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4715 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4716 STRLEN const linestart_pos = PL_parser->linestart - buf;
4717 STRLEN const last_uni_pos =
4718 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4719 STRLEN const last_lop_pos =
4720 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4721 av_push(PL_rsfp_filters, linestr);
4722 PL_parser->linestr =
4723 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4724 buf = SvPVX(PL_parser->linestr);
4725 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4726 PL_parser->bufptr = buf + bufptr_pos;
4727 PL_parser->oldbufptr = buf + oldbufptr_pos;
4728 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4729 PL_parser->linestart = buf + linestart_pos;
4730 if (PL_parser->last_uni)
4731 PL_parser->last_uni = buf + last_uni_pos;
4732 if (PL_parser->last_lop)
4733 PL_parser->last_lop = buf + last_lop_pos;
4734 SvLEN_set(linestr, SvCUR(linestr));
4735 SvCUR_set(linestr, s - SvPVX(linestr));
4736 PL_parser->filtered = 1;
4746 =for apidoc_section $filters
4747 =for apidoc filter_del
4749 Delete most recently added instance of the filter function argument
4755 Perl_filter_del(pTHX_ filter_t funcp)
4759 PERL_ARGS_ASSERT_FILTER_DEL;
4762 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4763 FPTR2DPTR(void*, funcp)));
4765 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4767 /* if filter is on top of stack (usual case) just pop it off */
4768 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4769 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4770 sv_free(av_pop(PL_rsfp_filters));
4774 /* we need to search for the correct entry and clear it */
4775 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4779 /* Invoke the idxth filter function for the current rsfp. */
4780 /* maxlen 0 = read one text line */
4782 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4787 /* This API is bad. It should have been using unsigned int for maxlen.
4788 Not sure if we want to change the API, but if not we should sanity
4789 check the value here. */
4790 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4792 PERL_ARGS_ASSERT_FILTER_READ;
4794 if (!PL_parser || !PL_rsfp_filters)
4796 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4797 /* Provide a default input filter to make life easy. */
4798 /* Note that we append to the line. This is handy. */
4799 DEBUG_P(PerlIO_printf(Perl_debug_log,
4800 "filter_read %d: from rsfp\n", idx));
4801 if (correct_length) {
4804 const int old_len = SvCUR(buf_sv);
4806 /* ensure buf_sv is large enough */
4807 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4808 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4809 correct_length)) <= 0) {
4810 if (PerlIO_error(PL_rsfp))
4811 return -1; /* error */
4813 return 0 ; /* end of file */
4815 SvCUR_set(buf_sv, old_len + len) ;
4816 SvPVX(buf_sv)[old_len + len] = '\0';
4819 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4820 if (PerlIO_error(PL_rsfp))
4821 return -1; /* error */
4823 return 0 ; /* end of file */
4826 return SvCUR(buf_sv);
4828 /* Skip this filter slot if filter has been deleted */
4829 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4830 DEBUG_P(PerlIO_printf(Perl_debug_log,
4831 "filter_read %d: skipped (filter deleted)\n",
4833 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4835 if (SvTYPE(datasv) != SVt_PVIO) {
4836 if (correct_length) {
4838 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4839 if (!remainder) return 0; /* eof */
4840 if (correct_length > remainder) correct_length = remainder;
4841 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4842 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4845 const char *s = SvEND(datasv);
4846 const char *send = SvPVX(datasv) + SvLEN(datasv);
4854 if (s == send) return 0; /* eof */
4855 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4856 SvCUR_set(datasv, s-SvPVX(datasv));
4858 return SvCUR(buf_sv);
4860 /* Get function pointer hidden within datasv */
4861 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4862 DEBUG_P(PerlIO_printf(Perl_debug_log,
4863 "filter_read %d: via function %p (%s)\n",
4864 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4865 /* Call function. The function is expected to */
4866 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4867 /* Return: <0:error, =0:eof, >0:not eof */
4869 save_scalar(PL_errgv);
4870 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4876 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4878 PERL_ARGS_ASSERT_FILTER_GETS;
4880 #ifdef PERL_CR_FILTER
4881 if (!PL_rsfp_filters) {
4882 filter_add(S_cr_textfilter,NULL);
4885 if (PL_rsfp_filters) {
4887 SvCUR_set(sv, 0); /* start with empty line */
4888 if (FILTER_READ(0, sv, 0) > 0)
4889 return ( SvPVX(sv) ) ;
4894 return (sv_gets(sv, PL_rsfp, append));
4898 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4902 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4904 if (memEQs(pkgname, len, "__PACKAGE__"))
4908 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4909 && (gv = gv_fetchpvn_flags(pkgname,
4911 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4913 return GvHV(gv); /* Foo:: */
4916 /* use constant CLASS => 'MyClass' */
4917 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4918 if (gv && GvCV(gv)) {
4919 SV * const sv = cv_const_sv(GvCV(gv));
4921 return gv_stashsv(sv, 0);
4924 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4929 S_tokenize_use(pTHX_ int is_use, char *s) {
4930 PERL_ARGS_ASSERT_TOKENIZE_USE;
4932 if (PL_expect != XSTATE)
4933 /* diag_listed_as: "use" not allowed in expression */
4934 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4935 is_use ? "use" : "no"));
4938 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4939 s = force_version(s, TRUE);
4940 if (*s == ';' || *s == '}'
4941 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4942 NEXTVAL_NEXTTOKE.opval = NULL;
4943 force_next(BAREWORD);
4945 else if (*s == 'v') {
4946 s = force_word(s,BAREWORD,FALSE,TRUE);
4947 s = force_version(s, FALSE);
4951 s = force_word(s,BAREWORD,FALSE,TRUE);
4952 s = force_version(s, FALSE);
4954 pl_yylval.ival = is_use;
4958 static const char* const exp_name[] =
4959 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4960 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4961 "SIGVAR", "TERMORDORDOR"
4965 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4967 S_word_takes_any_delimiter(char *p, STRLEN len)
4969 return (len == 1 && memCHRs("msyq", p[0]))
4971 && ((p[0] == 't' && p[1] == 'r')
4972 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4976 S_check_scalar_slice(pTHX_ char *s)
4979 while (SPACE_OR_TAB(*s)) s++;
4980 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4986 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4987 || (*s && memCHRs(" \t$#+-'\"", *s)))
4989 s += UTF ? UTF8SKIP(s) : 1;
4991 if (*s == '}' || *s == ']')
4992 pl_yylval.ival = OPpSLICEWARNING;
4995 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4997 S_lex_token_boundary(pTHX)
4999 PL_oldoldbufptr = PL_oldbufptr;
5000 PL_oldbufptr = PL_bufptr;
5003 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
5005 S_vcs_conflict_marker(pTHX_ char *s)
5007 lex_token_boundary();
5009 yyerror("Version control conflict marker");
5010 while (s < PL_bufend && *s != '\n')
5016 yyl_sigvar(pTHX_ char *s)
5018 /* we expect the sigil and optional var name part of a
5019 * signature element here. Since a '$' is not necessarily
5020 * followed by a var name, handle it specially here; the general
5021 * yylex code would otherwise try to interpret whatever follows
5022 * as a var; e.g. ($, ...) would be seen as the var '$,'
5029 PL_bufptr = s; /* for error reporting */
5034 /* spot stuff that looks like an prototype */
5035 if (memCHRs("$:@%&*;\\[]", *s)) {
5036 yyerror("Illegal character following sigil in a subroutine signature");
5039 /* '$#' is banned, while '$ # comment' isn't */
5041 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5045 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5046 char *dest = PL_tokenbuf + 1;
5047 /* read var name, including sigil, into PL_tokenbuf */
5048 PL_tokenbuf[0] = sigil;
5049 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5050 0, cBOOL(UTF), FALSE, FALSE);
5052 assert(PL_tokenbuf[1]); /* we have a variable name */
5060 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5061 * as the ASSIGNOP, and exclude other tokens that start with =
5063 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
5064 /* save now to report with the same context as we did when
5065 * all ASSIGNOPS were accepted */
5069 NEXTVAL_NEXTTOKE.ival = 0;
5070 force_next(ASSIGNOP);
5073 else if (*s == ',' || *s == ')') {
5074 PL_expect = XOPERATOR;
5077 /* make sure the context shows the unexpected character and
5078 * hopefully a bit more */
5080 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5082 PL_bufptr = s; /* for error reporting */
5083 yyerror("Illegal operator following parameter in a subroutine signature");
5087 NEXTVAL_NEXTTOKE.ival = sigil;
5088 force_next('p'); /* force a signature pending identifier */
5095 case ',': /* handle ($a,,$b) */
5100 yyerror("A signature parameter must start with '$', '@' or '%'");
5101 /* very crude error recovery: skip to likely next signature
5103 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5109 case ',': TOKEN (PERLY_COMMA);
5110 case '$': TOKEN (PERLY_DOLLAR);
5111 case '@': TOKEN (PERLY_SNAIL);
5112 case '%': TOKEN (PERLY_PERCENT_SIGN);
5113 case ')': TOKEN (PERLY_PAREN_CLOSE);
5114 default: TOKEN (sigil);
5119 yyl_dollar(pTHX_ char *s)
5123 if (PL_expect == XPOSTDEREF) {
5126 POSTDEREF(DOLSHARP);
5128 POSTDEREF(PERLY_DOLLAR);
5132 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5133 || memCHRs("{$:+-@", s[2])))
5135 PL_tokenbuf[0] = '@';
5136 s = scan_ident(s + 1, PL_tokenbuf + 1,
5137 sizeof PL_tokenbuf - 1, FALSE);
5138 if (PL_expect == XOPERATOR) {
5140 if (PL_bufptr > s) {
5142 PL_bufptr = PL_oldbufptr;
5144 no_op("Array length", d);
5146 if (!PL_tokenbuf[1])
5148 PL_expect = XOPERATOR;
5149 force_ident_maybe_lex('#');
5153 PL_tokenbuf[0] = '$';
5154 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5155 if (PL_expect == XOPERATOR) {
5157 if (PL_bufptr > s) {
5159 PL_bufptr = PL_oldbufptr;
5163 if (!PL_tokenbuf[1]) {
5165 yyerror("Final $ should be \\$ or $name");
5166 PREREF(PERLY_DOLLAR);
5170 const char tmp = *s;
5171 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5174 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5175 && intuit_more(s, PL_bufend)) {
5177 PL_tokenbuf[0] = '@';
5178 if (ckWARN(WARN_SYNTAX)) {
5181 while ( t < PL_bufend ) {
5183 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5184 /* consumed one or more space chars */
5185 } else if (*t == '$' || *t == '@') {
5186 /* could be more than one '$' like $$ref or @$ref */
5187 do { t++; } while (t < PL_bufend && *t == '$');
5189 /* could be an abigail style identifier like $ foo */
5190 while (t < PL_bufend && *t == ' ') t++;
5192 /* strip off the name of the var */
5193 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5194 t += UTF ? UTF8SKIP(t) : 1;
5195 /* consumed a varname */
5196 } else if (isDIGIT(*t)) {
5197 /* deal with hex constants like 0x11 */
5198 if (t[0] == '0' && t[1] == 'x') {
5200 while (t < PL_bufend && isXDIGIT(*t)) t++;
5202 /* deal with decimal/octal constants like 1 and 0123 */
5203 do { t++; } while (isDIGIT(*t));
5204 if (t<PL_bufend && *t == '.') {
5205 do { t++; } while (isDIGIT(*t));
5208 /* consumed a number */
5210 /* not a var nor a space nor a number */
5214 if (t < PL_bufend && *t++ == ',') {
5215 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5216 while (t < PL_bufend && *t != ']')
5218 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5219 "Multidimensional syntax %" UTF8f " not supported",
5220 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5224 else if (*s == '{') {
5226 PL_tokenbuf[0] = '%';
5227 if ( strEQ(PL_tokenbuf+1, "SIG")
5228 && ckWARN(WARN_SYNTAX)
5229 && (t = (char *) memchr(s, '}', PL_bufend - s))
5230 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5232 char tmpbuf[sizeof PL_tokenbuf];
5235 } while (isSPACE(*t));
5236 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5238 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5243 && get_cvn_flags(tmpbuf, len, UTF
5247 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5248 "You need to quote \"%" UTF8f "\"",
5249 UTF8fARG(UTF, len, tmpbuf));
5256 PL_expect = XOPERATOR;
5257 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5258 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5259 if (!islop || PL_last_lop_op == OP_GREPSTART)
5260 PL_expect = XOPERATOR;
5261 else if (memCHRs("$@\"'`q", *s))
5262 PL_expect = XTERM; /* e.g. print $fh "foo" */
5263 else if ( memCHRs("&*<%", *s)
5264 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5266 PL_expect = XTERM; /* e.g. print $fh &sub */
5268 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5269 char tmpbuf[sizeof PL_tokenbuf];
5272 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5273 if ((t2 = keyword(tmpbuf, len, 0))) {
5274 /* binary operators exclude handle interpretations */
5286 PL_expect = XTERM; /* e.g. print $fh length() */
5291 PL_expect = XTERM; /* e.g. print $fh subr() */
5294 else if (isDIGIT(*s))
5295 PL_expect = XTERM; /* e.g. print $fh 3 */
5296 else if (*s == '.' && isDIGIT(s[1]))
5297 PL_expect = XTERM; /* e.g. print $fh .3 */
5298 else if ((*s == '?' || *s == '-' || *s == '+')
5299 && !isSPACE(s[1]) && s[1] != '=')
5300 PL_expect = XTERM; /* e.g. print $fh -1 */
5301 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5303 PL_expect = XTERM; /* e.g. print $fh /.../
5304 XXX except DORDOR operator
5306 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5308 PL_expect = XTERM; /* print $fh <<"EOF" */
5311 force_ident_maybe_lex('$');
5312 TOKEN(PERLY_DOLLAR);
5316 yyl_sub(pTHX_ char *s, const int key)
5318 char * const tmpbuf = PL_tokenbuf + 1;
5319 bool have_name, have_proto;
5321 SV *format_name = NULL;
5322 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5324 SSize_t off = s-SvPVX(PL_linestr);
5327 s = skipspace(s); /* can move PL_linestr */
5329 d = SvPVX(PL_linestr)+off;
5331 SAVEBOOL(PL_parser->sig_seen);
5332 PL_parser->sig_seen = FALSE;
5334 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5336 || (*s == ':' && s[1] == ':'))
5339 PL_expect = XATTRBLOCK;
5340 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5342 if (key == KEY_format)
5343 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5345 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5347 PL_tokenbuf, len + 1, 0
5349 sv_setpvn(PL_subname, tmpbuf, len);
5351 sv_setsv(PL_subname,PL_curstname);
5352 sv_catpvs(PL_subname,"::");
5353 sv_catpvn(PL_subname,tmpbuf,len);
5355 if (SvUTF8(PL_linestr))
5356 SvUTF8_on(PL_subname);
5362 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5364 /* diag_listed_as: Missing name in "%s sub" */
5366 "Missing name in \"%s\"", PL_bufptr);
5368 PL_expect = XATTRTERM;
5369 sv_setpvs(PL_subname,"?");
5373 if (key == KEY_format) {
5375 NEXTVAL_NEXTTOKE.opval
5376 = newSVOP(OP_CONST,0, format_name);
5377 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5378 force_next(BAREWORD);
5380 PREBLOCK(KW_FORMAT);
5383 /* Look for a prototype */
5384 if (*s == '(' && !is_sigsub) {
5385 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5387 Perl_croak(aTHX_ "Prototype not terminated");
5388 COPLINE_SET_FROM_MULTI_END;
5389 (void)validate_proto(PL_subname, PL_lex_stuff,
5390 ckWARN(WARN_ILLEGALPROTO), 0);
5398 if ( !(*s == ':' && s[1] != ':')
5399 && (*s != '{' && *s != '(') && key != KEY_format)
5401 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5402 key == KEY_DESTROY || key == KEY_BEGIN ||
5403 key == KEY_UNITCHECK || key == KEY_CHECK ||
5404 key == KEY_INIT || key == KEY_END ||
5405 key == KEY_my || key == KEY_state ||
5408 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5409 else if (*s != ';' && *s != '}')
5410 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5414 NEXTVAL_NEXTTOKE.opval =
5415 newSVOP(OP_CONST, 0, PL_lex_stuff);
5416 PL_lex_stuff = NULL;
5421 sv_setpvs(PL_subname, "__ANON__");
5423 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5425 TOKEN(KW_SUB_anon_sig);
5429 force_ident_maybe_lex('&');
5431 TOKEN(KW_SUB_named_sig);
5433 TOKEN(KW_SUB_named);
5437 yyl_interpcasemod(pTHX_ char *s)
5440 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5442 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5443 PL_bufptr, PL_bufend, *PL_bufptr);
5446 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5448 if (PL_lex_casemods) {
5449 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5450 PL_lex_casestack[PL_lex_casemods] = '\0';
5452 if (PL_bufptr != PL_bufend
5453 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5454 || oldmod == 'F')) {
5456 PL_lex_state = LEX_INTERPCONCAT;
5458 PL_lex_allbrackets--;
5459 return REPORT(PERLY_PAREN_CLOSE);
5461 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5462 /* Got an unpaired \E */
5463 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5464 "Useless use of \\E");
5466 if (PL_bufptr != PL_bufend)
5468 PL_lex_state = LEX_INTERPCONCAT;
5473 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5476 if (s[1] == '\\' && s[2] == 'E') {
5478 PL_lex_state = LEX_INTERPCONCAT;
5483 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5484 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5486 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5488 if ((*s == 'L' || *s == 'U' || *s == 'F')
5489 && (strpbrk(PL_lex_casestack, "LUF")))
5491 PL_lex_casestack[--PL_lex_casemods] = '\0';
5492 PL_lex_allbrackets--;
5493 return REPORT(PERLY_PAREN_CLOSE);
5495 if (PL_lex_casemods > 10)
5496 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5497 PL_lex_casestack[PL_lex_casemods++] = *s;
5498 PL_lex_casestack[PL_lex_casemods] = '\0';
5499 PL_lex_state = LEX_INTERPCONCAT;
5500 NEXTVAL_NEXTTOKE.ival = 0;
5501 force_next((2<<24)|PERLY_PAREN_OPEN);
5503 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5505 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5507 NEXTVAL_NEXTTOKE.ival = OP_LC;
5509 NEXTVAL_NEXTTOKE.ival = OP_UC;
5511 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5513 NEXTVAL_NEXTTOKE.ival = OP_FC;
5515 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5519 if (PL_lex_starts) {
5522 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5523 if (PL_lex_casemods == 1 && PL_lex_inpat)
5526 AopNOASSIGN(OP_CONCAT);
5534 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5535 GV **pgv, GV ***pgvp)
5537 GV *ogv = NULL; /* override (winner) */
5538 GV *hgv = NULL; /* hidden (loser) */
5541 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5543 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5544 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5546 && (cv = GvCVu(gv)))
5548 if (GvIMPORTED_CV(gv))
5550 else if (! CvNOWARN_AMBIGUOUS(cv))
5554 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5556 && (isGV_with_GP(gv)
5557 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5558 : SvPCS_IMPORTED(gv)
5559 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5569 *orig_keyword = key;
5570 return 0; /* overridden by import or by GLOBAL */
5572 else if (gv && !*pgvp
5573 && -key==KEY_lock /* XXX generalizable kludge */
5576 return 0; /* any sub overrides "weak" keyword */
5578 else { /* no override */
5580 if (key == KEY_dump) {
5581 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5585 if (hgv && key != KEY_x) /* never ambiguous */
5586 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5587 "Ambiguous call resolved as CORE::%s(), "
5588 "qualify as such or use &",
5595 yyl_qw(pTHX_ char *s, STRLEN len)
5599 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5601 missingterm(NULL, 0);
5603 COPLINE_SET_FROM_MULTI_END;
5604 PL_expect = XOPERATOR;
5605 if (SvCUR(PL_lex_stuff)) {
5606 int warned_comma = !ckWARN(WARN_QW);
5607 int warned_comment = warned_comma;
5608 char *d = SvPV_force(PL_lex_stuff, len);
5610 for (; isSPACE(*d) && len; --len, ++d)
5615 if (!warned_comma || !warned_comment) {
5616 for (; !isSPACE(*d) && len; --len, ++d) {
5617 if (!warned_comma && *d == ',') {
5618 Perl_warner(aTHX_ packWARN(WARN_QW),
5619 "Possible attempt to separate words with commas");
5622 else if (!warned_comment && *d == '#') {
5623 Perl_warner(aTHX_ packWARN(WARN_QW),
5624 "Possible attempt to put comments in qw() list");
5630 for (; !isSPACE(*d) && len; --len, ++d)
5633 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5634 words = op_append_elem(OP_LIST, words,
5635 newSVOP(OP_CONST, 0, tokeq(sv)));
5640 words = newNULLLIST();
5641 SvREFCNT_dec_NN(PL_lex_stuff);
5642 PL_lex_stuff = NULL;
5643 PL_expect = XOPERATOR;
5644 pl_yylval.opval = sawparens(words);
5649 yyl_hyphen(pTHX_ char *s)
5651 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5659 while (s < PL_bufend && SPACE_OR_TAB(*s))
5662 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5663 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5664 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5665 OPERATOR(PERLY_MINUS); /* unary minus */
5668 case 'r': ftst = OP_FTEREAD; break;
5669 case 'w': ftst = OP_FTEWRITE; break;
5670 case 'x': ftst = OP_FTEEXEC; break;
5671 case 'o': ftst = OP_FTEOWNED; break;
5672 case 'R': ftst = OP_FTRREAD; break;
5673 case 'W': ftst = OP_FTRWRITE; break;
5674 case 'X': ftst = OP_FTREXEC; break;
5675 case 'O': ftst = OP_FTROWNED; break;
5676 case 'e': ftst = OP_FTIS; break;
5677 case 'z': ftst = OP_FTZERO; break;
5678 case 's': ftst = OP_FTSIZE; break;
5679 case 'f': ftst = OP_FTFILE; break;
5680 case 'd': ftst = OP_FTDIR; break;
5681 case 'l': ftst = OP_FTLINK; break;
5682 case 'p': ftst = OP_FTPIPE; break;
5683 case 'S': ftst = OP_FTSOCK; break;
5684 case 'u': ftst = OP_FTSUID; break;
5685 case 'g': ftst = OP_FTSGID; break;
5686 case 'k': ftst = OP_FTSVTX; break;
5687 case 'b': ftst = OP_FTBLK; break;
5688 case 'c': ftst = OP_FTCHR; break;
5689 case 't': ftst = OP_FTTTY; break;
5690 case 'T': ftst = OP_FTTEXT; break;
5691 case 'B': ftst = OP_FTBINARY; break;
5692 case 'M': case 'A': case 'C':
5693 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5695 case 'M': ftst = OP_FTMTIME; break;
5696 case 'A': ftst = OP_FTATIME; break;
5697 case 'C': ftst = OP_FTCTIME; break;
5705 PL_last_uni = PL_oldbufptr;
5706 PL_last_lop_op = (OPCODE)ftst;
5708 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5713 /* Assume it was a minus followed by a one-letter named
5714 * subroutine call (or a -bareword), then. */
5716 PerlIO_printf(Perl_debug_log,
5717 "### '-%c' looked like a file test but was not\n",
5724 const char tmp = *s++;
5727 if (PL_expect == XOPERATOR)
5732 else if (*s == '>') {
5735 if (((*s == '$' || *s == '&') && s[1] == '*')
5736 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5737 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5738 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5741 PL_expect = XPOSTDEREF;
5744 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5745 s = force_word(s,METHCALL0,FALSE,TRUE);
5753 if (PL_expect == XOPERATOR) {
5755 && !PL_lex_allbrackets
5756 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5764 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5766 OPERATOR(PERLY_MINUS); /* unary minus */
5772 yyl_plus(pTHX_ char *s)
5774 const char tmp = *s++;
5777 if (PL_expect == XOPERATOR)
5782 if (PL_expect == XOPERATOR) {
5784 && !PL_lex_allbrackets
5785 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5793 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5795 OPERATOR(PERLY_PLUS);
5800 yyl_star(pTHX_ char *s)
5802 if (PL_expect == XPOSTDEREF)
5803 POSTDEREF(PERLY_STAR);
5805 if (PL_expect != XOPERATOR) {
5806 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5807 PL_expect = XOPERATOR;
5808 force_ident(PL_tokenbuf, PERLY_STAR);
5817 if (*s == '=' && !PL_lex_allbrackets
5818 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5827 && !PL_lex_allbrackets
5828 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5838 yyl_percent(pTHX_ char *s)
5840 if (PL_expect == XOPERATOR) {
5842 && !PL_lex_allbrackets
5843 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5850 else if (PL_expect == XPOSTDEREF)
5851 POSTDEREF(PERLY_PERCENT_SIGN);
5853 PL_tokenbuf[0] = '%';
5854 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5856 if (!PL_tokenbuf[1]) {
5857 PREREF(PERLY_PERCENT_SIGN);
5859 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5860 && intuit_more(s, PL_bufend)) {
5862 PL_tokenbuf[0] = '@';
5864 PL_expect = XOPERATOR;
5865 force_ident_maybe_lex('%');
5866 TERM(PERLY_PERCENT_SIGN);
5870 yyl_caret(pTHX_ char *s)
5873 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5874 if (bof && s[1] == '.')
5876 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5877 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5883 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5887 yyl_colon(pTHX_ char *s)
5891 switch (PL_expect) {
5893 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5895 PL_bufptr = s; /* update in case we back off */
5898 "Use of := for an empty attribute list is not allowed");
5905 PL_expect = XTERMBLOCK;
5907 /* NB: as well as parsing normal attributes, we also end up
5908 * here if there is something looking like attributes
5909 * following a signature (which is illegal, but used to be
5910 * legal in 5.20..5.26). If the latter, we still parse the
5911 * attributes so that error messages(s) are less confusing,
5912 * but ignore them (parser->sig_seen).
5916 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5917 bool sig = PL_parser->sig_seen;
5921 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5922 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5923 if (tmp < 0) tmp = -tmp;
5938 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5940 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5945 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5947 COPLINE_SET_FROM_MULTI_END;
5950 sv_catsv(sv, PL_lex_stuff);
5951 attrs = op_append_elem(OP_LIST, attrs,
5952 newSVOP(OP_CONST, 0, sv));
5953 SvREFCNT_dec_NN(PL_lex_stuff);
5954 PL_lex_stuff = NULL;
5957 /* NOTE: any CV attrs applied here need to be part of
5958 the CVf_BUILTIN_ATTRS define in cv.h! */
5959 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5962 CvLVALUE_on(PL_compcv);
5964 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5967 CvNOWARN_AMBIGUOUS_on(PL_compcv);
5969 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5972 Perl_ck_warner_d(aTHX_
5973 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5974 ":const is experimental"
5976 CvANONCONST_on(PL_compcv);
5977 if (!CvANON(PL_compcv))
5978 yyerror(":const is not permitted on named "
5982 /* After we've set the flags, it could be argued that
5983 we don't need to do the attributes.pm-based setting
5984 process, and shouldn't bother appending recognized
5985 flags. To experiment with that, uncomment the
5986 following "else". (Note that's already been
5987 uncommented. That keeps the above-applied built-in
5988 attributes from being intercepted (and possibly
5989 rejected) by a package's attribute routines, but is
5990 justified by the performance win for the common case
5991 of applying only built-in attributes.) */
5993 attrs = op_append_elem(OP_LIST, attrs,
5994 newSVOP(OP_CONST, 0,
5998 if (*s == ':' && s[1] != ':')
6001 break; /* require real whitespace or :'s */
6002 /* XXX losing whitespace on sequential attributes here */
6007 && !(PL_expect == XOPERATOR
6008 ? (*s == '=' || *s == ')')
6009 : (*s == '{' || *s == '(')))
6011 const char q = ((*s == '\'') ? '"' : '\'');
6012 /* If here for an expression, and parsed no attrs, back off. */
6013 if (PL_expect == XOPERATOR && !attrs) {
6017 /* MUST advance bufptr here to avoid bogus "at end of line"
6018 context messages from yyerror().
6021 yyerror( (const char *)
6023 ? Perl_form(aTHX_ "Invalid separator character "
6024 "%c%c%c in attribute list", q, *s, q)
6025 : "Unterminated attribute list" ) );
6028 OPERATOR(PERLY_COLON);
6032 if (PL_parser->sig_seen) {
6033 /* see comment about about sig_seen and parser error
6037 Perl_croak(aTHX_ "Subroutine attributes must come "
6038 "before the signature");
6041 NEXTVAL_NEXTTOKE.opval = attrs;
6047 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6052 PL_lex_allbrackets--;
6053 OPERATOR(PERLY_COLON);
6057 yyl_subproto(pTHX_ char *s, CV *cv)
6059 STRLEN protolen = CvPROTOLEN(cv);
6060 const char *proto = CvPROTO(cv);
6063 proto = S_strip_spaces(aTHX_ proto, &protolen);
6066 if ((optional = *proto == ';')) {
6069 } while (*proto == ';');
6075 *proto == '$' || *proto == '_'
6076 || *proto == '*' || *proto == '+'
6081 *proto == '\\' && proto[1] && proto[2] == '\0'
6084 UNIPROTO(UNIOPSUB,optional);
6087 if (*proto == '\\' && proto[1] == '[') {
6088 const char *p = proto + 2;
6089 while(*p && *p != ']')
6091 if(*p == ']' && !p[1])
6092 UNIPROTO(UNIOPSUB,optional);
6095 if (*proto == '&' && *s == '{') {
6097 sv_setpvs(PL_subname, "__ANON__");
6099 sv_setpvs(PL_subname, "__ANON__::__ANON__");
6100 if (!PL_lex_allbrackets
6101 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6103 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
6112 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
6115 if (PL_lex_brackets > 100) {
6116 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6119 switch (PL_expect) {
6122 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6123 PL_lex_allbrackets++;
6124 OPERATOR(HASHBRACK);
6126 while (s < PL_bufend && SPACE_OR_TAB(*s))
6129 PL_tokenbuf[0] = '\0';
6130 if (d < PL_bufend && *d == '-') {
6131 PL_tokenbuf[0] = '-';
6133 while (d < PL_bufend && SPACE_OR_TAB(*d))
6136 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6138 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6140 while (d < PL_bufend && SPACE_OR_TAB(*d))
6143 const char minus = (PL_tokenbuf[0] == '-');
6144 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6146 force_next(PERLY_MINUS);
6152 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6153 PL_lex_allbrackets++;
6158 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6159 PL_lex_allbrackets++;
6163 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6164 PL_lex_allbrackets++;
6169 if (PL_oldoldbufptr == PL_last_lop)
6170 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6172 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6173 PL_lex_allbrackets++;
6176 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6178 /* This hack is to get the ${} in the message. */
6180 yyerror("syntax error");
6183 OPERATOR(HASHBRACK);
6185 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6186 /* ${...} or @{...} etc., but not print {...}
6187 * Skip the disambiguation and treat this as a block.
6189 goto block_expectation;
6191 /* This hack serves to disambiguate a pair of curlies
6192 * as being a block or an anon hash. Normally, expectation
6193 * determines that, but in cases where we're not in a
6194 * position to expect anything in particular (like inside
6195 * eval"") we have to resolve the ambiguity. This code
6196 * covers the case where the first term in the curlies is a
6197 * quoted string. Most other cases need to be explicitly
6198 * disambiguated by prepending a "+" before the opening
6199 * curly in order to force resolution as an anon hash.
6201 * XXX should probably propagate the outer expectation
6202 * into eval"" to rely less on this hack, but that could
6203 * potentially break current behavior of eval"".
6207 if (*s == '\'' || *s == '"' || *s == '`') {
6208 /* common case: get past first string, handling escapes */
6209 for (t++; t < PL_bufend && *t != *s;)
6214 else if (*s == 'q') {
6217 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6218 && !isWORDCHAR(*t))))
6220 /* skip q//-like construct */
6222 char open, close, term;
6225 while (t < PL_bufend && isSPACE(*t))
6227 /* check for q => */
6228 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6229 OPERATOR(HASHBRACK);
6233 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6237 for (t++; t < PL_bufend; t++) {
6238 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6240 else if (*t == open)
6244 for (t++; t < PL_bufend; t++) {
6245 if (*t == '\\' && t+1 < PL_bufend)
6247 else if (*t == close && --brackets <= 0)
6249 else if (*t == open)
6256 /* skip plain q word */
6257 while ( t < PL_bufend
6258 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6260 t += UTF ? UTF8SKIP(t) : 1;
6263 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6264 t += UTF ? UTF8SKIP(t) : 1;
6265 while ( t < PL_bufend
6266 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6268 t += UTF ? UTF8SKIP(t) : 1;
6271 while (t < PL_bufend && isSPACE(*t))
6273 /* if comma follows first term, call it an anon hash */
6274 /* XXX it could be a comma expression with loop modifiers */
6275 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6276 || (*t == '=' && t[1] == '>')))
6277 OPERATOR(HASHBRACK);
6278 if (PL_expect == XREF) {
6280 /* If there is an opening brace or 'sub:', treat it
6281 as a term to make ${{...}}{k} and &{sub:attr...}
6282 dwim. Otherwise, treat it as a statement, so
6283 map {no strict; ...} works.
6290 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6303 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6310 pl_yylval.ival = CopLINE(PL_curcop);
6311 PL_copline = NOLINE; /* invalidate current command line number */
6312 TOKEN(formbrack ? PERLY_EQUAL_SIGN : PERLY_BRACE_OPEN);
6316 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6318 assert(s != PL_bufend);
6321 if (PL_lex_brackets <= 0)
6322 /* diag_listed_as: Unmatched right %s bracket */
6323 yyerror("Unmatched right curly bracket");
6325 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6327 PL_lex_allbrackets--;
6329 if (PL_lex_state == LEX_INTERPNORMAL) {
6330 if (PL_lex_brackets == 0) {
6331 if (PL_expect & XFAKEBRACK) {
6332 PL_expect &= XENUMMASK;
6333 PL_lex_state = LEX_INTERPEND;
6335 return yylex(); /* ignore fake brackets */
6337 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6338 && SvEVALED(PL_lex_repl))
6339 PL_lex_state = LEX_INTERPEND;
6340 else if (*s == '-' && s[1] == '>')
6341 PL_lex_state = LEX_INTERPENDMAYBE;
6342 else if (*s != '[' && *s != '{')
6343 PL_lex_state = LEX_INTERPEND;
6347 if (PL_expect & XFAKEBRACK) {
6348 PL_expect &= XENUMMASK;
6350 return yylex(); /* ignore fake brackets */
6353 force_next(formbrack ? PERLY_DOT : PERLY_BRACE_CLOSE);
6354 if (formbrack) LEAVE_with_name("lex_format");
6355 if (formbrack == 2) { /* means . where arguments were expected */
6356 force_next(PERLY_SEMICOLON);
6360 TOKEN(PERLY_SEMICOLON);
6364 yyl_ampersand(pTHX_ char *s)
6366 if (PL_expect == XPOSTDEREF)
6367 POSTDEREF(PERLY_AMPERSAND);
6371 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6372 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6380 if (PL_expect == XOPERATOR) {
6383 if ( PL_bufptr == PL_linestart
6384 && ckWARN(WARN_SEMICOLON)
6385 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6387 CopLINE_dec(PL_curcop);
6388 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6389 CopLINE_inc(PL_curcop);
6392 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6394 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6395 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6401 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6406 PL_tokenbuf[0] = '&';
6407 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6408 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6411 force_ident_maybe_lex('&');
6413 PREREF(PERLY_AMPERSAND);
6415 TERM(PERLY_AMPERSAND);
6419 yyl_verticalbar(pTHX_ char *s)
6426 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6427 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6436 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6439 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6440 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6445 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6449 yyl_bang(pTHX_ char *s)
6451 const char tmp = *s++;
6453 /* was this !=~ where !~ was meant?
6454 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6456 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6457 const char *t = s+1;
6459 while (t < PL_bufend && isSPACE(*t))
6462 if (*t == '/' || *t == '?'
6463 || ((*t == 'm' || *t == 's' || *t == 'y')
6464 && !isWORDCHAR(t[1]))
6465 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6466 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6467 "!=~ should be !~");
6470 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6482 OPERATOR(PERLY_EXCLAMATION_MARK);
6486 yyl_snail(pTHX_ char *s)
6488 if (PL_expect == XPOSTDEREF)
6489 POSTDEREF(PERLY_SNAIL);
6490 PL_tokenbuf[0] = '@';
6491 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6492 if (PL_expect == XOPERATOR) {
6494 if (PL_bufptr > s) {
6496 PL_bufptr = PL_oldbufptr;
6501 if (!PL_tokenbuf[1]) {
6502 PREREF(PERLY_SNAIL);
6504 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6506 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6507 && intuit_more(s, PL_bufend))
6510 PL_tokenbuf[0] = '%';
6512 /* Warn about @ where they meant $. */
6513 if (*s == '[' || *s == '{') {
6514 if (ckWARN(WARN_SYNTAX)) {
6515 S_check_scalar_slice(aTHX_ s);
6519 PL_expect = XOPERATOR;
6520 force_ident_maybe_lex('@');
6525 yyl_slash(pTHX_ char *s)
6527 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6528 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6529 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6534 else if (PL_expect == XOPERATOR) {
6536 if (*s == '=' && !PL_lex_allbrackets
6537 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6545 /* Disable warning on "study /blah/" */
6546 if ( PL_oldoldbufptr == PL_last_uni
6547 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6548 || memNE(PL_last_uni, "study", 5)
6549 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6552 s = scan_pat(s,OP_MATCH);
6553 TERM(sublex_start());
6558 yyl_leftsquare(pTHX_ char *s)
6560 if (PL_lex_brackets > 100)
6561 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6562 PL_lex_brackstack[PL_lex_brackets++] = 0;
6563 PL_lex_allbrackets++;
6565 OPERATOR(PERLY_BRACKET_OPEN);
6569 yyl_rightsquare(pTHX_ char *s)
6571 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6574 if (PL_lex_brackets <= 0)
6575 /* diag_listed_as: Unmatched right %s bracket */
6576 yyerror("Unmatched right square bracket");
6579 PL_lex_allbrackets--;
6580 if (PL_lex_state == LEX_INTERPNORMAL) {
6581 if (PL_lex_brackets == 0) {
6582 if (*s == '-' && s[1] == '>')
6583 PL_lex_state = LEX_INTERPENDMAYBE;
6584 else if (*s != '[' && *s != '{')
6585 PL_lex_state = LEX_INTERPEND;
6588 TERM(PERLY_BRACKET_CLOSE);
6592 yyl_tilde(pTHX_ char *s)
6595 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6596 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6599 Perl_ck_warner_d(aTHX_
6600 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6601 "Smartmatch is experimental");
6602 NCEop(OP_SMARTMATCH);
6605 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6607 BCop(OP_SCOMPLEMENT);
6609 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6613 yyl_leftparen(pTHX_ char *s)
6615 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6616 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6620 PL_lex_allbrackets++;
6621 TOKEN(PERLY_PAREN_OPEN);
6625 yyl_rightparen(pTHX_ char *s)
6627 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6630 PL_lex_allbrackets--;
6633 PREBLOCK(PERLY_PAREN_CLOSE);
6634 TERM(PERLY_PAREN_CLOSE);
6638 yyl_leftpointy(pTHX_ char *s)
6642 if (PL_expect != XOPERATOR) {
6643 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6645 if (s[1] == '<' && s[2] != '>')
6646 s = scan_heredoc(s);
6648 s = scan_inputsymbol(s);
6649 PL_expect = XOPERATOR;
6650 TOKEN(sublex_start());
6657 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6661 SHop(OP_LEFT_SHIFT);
6666 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6673 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6681 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6690 yyl_rightpointy(pTHX_ char *s)
6692 const char tmp = *s++;
6695 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6699 SHop(OP_RIGHT_SHIFT);
6701 else if (tmp == '=') {
6702 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6710 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6719 yyl_sglquote(pTHX_ char *s)
6721 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6723 missingterm(NULL, 0);
6724 COPLINE_SET_FROM_MULTI_END;
6725 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6726 if (PL_expect == XOPERATOR) {
6729 pl_yylval.ival = OP_CONST;
6730 TERM(sublex_start());
6734 yyl_dblquote(pTHX_ char *s)
6738 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6741 printbuf("### Saw string before %s\n", s);
6743 PerlIO_printf(Perl_debug_log,
6744 "### Saw unterminated string\n");
6746 if (PL_expect == XOPERATOR) {
6750 missingterm(NULL, 0);
6751 pl_yylval.ival = OP_CONST;
6752 /* FIXME. I think that this can be const if char *d is replaced by
6753 more localised variables. */
6754 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6755 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6756 pl_yylval.ival = OP_STRINGIFY;
6760 if (pl_yylval.ival == OP_CONST)
6761 COPLINE_SET_FROM_MULTI_END;
6762 TERM(sublex_start());
6766 yyl_backtick(pTHX_ char *s)
6768 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6771 printbuf("### Saw backtick string before %s\n", s);
6773 PerlIO_printf(Perl_debug_log,
6774 "### Saw unterminated backtick string\n");
6776 if (PL_expect == XOPERATOR)
6777 no_op("Backticks",s);
6779 missingterm(NULL, 0);
6780 pl_yylval.ival = OP_BACKTICK;
6781 TERM(sublex_start());
6785 yyl_backslash(pTHX_ char *s)
6787 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6788 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6790 if (PL_expect == XOPERATOR)
6791 no_op("Backslash",s);
6796 yyl_data_handle(pTHX)
6798 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6801 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6804 gv_init(gv,stash,"DATA",4,0);
6808 GvIOp(gv) = newIO();
6809 IoIFP(GvIOp(gv)) = PL_rsfp;
6811 /* Mark this internal pseudo-handle as clean */
6812 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6813 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6814 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6816 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6818 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6819 /* if the script was opened in binmode, we need to revert
6820 * it to text mode for compatibility; but only iff it has CRs
6821 * XXX this is a questionable hack at best. */
6822 if (PL_bufend-PL_bufptr > 2
6823 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6826 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6827 loc = PerlIO_tell(PL_rsfp);
6828 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6830 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
6832 PerlIO_seek(PL_rsfp, loc, 0);
6837 #ifdef PERLIO_LAYERS
6840 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6847 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6848 __attribute__noreturn__;
6850 PERL_STATIC_NO_RET void
6851 yyl_croak_unrecognised(pTHX_ char *s)
6853 SV *dsv = newSVpvs_flags("", SVs_TEMP);
6859 STRLEN skiplen = UTF8SKIP(s);
6860 STRLEN stravail = PL_bufend - s;
6861 c = sv_uni_display(dsv, newSVpvn_flags(s,
6862 skiplen > stravail ? stravail : skiplen,
6863 SVs_TEMP | SVf_UTF8),
6864 10, UNI_DISPLAY_ISPRINT);
6867 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6870 if (s >= PL_linestart) {
6874 /* somehow (probably due to a parse failure), PL_linestart has advanced
6875 * pass PL_bufptr, get a reasonable beginning of line
6878 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6881 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6882 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6883 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6886 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6887 UTF8fARG(UTF, (s - d), d),
6892 yyl_require(pTHX_ char *s, I32 orig_keyword)
6896 s = force_version(s, FALSE);
6898 else if (*s != 'v' || !isDIGIT(s[1])
6899 || (s = force_version(s, TRUE), *s == 'v'))
6901 *PL_tokenbuf = '\0';
6902 s = force_word(s,BAREWORD,TRUE,TRUE);
6903 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6904 PL_tokenbuf + sizeof(PL_tokenbuf),
6907 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6908 GV_ADD | (UTF ? SVf_UTF8 : 0));
6911 yyerror("<> at require-statement should be quotes");
6914 if (orig_keyword == KEY_require)
6919 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6921 PL_last_uni = PL_oldbufptr;
6922 PL_last_lop_op = OP_REQUIRE;
6924 return REPORT( (int)KW_REQUIRE );
6928 yyl_foreach(pTHX_ char *s)
6930 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6932 pl_yylval.ival = CopLINE(PL_curcop);
6934 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6936 SSize_t s_off = s - SvPVX(PL_linestr);
6937 bool paren_is_valid = FALSE;
6938 bool maybe_package = FALSE;
6939 bool saw_core = FALSE;
6940 bool core_valid = FALSE;
6942 if (UNLIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "CORE::"))) {
6946 if (LIKELY(memBEGINPs(p, (STRLEN) (PL_bufend - p), "my"))) {
6948 paren_is_valid = TRUE;
6949 if (isSPACE(p[2])) {
6950 p = skipspace(p + 3);
6951 maybe_package = TRUE;
6957 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")) {
6959 if (isSPACE(p[3])) {
6960 p = skipspace(p + 4);
6961 maybe_package = TRUE;
6967 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "state")) {
6969 if (isSPACE(p[5])) {
6970 p = skipspace(p + 6);
6976 if (saw_core && !core_valid) {
6977 Perl_croak(aTHX_ "Missing $ on loop variable");
6980 if (maybe_package && !saw_core) {
6981 /* skip optional package name, as in "for my abc $x (..)" */
6982 if (UNLIKELY(isIDFIRST_lazy_if_safe(p, PL_bufend, UTF))) {
6984 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6986 paren_is_valid = FALSE;
6990 if (UNLIKELY(paren_is_valid && *p == '(')) {
6991 Perl_ck_warner_d(aTHX_
6992 packWARN(WARN_EXPERIMENTAL__FOR_LIST),
6993 "for my (...) is experimental");
6995 else if (UNLIKELY(*p != '$' && *p != '\\')) {
6996 /* "for myfoo (" will end up here, but with p pointing at the 'f' */
6997 Perl_croak(aTHX_ "Missing $ on loop variable");
6999 /* The buffer may have been reallocated, update s */
7000 s = SvPVX(PL_linestr) + s_off;
7006 yyl_do(pTHX_ char *s, I32 orig_keyword)
7010 PRETERMBLOCK(KW_DO);
7015 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7017 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7018 && !keyword(PL_tokenbuf + 1, len, 0)) {
7019 SSize_t off = s-SvPVX(PL_linestr);
7021 s = SvPVX(PL_linestr)+off;
7023 force_ident_maybe_lex('&');
7028 if (orig_keyword == KEY_do)
7036 yyl_my(pTHX_ char *s, I32 my)
7040 yyerror(Perl_form(aTHX_
7041 "Can't redeclare \"%s\" in \"%s\"",
7042 my == KEY_my ? "my" :
7043 my == KEY_state ? "state" : "our",
7044 PL_in_my == KEY_my ? "my" :
7045 PL_in_my == KEY_state ? "state" : "our"));
7049 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7051 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
7052 if (memEQs(PL_tokenbuf, len, "sub"))
7053 return yyl_sub(aTHX_ s, my);
7054 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
7055 if (!PL_in_my_stash) {
7059 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
7060 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
7061 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
7064 else if (*s == '\\') {
7065 if (!FEATURE_MYREF_IS_ENABLED)
7066 Perl_croak(aTHX_ "The experimental declared_refs "
7067 "feature is not enabled");
7068 Perl_ck_warner_d(aTHX_
7069 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
7070 "Declaring references is experimental");
7075 static int yyl_try(pTHX_ char*);
7078 yyl_eol_needs_semicolon(pTHX_ char **ps)
7081 if (PL_lex_state != LEX_NORMAL
7082 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
7084 const bool in_comment = *s == '#';
7086 if (*s == '#' && s == PL_linestart && PL_in_eval
7087 && !PL_rsfp && !PL_parser->filtered) {
7088 /* handle eval qq[#line 1 "foo"\n ...] */
7089 CopLINE_dec(PL_curcop);
7090 incline(s, PL_bufend);
7093 while (d < PL_bufend && *d != '\n')
7098 if (in_comment && d == PL_bufend
7099 && PL_lex_state == LEX_INTERPNORMAL
7100 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7101 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
7103 incline(s, PL_bufend);
7104 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7105 PL_lex_state = LEX_FORMLINE;
7106 force_next(FORMRBRACK);
7112 while (s < PL_bufend && *s != '\n')
7114 if (s < PL_bufend) {
7117 incline(s, PL_bufend);
7125 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
7133 bof = cBOOL(PL_rsfp);
7136 PL_bufptr = PL_bufend;
7137 COPLINE_INC_WITH_HERELINES;
7138 if (!lex_next_chunk(fake_eof)) {
7139 CopLINE_dec(PL_curcop);
7141 TOKEN(PERLY_SEMICOLON); /* not infinite loop because rsfp is NULL now */
7143 CopLINE_dec(PL_curcop);
7145 /* If it looks like the start of a BOM or raw UTF-16,
7146 * check if it in fact is. */
7149 || *(U8*)s == BOM_UTF8_FIRST_BYTE
7153 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
7154 bof = (offset == (Off_t)SvCUR(PL_linestr));
7155 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
7156 /* offset may include swallowed CR */
7158 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
7161 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7162 s = swallow_bom((U8*)s);
7165 if (PL_parser->in_pod) {
7166 /* Incest with pod. */
7167 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
7170 SvPVCLEAR(PL_linestr);
7171 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7172 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7173 PL_last_lop = PL_last_uni = NULL;
7174 PL_parser->in_pod = 0;
7177 if (PL_rsfp || PL_parser->filtered)
7178 incline(s, PL_bufend);
7179 } while (PL_parser->in_pod);
7181 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7182 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7183 PL_last_lop = PL_last_uni = NULL;
7184 if (CopLINE(PL_curcop) == 1) {
7185 while (s < PL_bufend && isSPACE(*s))
7187 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7191 if (*s == '#' && *(s+1) == '!')
7193 #ifdef ALTERNATE_SHEBANG
7195 static char const as[] = ALTERNATE_SHEBANG;
7196 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7197 d = s + (sizeof(as) - 1);
7199 #endif /* ALTERNATE_SHEBANG */
7208 while (*d && !isSPACE(*d))
7212 #ifdef ARG_ZERO_IS_SCRIPT
7213 if (ipathend > ipath) {
7215 * HP-UX (at least) sets argv[0] to the script name,
7216 * which makes $^X incorrect. And Digital UNIX and Linux,
7217 * at least, set argv[0] to the basename of the Perl
7218 * interpreter. So, having found "#!", we'll set it right.
7220 SV* copfilesv = CopFILESV(PL_curcop);
7223 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7225 assert(SvPOK(x) || SvGMAGICAL(x));
7226 if (sv_eq(x, copfilesv)) {
7227 sv_setpvn(x, ipath, ipathend - ipath);
7233 const char *bstart = SvPV_const(copfilesv, blen);
7234 const char * const lstart = SvPV_const(x, llen);
7236 bstart += blen - llen;
7237 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7238 sv_setpvn(x, ipath, ipathend - ipath);
7245 /* Anything to do if no copfilesv? */
7247 TAINT_NOT; /* $^X is always tainted, but that's OK */
7249 #endif /* ARG_ZERO_IS_SCRIPT */
7254 d = instr(s,"perl -");
7256 d = instr(s,"perl");
7258 /* avoid getting into infinite loops when shebang
7259 * line contains "Perl" rather than "perl" */
7261 for (d = ipathend-4; d >= ipath; --d) {
7262 if (isALPHA_FOLD_EQ(*d, 'p')
7263 && !ibcmp(d, "perl", 4))
7273 #ifdef ALTERNATE_SHEBANG
7275 * If the ALTERNATE_SHEBANG on this system starts with a
7276 * character that can be part of a Perl expression, then if
7277 * we see it but not "perl", we're probably looking at the
7278 * start of Perl code, not a request to hand off to some
7279 * other interpreter. Similarly, if "perl" is there, but
7280 * not in the first 'word' of the line, we assume the line
7281 * contains the start of the Perl program.
7283 if (d && *s != '#') {
7284 const char *c = ipath;
7285 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7288 d = NULL; /* "perl" not in first word; ignore */
7290 *s = '#'; /* Don't try to parse shebang line */
7292 #endif /* ALTERNATE_SHEBANG */
7297 && !instr(s,"indir")
7298 && instr(PL_origargv[0],"perl"))
7304 while (s < PL_bufend && isSPACE(*s))
7306 if (s < PL_bufend) {
7307 Newx(newargv,PL_origargc+3,char*);
7309 while (s < PL_bufend && !isSPACE(*s))
7312 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7315 newargv = PL_origargv;
7318 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7320 Perl_croak(aTHX_ "Can't exec %s", ipath);
7323 while (*d && !isSPACE(*d))
7325 while (SPACE_OR_TAB(*d))
7329 const bool switches_done = PL_doswitches;
7330 const U32 oldpdb = PL_perldb;
7331 const bool oldn = PL_minus_n;
7332 const bool oldp = PL_minus_p;
7336 bool baduni = FALSE;
7338 const char *d2 = d1 + 1;
7339 if (parse_unicode_opts((const char **)&d2)
7343 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7344 const char * const m = d1;
7345 while (*d1 && !isSPACE(*d1))
7347 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7350 d1 = moreswitches(d1);
7352 if (PL_doswitches && !switches_done) {
7353 int argc = PL_origargc;
7354 char **argv = PL_origargv;
7357 } while (argc && argv[0][0] == '-' && argv[0][1]);
7358 init_argv_symbols(argc,argv);
7360 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7361 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7362 /* if we have already added "LINE: while (<>) {",
7363 we must not do it again */
7365 SvPVCLEAR(PL_linestr);
7366 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7367 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7368 PL_last_lop = PL_last_uni = NULL;
7369 PL_preambled = FALSE;
7370 if (PERLDB_LINE_OR_SAVESRC)
7371 (void)gv_fetchfile(PL_origfilename);
7379 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7380 PL_lex_state = LEX_FORMLINE;
7381 force_next(FORMRBRACK);
7382 TOKEN(PERLY_SEMICOLON);
7390 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7394 = newSVOP(OP_CONST, 0,
7395 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7396 pl_yylval.opval->op_private = OPpCONST_BARE;
7401 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7403 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7404 && PL_parser->saw_infix_sigil)
7406 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7407 "Operator or semicolon missing before %c%" UTF8f,
7409 UTF8fARG(UTF, strlen(PL_tokenbuf),
7411 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7412 "Ambiguous use of %c resolved as operator %c",
7413 lastchar, lastchar);
7419 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7423 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7424 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7425 if (SvTYPE(sv) == SVt_PVAV)
7426 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7429 pl_yylval.opval->op_private = 0;
7430 pl_yylval.opval->op_folded = 1;
7431 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7436 op_free(pl_yylval.opval);
7438 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7439 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7440 PL_last_lop = PL_oldbufptr;
7441 PL_last_lop_op = OP_ENTERSUB;
7443 /* Is there a prototype? */
7445 int k = yyl_subproto(aTHX_ s, cv);
7450 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7452 force_next(off ? PRIVATEREF : BAREWORD);
7453 if (!PL_lex_allbrackets
7454 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7456 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7462 /* Honour "reserved word" warnings, and enforce strict subs */
7464 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7466 /* after "print" and similar functions (corresponding to
7467 * "F? L" in opcode.pl), whatever wasn't already parsed as
7468 * a filehandle should be subject to "strict subs".
7469 * Likewise for the optional indirect-object argument to system
7470 * or exec, which can't be a bareword */
7471 if ((PL_last_lop_op == OP_PRINT
7472 || PL_last_lop_op == OP_PRTF
7473 || PL_last_lop_op == OP_SAY
7474 || PL_last_lop_op == OP_SYSTEM
7475 || PL_last_lop_op == OP_EXEC)
7476 && (PL_hints & HINT_STRICT_SUBS))
7478 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7481 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7482 char *d = PL_tokenbuf;
7485 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7486 /* PL_warn_reserved is constant */
7487 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7488 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7490 GCC_DIAG_RESTORE_STMT;
7496 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7499 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7501 bool no_op_error = FALSE;
7502 /* Use this var to track whether intuit_method has been
7503 called. intuit_method returns 0 or > 255. */
7506 if (PL_expect == XOPERATOR) {
7507 if (PL_bufptr == PL_linestart) {
7508 CopLINE_dec(PL_curcop);
7509 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7510 CopLINE_inc(PL_curcop);
7513 /* We want to call no_op with s pointing after the
7514 bareword, so defer it. But we want it to come
7515 before the Bad name croak. */
7519 /* Get the rest if it looks like a package qualifier */
7521 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7523 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7526 no_op("Bareword",s);
7527 no_op_error = FALSE;
7530 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7531 UTF8fARG(UTF, len, PL_tokenbuf),
7532 *s == '\'' ? "'" : "::");
7538 no_op("Bareword",s);
7540 /* See if the name is "Foo::",
7541 in which case Foo is a bareword
7542 (and a package name). */
7544 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7545 if (ckWARN(WARN_BAREWORD)
7546 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7547 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7548 "Bareword \"%" UTF8f
7549 "\" refers to nonexistent package",
7550 UTF8fARG(UTF, len, PL_tokenbuf));
7552 PL_tokenbuf[len] = '\0';
7561 /* if we saw a global override before, get the right name */
7564 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7566 SV *sv = newSVpvs("CORE::GLOBAL::");
7572 /* Presume this is going to be a bareword of some sort. */
7574 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7575 pl_yylval.opval->op_private = OPpCONST_BARE;
7577 /* And if "Foo::", then that's what it certainly is. */
7579 return yyl_safe_bareword(aTHX_ s, lastchar);
7582 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7583 const_op->op_private = OPpCONST_BARE;
7584 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7588 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7591 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7594 /* See if it's the indirect object for a list operator. */
7597 && PL_oldoldbufptr < PL_bufptr
7598 && (PL_oldoldbufptr == PL_last_lop
7599 || PL_oldoldbufptr == PL_last_uni)
7600 && /* NO SKIPSPACE BEFORE HERE! */
7602 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7605 bool immediate_paren = *s == '(';
7608 /* (Now we can afford to cross potential line boundary.) */
7611 /* intuit_method() can indirectly call lex_next_chunk(),
7614 s_off = s - SvPVX(PL_linestr);
7615 /* Two barewords in a row may indicate method call. */
7616 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7618 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7620 /* the code at method: doesn't use s */
7623 s = SvPVX(PL_linestr) + s_off;
7625 /* If not a declared subroutine, it's an indirect object. */
7626 /* (But it's an indir obj regardless for sort.) */
7627 /* Also, if "_" follows a filetest operator, it's a bareword */
7630 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7632 && (PL_last_lop_op != OP_MAPSTART
7633 && PL_last_lop_op != OP_GREPSTART))))
7634 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7635 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7639 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7640 yyl_strictwarn_bareword(aTHX_ lastchar);
7641 op_free(c.rv2cv_op);
7642 return yyl_safe_bareword(aTHX_ s, lastchar);
7646 PL_expect = XOPERATOR;
7649 /* Is this a word before a => operator? */
7650 if (*s == '=' && s[1] == '>' && !pkgname) {
7651 op_free(c.rv2cv_op);
7653 if (c.gvp || (c.lex && !c.off)) {
7654 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7655 /* This is our own scalar, created a few lines
7656 above, so this is safe. */
7657 SvREADONLY_off(c.sv);
7658 sv_setpv(c.sv, PL_tokenbuf);
7659 if (UTF && !IN_BYTES
7660 && is_utf8_string((U8*)PL_tokenbuf, len))
7662 SvREADONLY_on(c.sv);
7667 /* If followed by a paren, it's certainly a subroutine. */
7672 while (SPACE_OR_TAB(*d))
7674 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7675 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7677 NEXTVAL_NEXTTOKE.opval =
7678 c.off ? c.rv2cv_op : pl_yylval.opval;
7680 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7681 else op_free(c.rv2cv_op), force_next(BAREWORD);
7683 TOKEN(PERLY_AMPERSAND);
7686 /* If followed by var or block, call it a method (unless sub) */
7688 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7689 op_free(c.rv2cv_op);
7690 PL_last_lop = PL_oldbufptr;
7691 PL_last_lop_op = OP_METHOD;
7692 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7693 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7694 PL_expect = XBLOCKTERM;
7696 return REPORT(METHCALL0);
7699 /* If followed by a bareword, see if it looks like indir obj. */
7703 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7704 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7707 if (c.lex && !c.off) {
7708 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7709 SvREADONLY_off(c.sv);
7710 sv_setpvn(c.sv, PL_tokenbuf, len);
7711 if (UTF && !IN_BYTES
7712 && is_utf8_string((U8*)PL_tokenbuf, len))
7714 else SvUTF8_off(c.sv);
7716 op_free(c.rv2cv_op);
7717 if (key == METHCALL0 && !PL_lex_allbrackets
7718 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7720 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7725 /* Not a method, so call it a subroutine (if defined) */
7728 /* Check for a constant sub */
7729 c.sv = cv_const_sv_or_av(c.cv);
7730 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7733 /* Call it a bare word */
7735 if (PL_hints & HINT_STRICT_SUBS)
7736 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7738 yyl_strictwarn_bareword(aTHX_ lastchar);
7740 op_free(c.rv2cv_op);
7742 return yyl_safe_bareword(aTHX_ s, lastchar);
7746 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7749 default: /* not a keyword */
7750 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7753 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7757 newSVOP(OP_CONST, 0,
7758 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7761 case KEY___PACKAGE__:
7763 newSVOP(OP_CONST, 0, (PL_curstash
7764 ? newSVhek(HvNAME_HEK(PL_curstash))
7770 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7771 yyl_data_handle(aTHX);
7772 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7775 /* If !CvCLONE(PL_compcv) then rpeep will probably turn this into an
7776 * OP_CONST. We need to make it big enough to allow room for that if
7778 FUN0OP(CvCLONE(PL_compcv)
7779 ? newOP(OP_RUNCV, 0)
7780 : newSVOP(OP_RUNCV, 0, &PL_sv_undef));
7789 if (PL_expect == XSTATE)
7790 return yyl_sub(aTHX_ PL_bufptr, key);
7791 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7800 LOP(OP_ACCEPT,XTERM);
7803 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7808 LOP(OP_ATAN2,XTERM);
7814 LOP(OP_BINMODE,XTERM);
7817 LOP(OP_BLESS,XTERM);
7823 Perl_ck_warner_d(aTHX_
7824 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
7831 /* We have to disambiguate the two senses of
7832 "continue". If the next token is a '{' then
7833 treat it as the start of a continue block;
7834 otherwise treat it as a control operator.
7838 PREBLOCK(KW_CONTINUE);
7844 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7854 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7863 LOP(OP_CRYPT,XTERM);
7866 LOP(OP_CHMOD,XTERM);
7869 LOP(OP_CHOWN,XTERM);
7872 LOP(OP_CONNECT,XTERM);
7884 PREBLOCK(KW_DEFAULT);
7887 Perl_ck_warner_d(aTHX_
7888 packWARN(WARN_EXPERIMENTAL__DEFER), "defer is experimental");
7892 return yyl_do(aTHX_ s, orig_keyword);
7895 PL_hints |= HINT_BLOCK_SCOPE;
7905 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7906 STR_WITH_LEN("NDBM_File::"),
7907 STR_WITH_LEN("DB_File::"),
7908 STR_WITH_LEN("GDBM_File::"),
7909 STR_WITH_LEN("SDBM_File::"),
7910 STR_WITH_LEN("ODBM_File::"),
7912 LOP(OP_DBMOPEN,XTERM);
7924 pl_yylval.ival = CopLINE(PL_curcop);
7928 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7940 if (*s == '{') { /* block eval */
7941 PL_expect = XTERMBLOCK;
7942 UNIBRACK(OP_ENTERTRY);
7944 else { /* string eval */
7946 UNIBRACK(OP_ENTEREVAL);
7951 UNIBRACK(-OP_ENTEREVAL);
7965 case KEY_endhostent:
7971 case KEY_endservent:
7974 case KEY_endprotoent:
7984 Perl_ck_warner_d(aTHX_
7985 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch/finally is experimental");
7986 PREBLOCK(KW_FINALLY);
7990 return yyl_foreach(aTHX_ s);
7993 LOP(OP_FORMLINE,XTERM);
8002 LOP(OP_FCNTL,XTERM);
8008 LOP(OP_FLOCK,XTERM);
8011 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8016 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8021 LOP(OP_GREPSTART, XREF);
8038 case KEY_getpriority:
8039 LOP(OP_GETPRIORITY,XTERM);
8041 case KEY_getprotobyname:
8044 case KEY_getprotobynumber:
8045 LOP(OP_GPBYNUMBER,XTERM);
8047 case KEY_getprotoent:
8059 case KEY_getpeername:
8060 UNI(OP_GETPEERNAME);
8062 case KEY_gethostbyname:
8065 case KEY_gethostbyaddr:
8066 LOP(OP_GHBYADDR,XTERM);
8068 case KEY_gethostent:
8071 case KEY_getnetbyname:
8074 case KEY_getnetbyaddr:
8075 LOP(OP_GNBYADDR,XTERM);
8080 case KEY_getservbyname:
8081 LOP(OP_GSBYNAME,XTERM);
8083 case KEY_getservbyport:
8084 LOP(OP_GSBYPORT,XTERM);
8086 case KEY_getservent:
8089 case KEY_getsockname:
8090 UNI(OP_GETSOCKNAME);
8092 case KEY_getsockopt:
8093 LOP(OP_GSOCKOPT,XTERM);
8108 pl_yylval.ival = CopLINE(PL_curcop);
8109 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8110 "given is experimental");
8114 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
8120 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8122 pl_yylval.ival = CopLINE(PL_curcop);
8126 LOP(OP_INDEX,XTERM);
8132 LOP(OP_IOCTL,XTERM);
8162 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8167 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8181 LOP(OP_LISTEN,XTERM);
8190 s = scan_pat(s,OP_MATCH);
8191 TERM(sublex_start());
8194 LOP(OP_MAPSTART, XREF);
8197 LOP(OP_MKDIR,XTERM);
8200 LOP(OP_MSGCTL,XTERM);
8203 LOP(OP_MSGGET,XTERM);
8206 LOP(OP_MSGRCV,XTERM);
8209 LOP(OP_MSGSND,XTERM);
8214 return yyl_my(aTHX_ s, key);
8220 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8225 s = tokenize_use(0, s);
8226 TOKEN(KW_USE_or_NO);
8229 if (*s == '(' || (s = skipspace(s), *s == '('))
8232 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8233 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8239 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8241 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8242 for (t=d; isSPACE(*t);)
8244 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8246 && !(t[0] == '=' && t[1] == '>')
8247 && !(t[0] == ':' && t[1] == ':')
8248 && !keyword(s, d-s, 0)
8250 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8251 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8252 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8258 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8260 pl_yylval.ival = OP_OR;
8270 LOP(OP_OPEN_DIR,XTERM);
8273 checkcomma(s,PL_tokenbuf,"filehandle");
8277 checkcomma(s,PL_tokenbuf,"filehandle");
8296 s = force_word(s,BAREWORD,FALSE,TRUE);
8298 s = force_strict_version(s);
8299 PREBLOCK(KW_PACKAGE);
8302 LOP(OP_PIPE_OP,XTERM);
8305 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8307 missingterm(NULL, 0);
8308 COPLINE_SET_FROM_MULTI_END;
8309 pl_yylval.ival = OP_CONST;
8310 TERM(sublex_start());
8316 return yyl_qw(aTHX_ s, len);
8319 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8321 missingterm(NULL, 0);
8322 pl_yylval.ival = OP_STRINGIFY;
8323 if (SvIVX(PL_lex_stuff) == '\'')
8324 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8325 TERM(sublex_start());
8328 s = scan_pat(s,OP_QR);
8329 TERM(sublex_start());
8332 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8334 missingterm(NULL, 0);
8335 pl_yylval.ival = OP_BACKTICK;
8336 TERM(sublex_start());
8342 return yyl_require(aTHX_ s, orig_keyword);
8351 LOP(OP_RENAME,XTERM);
8360 LOP(OP_RINDEX,XTERM);
8369 UNIDOR(OP_READLINE);
8372 UNIDOR(OP_BACKTICK);
8381 LOP(OP_REVERSE,XTERM);
8384 UNIDOR(OP_READLINK);
8391 if (pl_yylval.opval)
8392 TERM(sublex_start());
8394 TOKEN(1); /* force error */
8397 checkcomma(s,PL_tokenbuf,"filehandle");
8407 LOP(OP_SELECT,XTERM);
8413 LOP(OP_SEMCTL,XTERM);
8416 LOP(OP_SEMGET,XTERM);
8419 LOP(OP_SEMOP,XTERM);
8425 LOP(OP_SETPGRP,XTERM);
8427 case KEY_setpriority:
8428 LOP(OP_SETPRIORITY,XTERM);
8430 case KEY_sethostent:
8436 case KEY_setservent:
8439 case KEY_setprotoent:
8449 LOP(OP_SEEKDIR,XTERM);
8451 case KEY_setsockopt:
8452 LOP(OP_SSOCKOPT,XTERM);
8458 LOP(OP_SHMCTL,XTERM);
8461 LOP(OP_SHMGET,XTERM);
8464 LOP(OP_SHMREAD,XTERM);
8467 LOP(OP_SHMWRITE,XTERM);
8470 LOP(OP_SHUTDOWN,XTERM);
8479 LOP(OP_SOCKET,XTERM);
8481 case KEY_socketpair:
8482 LOP(OP_SOCKPAIR,XTERM);
8485 checkcomma(s,PL_tokenbuf,"subroutine name");
8488 s = force_word(s,BAREWORD,TRUE,TRUE);
8492 LOP(OP_SPLIT,XTERM);
8495 LOP(OP_SPRINTF,XTERM);
8498 LOP(OP_SPLICE,XTERM);
8513 LOP(OP_SUBSTR,XTERM);
8517 return yyl_sub(aTHX_ s, key);
8520 LOP(OP_SYSTEM,XREF);
8523 LOP(OP_SYMLINK,XTERM);
8526 LOP(OP_SYSCALL,XTERM);
8529 LOP(OP_SYSOPEN,XTERM);
8532 LOP(OP_SYSSEEK,XTERM);
8535 LOP(OP_SYSREAD,XTERM);
8538 LOP(OP_SYSWRITE,XTERM);
8543 TERM(sublex_start());
8564 LOP(OP_TRUNCATE,XTERM);
8567 pl_yylval.ival = CopLINE(PL_curcop);
8568 Perl_ck_warner_d(aTHX_
8569 packWARN(WARN_EXPERIMENTAL__TRY), "try/catch is experimental");
8582 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8584 pl_yylval.ival = CopLINE(PL_curcop);
8588 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8590 pl_yylval.ival = CopLINE(PL_curcop);
8591 OPERATOR(KW_UNLESS);
8594 LOP(OP_UNLINK,XTERM);
8600 LOP(OP_UNPACK,XTERM);
8603 LOP(OP_UTIME,XTERM);
8609 LOP(OP_UNSHIFT,XTERM);
8612 s = tokenize_use(1, s);
8613 TOKEN(KW_USE_or_NO);
8622 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8624 pl_yylval.ival = CopLINE(PL_curcop);
8625 Perl_ck_warner_d(aTHX_
8626 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8627 "when is experimental");
8631 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8633 pl_yylval.ival = CopLINE(PL_curcop);
8637 PL_hints |= HINT_BLOCK_SCOPE;
8644 LOP(OP_WAITPID,XTERM);
8650 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8651 * we use the same number on EBCDIC */
8652 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8656 if (PL_expect == XOPERATOR) {
8657 if (*s == '=' && !PL_lex_allbrackets
8658 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8665 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8668 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8670 pl_yylval.ival = OP_XOR;
8676 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8679 I32 orig_keyword = 0;
8683 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8684 if ((*s == ':' && s[1] == ':')
8685 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8687 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8688 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8691 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8692 UTF8fARG(UTF, len, PL_tokenbuf));
8695 else if (key == KEY_require || key == KEY_do
8697 /* that's a way to remember we saw "CORE::" */
8700 /* Known to be a reserved word at this point */
8701 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8705 yyl_keylookup(pTHX_ char *s, GV *gv)
8710 struct code c = no_code;
8711 I32 orig_keyword = 0;
8717 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8719 /* Some keywords can be followed by any delimiter, including ':' */
8720 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8722 /* x::* is just a word, unless x is "CORE" */
8723 if (!anydelim && *s == ':' && s[1] == ':') {
8724 if (memEQs(PL_tokenbuf, len, "CORE"))
8725 return yyl_key_core(aTHX_ s, len, c);
8726 return yyl_just_a_word(aTHX_ s, len, 0, c);
8730 while (d < PL_bufend && isSPACE(*d))
8731 d++; /* no comments skipped here, or s### is misparsed */
8733 /* Is this a word before a => operator? */
8734 if (*d == '=' && d[1] == '>') {
8735 return yyl_fatcomma(aTHX_ s, len);
8738 /* Check for plugged-in keyword */
8742 char *saved_bufptr = PL_bufptr;
8744 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8746 if (result == KEYWORD_PLUGIN_DECLINE) {
8747 /* not a plugged-in keyword */
8748 PL_bufptr = saved_bufptr;
8749 } else if (result == KEYWORD_PLUGIN_STMT) {
8750 pl_yylval.opval = o;
8752 if (!PL_nexttoke) PL_expect = XSTATE;
8753 return REPORT(PLUGSTMT);
8754 } else if (result == KEYWORD_PLUGIN_EXPR) {
8755 pl_yylval.opval = o;
8757 if (!PL_nexttoke) PL_expect = XOPERATOR;
8758 return REPORT(PLUGEXPR);
8760 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8764 /* Is this a label? */
8765 if (!anydelim && PL_expect == XSTATE
8766 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8769 newSVOP(OP_CONST, 0,
8770 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8775 /* Check for lexical sub */
8776 if (PL_expect != XOPERATOR) {
8777 char tmpbuf[sizeof PL_tokenbuf + 1];
8779 Copy(PL_tokenbuf, tmpbuf+1, len, char);
8780 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8781 if (c.off != NOT_IN_PAD) {
8782 assert(c.off); /* we assume this is boolean-true below */
8783 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8784 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
8785 HEK * const stashname = HvNAME_HEK(stash);
8786 c.sv = newSVhek(stashname);
8787 sv_catpvs(c.sv, "::");
8788 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8789 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8790 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8796 return yyl_just_a_word(aTHX_ s, len, 0, c);
8800 c.rv2cv_op = newOP(OP_PADANY, 0);
8801 c.rv2cv_op->op_targ = c.off;
8802 c.cv = find_lexical_cv(c.off);
8805 return yyl_just_a_word(aTHX_ s, len, 0, c);
8810 /* Check for built-in keyword */
8811 key = keyword(PL_tokenbuf, len, 0);
8814 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8816 if (key && key != KEY___DATA__ && key != KEY___END__
8817 && (!anydelim || *s != '#')) {
8818 /* no override, and not s### either; skipspace is safe here
8819 * check for => on following line */
8821 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8822 STRLEN soff = s - SvPVX(PL_linestr);
8824 arrow = *s == '=' && s[1] == '>';
8825 PL_bufptr = SvPVX(PL_linestr) + bufoff;
8826 s = SvPVX(PL_linestr) + soff;
8828 return yyl_fatcomma(aTHX_ s, len);
8831 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8835 yyl_try(pTHX_ char *s)
8844 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8845 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8849 yyl_croak_unrecognised(aTHX_ s);
8853 /* emulate EOF on ^D or ^Z */
8854 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8861 if ((!PL_rsfp || PL_lex_inwhat)
8862 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8866 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8868 yyerror((const char *)
8870 ? "Format not terminated"
8871 : "Missing right curly or square bracket"));
8874 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8878 if (s++ < PL_bufend)
8879 goto retry; /* ignore stray nulls */
8882 if (!PL_in_eval && !PL_preambled) {
8883 PL_preambled = TRUE;
8885 /* Generate a string of Perl code to load the debugger.
8886 * If PERL5DB is set, it will return the contents of that,
8887 * otherwise a compile-time require of perl5db.pl. */
8889 const char * const pdb = PerlEnv_getenv("PERL5DB");
8892 sv_setpv(PL_linestr, pdb);
8893 sv_catpvs(PL_linestr,";");
8895 SETERRNO(0,SS_NORMAL);
8896 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8898 PL_parser->preambling = CopLINE(PL_curcop);
8900 SvPVCLEAR(PL_linestr);
8901 if (PL_preambleav) {
8902 SV **svp = AvARRAY(PL_preambleav);
8903 SV **const end = svp + AvFILLp(PL_preambleav);
8905 sv_catsv(PL_linestr, *svp);
8907 sv_catpvs(PL_linestr, ";");
8909 sv_free(MUTABLE_SV(PL_preambleav));
8910 PL_preambleav = NULL;
8913 sv_catpvs(PL_linestr,
8914 "use feature ':" STRINGIFY(PERL_REVISION) "." STRINGIFY(PERL_VERSION) "';");
8915 if (PL_minus_n || PL_minus_p) {
8916 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8918 sv_catpvs(PL_linestr,"chomp;");
8921 if ( ( *PL_splitstr == '/'
8922 || *PL_splitstr == '\''
8923 || *PL_splitstr == '"')
8924 && strchr(PL_splitstr + 1, *PL_splitstr))
8926 /* strchr is ok, because -F pattern can't contain
8928 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8931 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8932 bytes can be used as quoting characters. :-) */
8933 const char *splits = PL_splitstr;
8934 sv_catpvs(PL_linestr, "our @F=split(q\0");
8937 if (*splits == '\\')
8938 sv_catpvn(PL_linestr, splits, 1);
8939 sv_catpvn(PL_linestr, splits, 1);
8940 } while (*splits++);
8941 /* This loop will embed the trailing NUL of
8942 PL_linestr as the last thing it does before
8944 sv_catpvs(PL_linestr, ");");
8948 sv_catpvs(PL_linestr,"our @F=split(' ');");
8951 sv_catpvs(PL_linestr, "\n");
8952 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8953 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8954 PL_last_lop = PL_last_uni = NULL;
8955 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8956 update_debugger_info(PL_linestr, NULL, 0);
8959 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8964 #ifdef PERL_STRICT_CR
8965 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8967 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8969 case ' ': case '\t': case '\f': case '\v':
8975 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8976 if (needs_semicolon)
8977 TOKEN(PERLY_SEMICOLON);
8983 return yyl_hyphen(aTHX_ s);
8986 return yyl_plus(aTHX_ s);
8989 return yyl_star(aTHX_ s);
8992 return yyl_percent(aTHX_ s);
8995 return yyl_caret(aTHX_ s);
8998 return yyl_leftsquare(aTHX_ s);
9001 return yyl_tilde(aTHX_ s);
9004 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9007 OPERATOR(PERLY_COMMA);
9010 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
9011 return yyl_colon(aTHX_ s + 1);
9014 return yyl_leftparen(aTHX_ s + 1);
9017 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
9022 TOKEN(PERLY_SEMICOLON);
9025 return yyl_rightparen(aTHX_ s);
9028 return yyl_rightsquare(aTHX_ s);
9031 return yyl_leftcurly(aTHX_ s + 1, 0);
9034 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
9036 return yyl_rightcurly(aTHX_ s, 0);
9039 return yyl_ampersand(aTHX_ s);
9042 return yyl_verticalbar(aTHX_ s);
9045 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
9046 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
9048 s = vcs_conflict_marker(s + 7);
9054 const char tmp = *s++;
9056 if (!PL_lex_allbrackets
9057 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
9065 if (!PL_lex_allbrackets
9066 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
9071 OPERATOR(PERLY_COMMA);
9075 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
9076 && memCHRs("+-*/%.^&|<",tmp))
9077 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9078 "Reversed %c= operator",(int)tmp);
9080 if (PL_expect == XSTATE
9082 && (s == PL_linestart+1 || s[-2] == '\n') )
9084 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
9085 || PL_lex_state != LEX_NORMAL)
9090 incline(s, PL_bufend);
9091 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
9093 s = (char *) memchr(s,'\n', d - s);
9098 incline(s, PL_bufend);
9106 PL_parser->in_pod = 1;
9110 if (PL_expect == XBLOCK) {
9112 #ifdef PERL_STRICT_CR
9113 while (SPACE_OR_TAB(*t))
9115 while (SPACE_OR_TAB(*t) || *t == '\r')
9118 if (*t == '\n' || *t == '#') {
9119 ENTER_with_name("lex_format");
9120 SAVEI8(PL_parser->form_lex_state);
9121 SAVEI32(PL_lex_formbrack);
9122 PL_parser->form_lex_state = PL_lex_state;
9123 PL_lex_formbrack = PL_lex_brackets + 1;
9124 PL_parser->sub_error_count = PL_error_count;
9125 return yyl_leftcurly(aTHX_ s, 1);
9128 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
9136 return yyl_bang(aTHX_ s + 1);
9139 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
9140 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
9142 s = vcs_conflict_marker(s + 7);
9145 return yyl_leftpointy(aTHX_ s);
9148 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
9149 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
9151 s = vcs_conflict_marker(s + 7);
9154 return yyl_rightpointy(aTHX_ s + 1);
9157 return yyl_dollar(aTHX_ s);
9160 return yyl_snail(aTHX_ s);
9162 case '/': /* may be division, defined-or, or pattern */
9163 return yyl_slash(aTHX_ s);
9165 case '?': /* conditional */
9167 if (!PL_lex_allbrackets
9168 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
9173 PL_lex_allbrackets++;
9174 OPERATOR(PERLY_QUESTION_MARK);
9177 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
9178 #ifdef PERL_STRICT_CR
9181 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
9183 && (s == PL_linestart || s[-1] == '\n') )
9186 /* formbrack==2 means dot seen where arguments expected */
9187 return yyl_rightcurly(aTHX_ s, 2);
9189 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9193 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9196 if (!PL_lex_allbrackets
9197 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9205 pl_yylval.ival = OPf_SPECIAL;
9211 if (*s == '=' && !PL_lex_allbrackets
9212 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9220 case '0': case '1': case '2': case '3': case '4':
9221 case '5': case '6': case '7': case '8': case '9':
9222 s = scan_num(s, &pl_yylval);
9223 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9224 if (PL_expect == XOPERATOR)
9229 return yyl_sglquote(aTHX_ s);
9232 return yyl_dblquote(aTHX_ s);
9235 return yyl_backtick(aTHX_ s);
9238 return yyl_backslash(aTHX_ s + 1);
9241 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9242 char *start = s + 2;
9243 while (isDIGIT(*start) || *start == '_')
9245 if (*start == '.' && isDIGIT(start[1])) {
9246 s = scan_num(s, &pl_yylval);
9249 else if ((*start == ':' && start[1] == ':')
9250 || (PL_expect == XSTATE && *start == ':')) {
9251 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9255 else if (PL_expect == XSTATE) {
9257 while (d < PL_bufend && isSPACE(*d)) d++;
9259 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9264 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9265 if (!isALPHA(*start) && (PL_expect == XTERM
9266 || PL_expect == XREF || PL_expect == XSTATE
9267 || PL_expect == XTERMORDORDOR)) {
9268 GV *const gv = gv_fetchpvn_flags(s, start - s,
9269 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9271 s = scan_num(s, &pl_yylval);
9276 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9281 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9285 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9316 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9326 Works out what to call the token just pulled out of the input
9327 stream. The yacc parser takes care of taking the ops we return and
9328 stitching them into a tree.
9331 The type of the next token
9334 Check if we have already built the token; if so, use it.
9335 Switch based on the current state:
9336 - if we have a case modifier in a string, deal with that
9337 - handle other cases of interpolation inside a string
9338 - scan the next line if we are inside a format
9339 In the normal state, switch on the next character:
9341 if alphabetic, go to key lookup
9342 unrecognized character - croak
9343 - 0/4/26: handle end-of-line or EOF
9344 - cases for whitespace
9345 - \n and #: handle comments and line numbers
9346 - various operators, brackets and sigils
9349 - 'v': vstrings (or go to key lookup)
9350 - 'x' repetition operator (or go to key lookup)
9351 - other ASCII alphanumerics (key lookup begins here):
9354 scan built-in keyword (but do nothing with it yet)
9355 check for statement label
9356 check for lexical subs
9357 return yyl_just_a_word if there is one
9358 see whether built-in keyword is overridden
9359 switch on keyword number:
9360 - default: return yyl_just_a_word:
9361 not a built-in keyword; handle bareword lookup
9362 disambiguate between method and sub call
9363 fall back to bareword
9364 - cases for built-in keywords
9370 char *s = PL_bufptr;
9372 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9373 const U8* first_bad_char_loc;
9374 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9375 PL_bufend - PL_bufptr,
9376 &first_bad_char_loc)))
9378 _force_out_malformed_utf8_message(first_bad_char_loc,
9381 1 /* 1 means die */ );
9382 NOT_REACHED; /* NOTREACHED */
9384 PL_parser->recheck_utf8_validity = FALSE;
9387 SV* tmp = newSVpvs("");
9388 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9389 (IV)CopLINE(PL_curcop),
9390 lex_state_names[PL_lex_state],
9391 exp_name[PL_expect],
9392 pv_display(tmp, s, strlen(s), 0, 60));
9396 /* when we've already built the next token, just pull it out of the queue */
9399 pl_yylval = PL_nextval[PL_nexttoke];
9402 next_type = PL_nexttype[PL_nexttoke];
9403 if (next_type & (7<<24)) {
9404 if (next_type & (1<<24)) {
9405 if (PL_lex_brackets > 100)
9406 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9407 PL_lex_brackstack[PL_lex_brackets++] =
9408 (char) ((U8) (next_type >> 16));
9410 if (next_type & (2<<24))
9411 PL_lex_allbrackets++;
9412 if (next_type & (4<<24))
9413 PL_lex_allbrackets--;
9414 next_type &= 0xffff;
9416 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9420 switch (PL_lex_state) {
9422 case LEX_INTERPNORMAL:
9425 /* interpolated case modifiers like \L \U, including \Q and \E.
9426 when we get here, PL_bufptr is at the \
9428 case LEX_INTERPCASEMOD:
9429 /* handle \E or end of string */
9430 return yyl_interpcasemod(aTHX_ s);
9432 case LEX_INTERPPUSH:
9433 return REPORT(sublex_push());
9435 case LEX_INTERPSTART:
9436 if (PL_bufptr == PL_bufend)
9437 return REPORT(sublex_done());
9439 if(*PL_bufptr != '(')
9440 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9443 /* for /@a/, we leave the joining for the regex engine to do
9444 * (unless we're within \Q etc) */
9445 PL_lex_dojoin = (*PL_bufptr == '@'
9446 && (!PL_lex_inpat || PL_lex_casemods));
9447 PL_lex_state = LEX_INTERPNORMAL;
9448 if (PL_lex_dojoin) {
9449 NEXTVAL_NEXTTOKE.ival = 0;
9450 force_next(PERLY_COMMA);
9451 force_ident("\"", PERLY_DOLLAR);
9452 NEXTVAL_NEXTTOKE.ival = 0;
9453 force_next(PERLY_DOLLAR);
9454 NEXTVAL_NEXTTOKE.ival = 0;
9455 force_next((2<<24)|PERLY_PAREN_OPEN);
9456 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9459 /* Convert (?{...}) and friends to 'do {...}' */
9460 if (PL_lex_inpat && *PL_bufptr == '(') {
9461 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9463 if (*PL_bufptr != '{')
9465 PL_expect = XTERMBLOCK;
9469 if (PL_lex_starts++) {
9471 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9472 if (!PL_lex_casemods && PL_lex_inpat)
9475 AopNOASSIGN(OP_CONCAT);
9479 case LEX_INTERPENDMAYBE:
9480 if (intuit_more(PL_bufptr, PL_bufend)) {
9481 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9487 if (PL_lex_dojoin) {
9488 const U8 dojoin_was = PL_lex_dojoin;
9489 PL_lex_dojoin = FALSE;
9490 PL_lex_state = LEX_INTERPCONCAT;
9491 PL_lex_allbrackets--;
9492 return REPORT(dojoin_was == 1 ? (int)PERLY_PAREN_CLOSE : (int)POSTJOIN);
9494 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9495 && SvEVALED(PL_lex_repl))
9497 if (PL_bufptr != PL_bufend)
9498 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9501 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9502 re_eval_str. If the here-doc body's length equals the previous
9503 value of re_eval_start, re_eval_start will now be null. So
9504 check re_eval_str as well. */
9505 if (PL_parser->lex_shared->re_eval_start
9506 || PL_parser->lex_shared->re_eval_str) {
9508 if (*PL_bufptr != ')')
9509 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9511 /* having compiled a (?{..}) expression, return the original
9512 * text too, as a const */
9513 if (PL_parser->lex_shared->re_eval_str) {
9514 sv = PL_parser->lex_shared->re_eval_str;
9515 PL_parser->lex_shared->re_eval_str = NULL;
9517 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9518 SvPV_shrink_to_cur(sv);
9520 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9521 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9522 NEXTVAL_NEXTTOKE.opval =
9523 newSVOP(OP_CONST, 0,
9526 PL_parser->lex_shared->re_eval_start = NULL;
9528 return REPORT(PERLY_COMMA);
9532 case LEX_INTERPCONCAT:
9534 if (PL_lex_brackets)
9535 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9536 (long) PL_lex_brackets);
9538 if (PL_bufptr == PL_bufend)
9539 return REPORT(sublex_done());
9541 /* m'foo' still needs to be parsed for possible (?{...}) */
9542 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9543 SV *sv = newSVsv(PL_linestr);
9545 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9549 int save_error_count = PL_error_count;
9551 s = scan_const(PL_bufptr);
9553 /* Set flag if this was a pattern and there were errors. op.c will
9554 * refuse to compile a pattern with this flag set. Otherwise, we
9555 * could get segfaults, etc. */
9556 if (PL_lex_inpat && PL_error_count > save_error_count) {
9557 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9560 PL_lex_state = LEX_INTERPCASEMOD;
9562 PL_lex_state = LEX_INTERPSTART;
9565 if (s != PL_bufptr) {
9566 NEXTVAL_NEXTTOKE = pl_yylval;
9569 if (PL_lex_starts++) {
9570 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9571 if (!PL_lex_casemods && PL_lex_inpat)
9574 AopNOASSIGN(OP_CONCAT);
9584 if (PL_parser->sub_error_count != PL_error_count) {
9585 /* There was an error parsing a formline, which tends to
9587 Unlike interpolated sub-parsing, we can't treat any of
9588 these as recoverable, so no need to check sub_no_recover.
9592 assert(PL_lex_formbrack);
9593 s = scan_formline(PL_bufptr);
9594 if (!PL_lex_formbrack)
9595 return yyl_rightcurly(aTHX_ s, 1);
9600 /* We really do *not* want PL_linestr ever becoming a COW. */
9601 assert (!SvIsCOW(PL_linestr));
9603 PL_oldoldbufptr = PL_oldbufptr;
9606 if (PL_in_my == KEY_sigvar) {
9607 PL_parser->saw_infix_sigil = 0;
9608 return yyl_sigvar(aTHX_ s);
9612 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9613 On its return, we then need to set it to indicate whether the token
9614 we just encountered was an infix operator that (if we hadn't been
9615 expecting an operator) have been a sigil.
9617 bool expected_operator = (PL_expect == XOPERATOR);
9618 int ret = yyl_try(aTHX_ s);
9619 switch (pl_yylval.ival) {
9624 if (expected_operator) {
9625 PL_parser->saw_infix_sigil = 1;
9630 PL_parser->saw_infix_sigil = 0;
9640 Looks up an identifier in the pad or in a package
9642 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9643 rather than a plain pad var.
9646 PRIVATEREF if this is a lexical name.
9647 BAREWORD if this belongs to a package.
9650 if we're in a my declaration
9651 croak if they tried to say my($foo::bar)
9652 build the ops for a my() declaration
9653 if it's an access to a my() variable
9654 build ops for access to a my() variable
9655 if in a dq string, and they've said @foo and we can't find @foo
9657 build ops for a bareword
9661 S_pending_ident(pTHX)
9664 const char pit = (char)pl_yylval.ival;
9665 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9666 /* All routes through this function want to know if there is a colon. */
9667 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9669 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9670 "### Pending identifier '%s'\n", PL_tokenbuf); });
9671 assert(tokenbuf_len >= 2);
9673 /* if we're in a my(), we can't allow dynamics here.
9674 $foo'bar has already been turned into $foo::bar, so
9675 just check for colons.
9677 if it's a legal name, the OP is a PADANY.
9680 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9682 /* diag_listed_as: No package name allowed for variable %s
9684 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9686 *PL_tokenbuf=='&' ? "subroutine" : "variable",
9687 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9688 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9693 /* "my" variable %s can't be in a package */
9694 /* PL_no_myglob is constant */
9695 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9696 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9697 PL_in_my == KEY_my ? "my" : "state",
9698 *PL_tokenbuf == '&' ? "subroutine" : "variable",
9700 UTF ? SVf_UTF8 : 0);
9701 GCC_DIAG_RESTORE_STMT;
9704 if (PL_in_my == KEY_sigvar) {
9705 /* A signature 'padop' needs in addition, an op_first to
9706 * point to a child sigdefelem, and an extra field to hold
9707 * the signature index. We can achieve both by using an
9708 * UNOP_AUX and (ab)using the op_aux field to hold the
9709 * index. If we ever need more fields, use a real malloced
9710 * aux strut instead.
9712 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9713 INT2PTR(UNOP_AUX_item *,
9714 (PL_parser->sig_elems)));
9715 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9716 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9720 o = newOP(OP_PADANY, 0);
9721 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9722 UTF ? SVf_UTF8 : 0);
9723 if (PL_in_my == KEY_sigvar)
9726 pl_yylval.opval = o;
9732 build the ops for accesses to a my() variable.
9737 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9739 if (tmp != NOT_IN_PAD) {
9740 /* might be an "our" variable" */
9741 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9742 /* build ops for a bareword */
9743 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9744 HEK * const stashname = HvNAME_HEK(stash);
9745 SV * const sym = newSVhek(stashname);
9746 sv_catpvs(sym, "::");
9747 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9748 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9749 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9753 ((PL_tokenbuf[0] == '$') ? SVt_PV
9754 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9759 pl_yylval.opval = newOP(OP_PADANY, 0);
9760 pl_yylval.opval->op_targ = tmp;
9766 Whine if they've said @foo or @foo{key} in a doublequoted string,
9767 and @foo (or %foo) isn't a variable we can find in the symbol
9770 if (ckWARN(WARN_AMBIGUOUS)
9772 && PL_lex_state != LEX_NORMAL
9773 && !PL_lex_brackets)
9775 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9776 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9778 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9781 /* Downgraded from fatal to warning 20000522 mjd */
9782 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9783 "Possible unintended interpolation of %" UTF8f
9785 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9789 /* build ops for a bareword */
9790 pl_yylval.opval = newSVOP(OP_CONST, 0,
9791 newSVpvn_flags(PL_tokenbuf + 1,
9792 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9793 UTF ? SVf_UTF8 : 0 ));
9794 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9796 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9797 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9798 | ( UTF ? SVf_UTF8 : 0 ),
9799 ((PL_tokenbuf[0] == '$') ? SVt_PV
9800 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9806 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9808 PERL_ARGS_ASSERT_CHECKCOMMA;
9810 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9811 if (ckWARN(WARN_SYNTAX)) {
9814 for (w = s+2; *w && level; w++) {
9822 /* the list of chars below is for end of statements or
9823 * block / parens, boolean operators (&&, ||, //) and branch
9824 * constructs (or, and, if, until, unless, while, err, for).
9825 * Not a very solid hack... */
9826 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9827 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9828 "%s (...) interpreted as function",name);
9831 while (s < PL_bufend && isSPACE(*s))
9835 while (s < PL_bufend && isSPACE(*s))
9837 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9838 const char * const w = s;
9839 s += UTF ? UTF8SKIP(s) : 1;
9840 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9841 s += UTF ? UTF8SKIP(s) : 1;
9842 while (s < PL_bufend && isSPACE(*s))
9846 if (keyword(w, s - w, 0))
9849 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9850 if (gv && GvCVu(gv))
9855 Copy(w, tmpbuf+1, s - w, char);
9857 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9858 if (off != NOT_IN_PAD) return;
9860 Perl_croak(aTHX_ "No comma allowed after %s", what);
9865 /* S_new_constant(): do any overload::constant lookup.
9867 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9868 Best used as sv=new_constant(..., sv, ...).
9869 If s, pv are NULL, calls subroutine with one argument,
9870 and <type> is used with error messages only.
9871 <type> is assumed to be well formed UTF-8.
9873 If error_msg is not NULL, *error_msg will be set to any error encountered.
9874 Otherwise yyerror() will be used to output it */
9877 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9878 SV *sv, SV *pv, const char *type, STRLEN typelen,
9879 const char ** error_msg)
9882 HV * table = GvHV(PL_hintgv); /* ^H */
9887 const char *why1 = "", *why2 = "", *why3 = "";
9888 const char * optional_colon = ":"; /* Only some messages have a colon */
9891 PERL_ARGS_ASSERT_NEW_CONSTANT;
9892 /* We assume that this is true: */
9895 sv_2mortal(sv); /* Parent created it permanently */
9898 || ! (PL_hints & HINT_LOCALIZE_HH))
9901 optional_colon = "";
9905 cvp = hv_fetch(table, key, keylen, FALSE);
9906 if (!cvp || !SvOK(*cvp)) {
9909 why3 = "} is not defined";
9915 pv = newSVpvn_flags(s, len, SVs_TEMP);
9917 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9919 typesv = &PL_sv_undef;
9921 PUSHSTACKi(PERLSI_OVERLOAD);
9933 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9937 /* Check the eval first */
9938 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9940 const char * errstr;
9941 sv_catpvs(errsv, "Propagated");
9942 errstr = SvPV_const(errsv, errlen);
9943 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9945 res = SvREFCNT_inc_simple_NN(sv);
9949 SvREFCNT_inc_simple_void_NN(res);
9962 (void)sv_2mortal(sv);
9964 why1 = "Call to &{$^H{";
9966 why3 = "}} did not return a defined value";
9970 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9971 (int)(type ? typelen : len),
9979 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9981 return SvREFCNT_inc_simple_NN(sv);
9984 PERL_STATIC_INLINE void
9985 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9986 bool is_utf8, bool check_dollar, bool tick_warn)
9989 const char *olds = *s;
9990 PERL_ARGS_ASSERT_PARSE_IDENT;
9992 while (*s < PL_bufend) {
9994 Perl_croak(aTHX_ "%s", ident_too_long);
9995 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9996 /* The UTF-8 case must come first, otherwise things
9997 * like c\N{COMBINING TILDE} would start failing, as the
9998 * isWORDCHAR_A case below would gobble the 'c' up.
10001 char *t = *s + UTF8SKIP(*s);
10002 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
10005 if (*d + (t - *s) > e)
10006 Perl_croak(aTHX_ "%s", ident_too_long);
10007 Copy(*s, *d, t - *s, char);
10011 else if ( isWORDCHAR_A(**s) ) {
10014 } while (isWORDCHAR_A(**s) && *d < e);
10016 else if ( allow_package
10018 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
10025 else if (allow_package && **s == ':' && (*s)[1] == ':'
10026 /* Disallow things like Foo::$bar. For the curious, this is
10027 * the code path that triggers the "Bad name after" warning
10028 * when looking for barewords.
10030 && !(check_dollar && (*s)[2] == '$')) {
10037 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
10038 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
10041 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
10043 SAVEFREEPV(this_d);
10044 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10045 "Old package separator used in string");
10046 if (olds[-1] == '#')
10049 while (olds < *s) {
10050 if (*olds == '\'') {
10057 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
10058 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
10059 UTF8fARG(is_utf8, d2-this_d, this_d));
10064 /* Returns a NUL terminated string, with the length of the string written to
10068 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
10071 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10072 bool is_utf8 = cBOOL(UTF);
10074 PERL_ARGS_ASSERT_SCAN_WORD;
10076 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
10083 /* scan s and extract an identifier ($var) from it if possible
10085 * XXX: This function has subtle implications on parsing, and
10086 * changing how it behaves can cause a variable to change from
10087 * being a run time rv2sv call or a compile time binding to a
10088 * specific variable name.
10091 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10093 I32 herelines = PL_parser->herelines;
10094 SSize_t bracket = -1;
10097 char * const e = d + destlen - 3; /* two-character token, ending NUL */
10098 bool is_utf8 = cBOOL(UTF);
10099 I32 orig_copline = 0, tmp_copline = 0;
10101 PERL_ARGS_ASSERT_SCAN_IDENT;
10103 if (isSPACE(*s) || !*s)
10105 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
10106 bool is_zero= *s == '0' ? TRUE : FALSE;
10107 char *digit_start= d;
10109 while (s < PL_bufend && isDIGIT(*s)) {
10111 Perl_croak(aTHX_ "%s", ident_too_long);
10114 if (is_zero && d - digit_start > 1)
10115 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10117 else { /* See if it is a "normal" identifier */
10118 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
10123 /* Either a digit variable, or parse_ident() found an identifier
10124 (anything valid as a bareword), so job done and return. */
10125 if (PL_lex_state != LEX_NORMAL)
10126 PL_lex_state = LEX_INTERPENDMAYBE;
10130 /* Here, it is not a run-of-the-mill identifier name */
10132 if (*s == '$' && s[1]
10133 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
10134 || isDIGIT_A((U8)s[1])
10137 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
10139 /* Dereferencing a value in a scalar variable.
10140 The alternatives are different syntaxes for a scalar variable.
10141 Using ' as a leading package separator isn't allowed. :: is. */
10144 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
10146 bracket = s - SvPVX(PL_linestr);
10148 orig_copline = CopLINE(PL_curcop);
10149 if (s < PL_bufend && isSPACE(*s)) {
10155 /* Extract the first character of the variable name from 's' and
10156 * copy it, null terminated into 'd'. Note that this does not
10157 * involve checking for just IDFIRST characters, as it allows the
10158 * '^' for ${^FOO} type variable names, and it allows all the
10159 * characters that are legal in a single character variable name.
10161 * The legal ones are any of:
10162 * a) all ASCII characters except:
10163 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
10165 * The final case currently doesn't get this far in the program, so we
10166 * don't test for it. If that were to change, it would be ok to allow it.
10167 * b) When not under Unicode rules, any upper Latin1 character
10168 * c) Otherwise, when unicode rules are used, all XIDS characters.
10170 * Because all ASCII characters have the same representation whether
10171 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
10172 * '{' without knowing if is UTF-8 or not. */
10174 if ((s <= PL_bufend - ((is_utf8)
10181 ? isIDFIRST_utf8_safe(s, PL_bufend)
10183 && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))
10189 const STRLEN skip = UTF8SKIP(s);
10192 for ( i = 0; i < skip; i++ )
10201 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10203 bool is_zero= *d == '0' ? TRUE : FALSE;
10204 char *digit_start= d;
10205 while (s < PL_bufend && isDIGIT(*s)) {
10208 Perl_croak(aTHX_ "%s", ident_too_long);
10211 if (is_zero && d - digit_start >= 1) /* d points at the last digit */
10212 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10216 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10217 else if (*d == '^' && *s && isCONTROLVAR(*s)) {
10221 /* Warn about ambiguous code after unary operators if {...} notation isn't
10222 used. There's no difference in ambiguity; it's merely a heuristic
10223 about when not to warn. */
10224 else if (ck_uni && bracket == -1)
10227 if (bracket != -1) {
10230 /* If we were processing {...} notation then... */
10231 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10232 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10235 /* note we have to check for a normal identifier first,
10236 * as it handles utf8 symbols, and only after that has
10237 * been ruled out can we look at the caret words */
10238 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10239 /* if it starts as a valid identifier, assume that it is one.
10240 (the later check for } being at the expected point will trap
10241 cases where this doesn't pan out.) */
10242 d += is_utf8 ? UTF8SKIP(d) : 1;
10243 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10246 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10248 while (isWORDCHAR(*s) && d < e) {
10252 Perl_croak(aTHX_ "%s", ident_too_long);
10255 tmp_copline = CopLINE(PL_curcop);
10256 if (s < PL_bufend && isSPACE(*s)) {
10259 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10260 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10261 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10262 const char * const brack =
10264 ((*s == '[') ? "[...]" : "{...}");
10265 orig_copline = CopLINE(PL_curcop);
10266 CopLINE_set(PL_curcop, tmp_copline);
10267 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10268 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10269 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10270 funny, dest, brack, funny, dest, brack);
10271 CopLINE_set(PL_curcop, orig_copline);
10274 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10275 PL_lex_allbrackets++;
10280 if ( !tmp_copline )
10281 tmp_copline = CopLINE(PL_curcop);
10282 if ((skip = s < PL_bufend && isSPACE(*s))) {
10283 /* Avoid incrementing line numbers or resetting PL_linestart,
10284 in case we have to back up. */
10285 STRLEN s_off = s - SvPVX(PL_linestr);
10287 s = SvPVX(PL_linestr) + s_off;
10292 /* Expect to find a closing } after consuming any trailing whitespace.
10295 /* Now increment line numbers if applicable. */
10299 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10300 PL_lex_state = LEX_INTERPEND;
10303 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10304 if (ckWARN(WARN_AMBIGUOUS)
10305 && (keyword(dest, d - dest, 0)
10306 || get_cvn_flags(dest, d - dest, is_utf8
10310 SV *tmp = newSVpvn_flags( dest, d - dest,
10311 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10314 orig_copline = CopLINE(PL_curcop);
10315 CopLINE_set(PL_curcop, tmp_copline);
10316 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10317 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10318 funny, SVfARG(tmp), funny, SVfARG(tmp));
10319 CopLINE_set(PL_curcop, orig_copline);
10324 /* Didn't find the closing } at the point we expected, so restore
10325 state such that the next thing to process is the opening { and */
10326 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10327 CopLINE_set(PL_curcop, orig_copline);
10328 PL_parser->herelines = herelines;
10330 PL_parser->sub_no_recover = TRUE;
10333 else if ( PL_lex_state == LEX_INTERPNORMAL
10334 && !PL_lex_brackets
10335 && !intuit_more(s, PL_bufend))
10336 PL_lex_state = LEX_INTERPEND;
10341 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10343 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10344 * found in the parse starting at 's', based on the subset that are valid
10345 * in this context input to this routine in 'valid_flags'. Advances s.
10346 * Returns TRUE if the input should be treated as a valid flag, so the next
10347 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10348 * upon first call on the current regex. This routine will set it to any
10349 * charset modifier found. The caller shouldn't change it. This way,
10350 * another charset modifier encountered in the parse can be detected as an
10351 * error, as we have decided to allow only one */
10353 const char c = **s;
10354 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10356 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10357 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10358 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10359 UTF ? SVf_UTF8 : 0);
10361 /* Pretend that it worked, so will continue processing before
10370 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10371 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10372 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10373 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10374 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10375 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10376 case LOCALE_PAT_MOD:
10378 goto multiple_charsets;
10380 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10383 case UNICODE_PAT_MOD:
10385 goto multiple_charsets;
10387 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10390 case ASCII_RESTRICT_PAT_MOD:
10392 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10396 /* Error if previous modifier wasn't an 'a', but if it was, see
10397 * if, and accept, a second occurrence (only) */
10398 if (*charset != 'a'
10399 || get_regex_charset(*pmfl)
10400 != REGEX_ASCII_RESTRICTED_CHARSET)
10402 goto multiple_charsets;
10404 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10408 case DEPENDS_PAT_MOD:
10410 goto multiple_charsets;
10412 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10421 if (*charset != c) {
10422 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10424 else if (c == 'a') {
10425 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10426 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10429 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10432 /* Pretend that it worked, so will continue processing before dieing */
10438 S_scan_pat(pTHX_ char *start, I32 type)
10442 const char * const valid_flags =
10443 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10444 char charset = '\0'; /* character set modifier */
10445 unsigned int x_mod_count = 0;
10447 PERL_ARGS_ASSERT_SCAN_PAT;
10449 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10451 Perl_croak(aTHX_ "Search pattern not terminated");
10453 pm = (PMOP*)newPMOP(type, 0);
10454 if (PL_multi_open == '?') {
10455 /* This is the only point in the code that sets PMf_ONCE: */
10456 pm->op_pmflags |= PMf_ONCE;
10458 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10459 allows us to restrict the list needed by reset to just the ??
10461 assert(type != OP_TRANS);
10463 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10466 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10469 elements = mg->mg_len / sizeof(PMOP**);
10470 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10471 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10472 mg->mg_len = elements * sizeof(PMOP**);
10473 PmopSTASH_set(pm,PL_curstash);
10477 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10478 * anon CV. False positives like qr/[(?{]/ are harmless */
10480 if (type == OP_QR) {
10482 char *e, *p = SvPV(PL_lex_stuff, len);
10484 for (; p < e; p++) {
10485 if (p[0] == '(' && p[1] == '?'
10486 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10488 pm->op_pmflags |= PMf_HAS_CV;
10492 pm->op_pmflags |= PMf_IS_QR;
10495 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10496 &s, &charset, &x_mod_count))
10498 /* issue a warning if /c is specified,but /g is not */
10499 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10501 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10502 "Use of /c modifier is meaningless without /g" );
10505 PL_lex_op = (OP*)pm;
10506 pl_yylval.ival = OP_MATCH;
10511 S_scan_subst(pTHX_ char *start)
10517 line_t linediff = 0;
10519 char charset = '\0'; /* character set modifier */
10520 unsigned int x_mod_count = 0;
10523 PERL_ARGS_ASSERT_SCAN_SUBST;
10525 pl_yylval.ival = OP_NULL;
10527 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10530 Perl_croak(aTHX_ "Substitution pattern not terminated");
10534 first_start = PL_multi_start;
10535 first_line = CopLINE(PL_curcop);
10536 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10538 SvREFCNT_dec_NN(PL_lex_stuff);
10539 PL_lex_stuff = NULL;
10540 Perl_croak(aTHX_ "Substitution replacement not terminated");
10542 PL_multi_start = first_start; /* so whole substitution is taken together */
10544 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10548 if (*s == EXEC_PAT_MOD) {
10552 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10553 &s, &charset, &x_mod_count))
10559 if ((pm->op_pmflags & PMf_CONTINUE)) {
10560 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10564 SV * const repl = newSVpvs("");
10567 pm->op_pmflags |= PMf_EVAL;
10568 for (; es > 1; es--) {
10569 sv_catpvs(repl, "eval ");
10571 sv_catpvs(repl, "do {");
10572 sv_catsv(repl, PL_parser->lex_sub_repl);
10573 sv_catpvs(repl, "}");
10574 SvREFCNT_dec(PL_parser->lex_sub_repl);
10575 PL_parser->lex_sub_repl = repl;
10579 linediff = CopLINE(PL_curcop) - first_line;
10581 CopLINE_set(PL_curcop, first_line);
10583 if (linediff || es) {
10584 /* the IVX field indicates that the replacement string is a s///e;
10585 * the NVX field indicates how many src code lines the replacement
10587 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10588 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10589 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10593 PL_lex_op = (OP*)pm;
10594 pl_yylval.ival = OP_SUBST;
10599 S_scan_trans(pTHX_ char *start)
10606 bool nondestruct = 0;
10609 PERL_ARGS_ASSERT_SCAN_TRANS;
10611 pl_yylval.ival = OP_NULL;
10613 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10615 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10619 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10621 SvREFCNT_dec_NN(PL_lex_stuff);
10622 PL_lex_stuff = NULL;
10623 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10626 complement = del = squash = 0;
10630 complement = OPpTRANS_COMPLEMENT;
10633 del = OPpTRANS_DELETE;
10636 squash = OPpTRANS_SQUASH;
10648 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10649 o->op_private &= ~OPpTRANS_ALL;
10650 o->op_private |= del|squash|complement;
10653 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10660 Takes a pointer to the first < in <<FOO.
10661 Returns a pointer to the byte following <<FOO.
10663 This function scans a heredoc, which involves different methods
10664 depending on whether we are in a string eval, quoted construct, etc.
10665 This is because PL_linestr could containing a single line of input, or
10666 a whole string being evalled, or the contents of the current quote-
10669 The two basic methods are:
10670 - Steal lines from the input stream
10671 - Scan the heredoc in PL_linestr and remove it therefrom
10673 In a file scope or filtered eval, the first method is used; in a
10674 string eval, the second.
10676 In a quote-like operator, we have to choose between the two,
10677 depending on where we can find a newline. We peek into outer lex-
10678 ing scopes until we find one with a newline in it. If we reach the
10679 outermost lexing scope and it is a file, we use the stream method.
10680 Otherwise it is treated as an eval.
10684 S_scan_heredoc(pTHX_ char *s)
10686 I32 op_type = OP_SCALAR;
10694 I32 indent_len = 0;
10695 bool indented = FALSE;
10696 const bool infile = PL_rsfp || PL_parser->filtered;
10697 const line_t origline = CopLINE(PL_curcop);
10698 LEXSHARED *shared = PL_parser->lex_shared;
10700 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10703 d = PL_tokenbuf + 1;
10704 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10705 *PL_tokenbuf = '\n';
10708 if (*peek == '~') {
10713 while (SPACE_OR_TAB(*peek))
10716 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10719 s = delimcpy(d, e, s, PL_bufend, term, &len);
10720 if (s == PL_bufend)
10721 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10727 /* <<\FOO is equivalent to <<'FOO' */
10732 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10733 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10737 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10738 peek += UTF ? UTF8SKIP(peek) : 1;
10741 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10742 Copy(s, d, len, char);
10747 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10748 Perl_croak(aTHX_ "Delimiter for here document is too long");
10752 len = d - PL_tokenbuf;
10754 #ifndef PERL_STRICT_CR
10755 d = (char *) memchr(s, '\r', PL_bufend - s);
10757 char * const olds = s;
10759 while (s < PL_bufend) {
10765 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10774 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10779 tmpstr = newSV_type(SVt_PVIV);
10780 SvGROW(tmpstr, 80);
10781 if (term == '\'') {
10782 op_type = OP_CONST;
10783 SvIV_set(tmpstr, -1);
10785 else if (term == '`') {
10786 op_type = OP_BACKTICK;
10787 SvIV_set(tmpstr, '\\');
10790 PL_multi_start = origline + 1 + PL_parser->herelines;
10791 PL_multi_open = PL_multi_close = '<';
10793 /* inside a string eval or quote-like operator */
10794 if (!infile || PL_lex_inwhat) {
10797 char * const olds = s;
10798 PERL_CONTEXT * const cx = CX_CUR();
10799 /* These two fields are not set until an inner lexing scope is
10800 entered. But we need them set here. */
10801 shared->ls_bufptr = s;
10802 shared->ls_linestr = PL_linestr;
10804 if (PL_lex_inwhat) {
10805 /* Look for a newline. If the current buffer does not have one,
10806 peek into the line buffer of the parent lexing scope, going
10807 up as many levels as necessary to find one with a newline
10810 while (!(s = (char *)memchr(
10811 (void *)shared->ls_bufptr, '\n',
10812 SvEND(shared->ls_linestr)-shared->ls_bufptr
10815 shared = shared->ls_prev;
10816 /* shared is only null if we have gone beyond the outermost
10817 lexing scope. In a file, we will have broken out of the
10818 loop in the previous iteration. In an eval, the string buf-
10819 fer ends with "\n;", so the while condition above will have
10820 evaluated to false. So shared can never be null. Or so you
10821 might think. Odd syntax errors like s;@{<<; can gobble up
10822 the implicit semicolon at the end of a flie, causing the
10823 file handle to be closed even when we are not in a string
10824 eval. So shared may be null in that case.
10825 (Closing '>>}' here to balance the earlier open brace for
10826 editors that look for matched pairs.) */
10827 if (UNLIKELY(!shared))
10829 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10830 most lexing scope. In a file, shared->ls_linestr at that
10831 level is just one line, so there is no body to steal. */
10832 if (infile && !shared->ls_prev) {
10838 else { /* eval or we've already hit EOF */
10839 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10844 linestr = shared->ls_linestr;
10845 bufend = SvEND(linestr);
10850 while (s < bufend - len + 1) {
10852 ++PL_parser->herelines;
10854 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10858 /* Only valid if it's preceded by whitespace only */
10859 while (backup != myolds && --backup >= myolds) {
10860 if (! SPACE_OR_TAB(*backup)) {
10866 /* No whitespace or all! */
10867 if (backup == s || *backup == '\n') {
10868 Newx(indent, indent_len + 1, char);
10869 memcpy(indent, backup + 1, indent_len);
10870 indent[indent_len] = 0;
10871 s--; /* before our delimiter */
10872 PL_parser->herelines--; /* this line doesn't count */
10879 while (s < bufend - len + 1
10880 && memNE(s,PL_tokenbuf,len) )
10883 ++PL_parser->herelines;
10887 if (s >= bufend - len + 1) {
10891 sv_setpvn(tmpstr,d+1,s-d);
10893 /* the preceding stmt passes a newline */
10894 PL_parser->herelines++;
10896 /* s now points to the newline after the heredoc terminator.
10897 d points to the newline before the body of the heredoc.
10900 /* We are going to modify linestr in place here, so set
10901 aside copies of the string if necessary for re-evals or
10903 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10904 check shared->re_eval_str. */
10905 if (shared->re_eval_start || shared->re_eval_str) {
10906 /* Set aside the rest of the regexp */
10907 if (!shared->re_eval_str)
10908 shared->re_eval_str =
10909 newSVpvn(shared->re_eval_start,
10910 bufend - shared->re_eval_start);
10911 shared->re_eval_start -= s-d;
10914 if (cxstack_ix >= 0
10915 && CxTYPE(cx) == CXt_EVAL
10916 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10917 && cx->blk_eval.cur_text == linestr)
10919 cx->blk_eval.cur_text = newSVsv(linestr);
10920 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10923 /* Copy everything from s onwards back to d. */
10924 Move(s,d,bufend-s + 1,char);
10925 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10926 /* Setting PL_bufend only applies when we have not dug deeper
10927 into other scopes, because sublex_done sets PL_bufend to
10928 SvEND(PL_linestr). */
10929 if (shared == PL_parser->lex_shared)
10930 PL_bufend = SvEND(linestr);
10935 char *oldbufptr_save;
10936 char *oldoldbufptr_save;
10938 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10939 term = PL_tokenbuf[1];
10941 linestr_save = PL_linestr; /* must restore this afterwards */
10942 d = s; /* and this */
10943 oldbufptr_save = PL_oldbufptr;
10944 oldoldbufptr_save = PL_oldoldbufptr;
10945 PL_linestr = newSVpvs("");
10946 PL_bufend = SvPVX(PL_linestr);
10949 PL_bufptr = PL_bufend;
10950 CopLINE_set(PL_curcop,
10951 origline + 1 + PL_parser->herelines);
10953 if ( !lex_next_chunk(LEX_NO_TERM)
10954 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10956 /* Simply freeing linestr_save might seem simpler here, as it
10957 does not matter what PL_linestr points to, since we are
10958 about to croak; but in a quote-like op, linestr_save
10959 will have been prospectively freed already, via
10960 SAVEFREESV(PL_linestr) in sublex_push, so it's easier to
10961 restore PL_linestr. */
10962 SvREFCNT_dec_NN(PL_linestr);
10963 PL_linestr = linestr_save;
10964 PL_oldbufptr = oldbufptr_save;
10965 PL_oldoldbufptr = oldoldbufptr_save;
10969 CopLINE_set(PL_curcop, origline);
10971 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10972 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10973 /* ^That should be enough to avoid this needing to grow: */
10974 sv_catpvs(PL_linestr, "\n\0");
10975 assert(s == SvPVX(PL_linestr));
10976 PL_bufend = SvEND(PL_linestr);
10980 PL_parser->herelines++;
10981 PL_last_lop = PL_last_uni = NULL;
10983 #ifndef PERL_STRICT_CR
10984 if (PL_bufend - PL_linestart >= 2) {
10985 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10986 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10988 PL_bufend[-2] = '\n';
10990 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10992 else if (PL_bufend[-1] == '\r')
10993 PL_bufend[-1] = '\n';
10995 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10996 PL_bufend[-1] = '\n';
10999 if (indented && (PL_bufend-s) >= len) {
11000 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
11003 char *backup = found;
11006 /* Only valid if it's preceded by whitespace only */
11007 while (backup != s && --backup >= s) {
11008 if (! SPACE_OR_TAB(*backup)) {
11014 /* All whitespace or none! */
11015 if (backup == found || SPACE_OR_TAB(*backup)) {
11016 Newx(indent, indent_len + 1, char);
11017 memcpy(indent, backup, indent_len);
11018 indent[indent_len] = 0;
11019 SvREFCNT_dec(PL_linestr);
11020 PL_linestr = linestr_save;
11021 PL_linestart = SvPVX(linestr_save);
11022 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11023 PL_oldbufptr = oldbufptr_save;
11024 PL_oldoldbufptr = oldoldbufptr_save;
11030 /* Didn't find it */
11031 sv_catsv(tmpstr,PL_linestr);
11034 if (*s == term && PL_bufend-s >= len
11035 && memEQ(s,PL_tokenbuf + 1,len))
11037 SvREFCNT_dec(PL_linestr);
11038 PL_linestr = linestr_save;
11039 PL_linestart = SvPVX(linestr_save);
11040 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
11041 PL_oldbufptr = oldbufptr_save;
11042 PL_oldoldbufptr = oldoldbufptr_save;
11047 sv_catsv(tmpstr,PL_linestr);
11053 PL_multi_end = origline + PL_parser->herelines;
11055 if (indented && indent) {
11056 STRLEN linecount = 1;
11057 STRLEN herelen = SvCUR(tmpstr);
11058 char *ss = SvPVX(tmpstr);
11059 char *se = ss + herelen;
11060 SV *newstr = newSV(herelen+1);
11063 /* Trim leading whitespace */
11065 /* newline only? Copy and move on */
11067 sv_catpvs(newstr,"\n");
11071 /* Found our indentation? Strip it */
11073 else if (se - ss >= indent_len
11074 && memEQ(ss, indent, indent_len))
11079 while ((ss + le) < se && *(ss + le) != '\n')
11082 sv_catpvn(newstr, ss, le);
11085 /* Line doesn't begin with our indentation? Croak */
11090 "Indentation on line %d of here-doc doesn't match delimiter",
11096 /* avoid sv_setsv() as we dont wan't to COW here */
11097 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
11099 SvREFCNT_dec_NN(newstr);
11102 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
11103 SvPV_shrink_to_cur(tmpstr);
11107 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
11111 PL_lex_stuff = tmpstr;
11112 pl_yylval.ival = op_type;
11118 SvREFCNT_dec(tmpstr);
11119 CopLINE_set(PL_curcop, origline);
11120 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
11124 /* scan_inputsymbol
11125 takes: position of first '<' in input buffer
11126 returns: position of first char following the matching '>' in
11128 side-effects: pl_yylval and lex_op are set.
11133 <<>> read from ARGV without magic open
11134 <FH> read from filehandle
11135 <pkg::FH> read from package qualified filehandle
11136 <pkg'FH> read from package qualified filehandle
11137 <$fh> read from filehandle in $fh
11138 <*.h> filename glob
11143 S_scan_inputsymbol(pTHX_ char *start)
11145 char *s = start; /* current position in buffer */
11148 bool nomagicopen = FALSE;
11149 char *d = PL_tokenbuf; /* start of temp holding space */
11150 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
11152 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
11154 end = (char *) memchr(s, '\n', PL_bufend - s);
11157 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
11158 nomagicopen = TRUE;
11164 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
11166 /* die if we didn't have space for the contents of the <>,
11167 or if it didn't end, or if we see a newline
11170 if (len >= (I32)sizeof PL_tokenbuf)
11171 Perl_croak(aTHX_ "Excessively long <> operator");
11173 Perl_croak(aTHX_ "Unterminated <> operator");
11178 Remember, only scalar variables are interpreted as filehandles by
11179 this code. Anything more complex (e.g., <$fh{$num}>) will be
11180 treated as a glob() call.
11181 This code makes use of the fact that except for the $ at the front,
11182 a scalar variable and a filehandle look the same.
11184 if (*d == '$' && d[1]) d++;
11186 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
11187 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
11188 d += UTF ? UTF8SKIP(d) : 1;
11191 /* If we've tried to read what we allow filehandles to look like, and
11192 there's still text left, then it must be a glob() and not a getline.
11193 Use scan_str to pull out the stuff between the <> and treat it
11194 as nothing more than a string.
11197 if (d - PL_tokenbuf != len) {
11198 pl_yylval.ival = OP_GLOB;
11199 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
11201 Perl_croak(aTHX_ "Glob not terminated");
11205 bool readline_overriden = FALSE;
11207 /* we're in a filehandle read situation */
11210 /* turn <> into <ARGV> */
11212 Copy("ARGV",d,5,char);
11214 /* Check whether readline() is overriden */
11215 if ((gv_readline = gv_override("readline",8)))
11216 readline_overriden = TRUE;
11218 /* if <$fh>, create the ops to turn the variable into a
11222 /* try to find it in the pad for this block, otherwise find
11223 add symbol table ops
11225 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11226 if (tmp != NOT_IN_PAD) {
11227 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11228 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11229 HEK * const stashname = HvNAME_HEK(stash);
11230 SV * const sym = newSVhek_mortal(stashname);
11231 sv_catpvs(sym, "::");
11232 sv_catpv(sym, d+1);
11237 OP * const o = newOP(OP_PADSV, 0);
11239 PL_lex_op = readline_overriden
11240 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11241 op_append_elem(OP_LIST, o,
11242 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11243 : newUNOP(OP_READLINE, 0, o);
11251 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11253 PL_lex_op = readline_overriden
11254 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11255 op_append_elem(OP_LIST,
11256 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11257 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11258 : newUNOP(OP_READLINE, 0,
11259 newUNOP(OP_RV2SV, 0,
11260 newGVOP(OP_GV, 0, gv)));
11262 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11263 pl_yylval.ival = OP_NULL;
11266 /* If it's none of the above, it must be a literal filehandle
11267 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11269 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11270 PL_lex_op = readline_overriden
11271 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11272 op_append_elem(OP_LIST,
11273 newGVOP(OP_GV, 0, gv),
11274 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11275 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11276 pl_yylval.ival = OP_NULL;
11278 /* leave the token generation above to avoid confusing the parser */
11279 if (!FEATURE_BAREWORD_FILEHANDLES_IS_ENABLED) {
11280 no_bareword_filehandle(d);
11291 start position in buffer
11292 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11293 only if they are of the open/close form
11294 keep_delims preserve the delimiters around the string
11295 re_reparse compiling a run-time /(?{})/:
11296 collapse // to /, and skip encoding src
11297 delimp if non-null, this is set to the position of
11298 the closing delimiter, or just after it if
11299 the closing and opening delimiters differ
11300 (i.e., the opening delimiter of a substitu-
11302 returns: position to continue reading from buffer
11303 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11304 updates the read buffer.
11306 This subroutine pulls a string out of the input. It is called for:
11307 q single quotes q(literal text)
11308 ' single quotes 'literal text'
11309 qq double quotes qq(interpolate $here please)
11310 " double quotes "interpolate $here please"
11311 qx backticks qx(/bin/ls -l)
11312 ` backticks `/bin/ls -l`
11313 qw quote words @EXPORT_OK = qw( func() $spam )
11314 m// regexp match m/this/
11315 s/// regexp substitute s/this/that/
11316 tr/// string transliterate tr/this/that/
11317 y/// string transliterate y/this/that/
11318 ($*@) sub prototypes sub foo ($)
11319 (stuff) sub attr parameters sub foo : attr(stuff)
11320 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11322 In most of these cases (all but <>, patterns and transliterate)
11323 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11324 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11325 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11328 It skips whitespace before the string starts, and treats the first
11329 character as the delimiter. If the delimiter is one of ([{< then
11330 the corresponding "close" character )]}> is used as the closing
11331 delimiter. It allows quoting of delimiters, and if the string has
11332 balanced delimiters ([{<>}]) it allows nesting.
11334 On success, the SV with the resulting string is put into lex_stuff or,
11335 if that is already non-NULL, into lex_repl. The second case occurs only
11336 when parsing the RHS of the special constructs s/// and tr/// (y///).
11337 For convenience, the terminating delimiter character is stuffed into
11342 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11346 SV *sv; /* scalar value: string */
11347 char *s = start; /* current position in the buffer */
11348 char *to; /* current position in the sv's data */
11349 int brackets = 1; /* bracket nesting level */
11350 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11351 UV open_delim_code; /* code point */
11352 char open_delim_str[UTF8_MAXBYTES+1];
11353 STRLEN delim_byte_len; /* each delimiter currently is the same number
11357 /* The only non-UTF character that isn't a stand alone grapheme is
11358 * white-space, hence can't be a delimiter. */
11359 const char * non_grapheme_msg = "Use of unassigned code point or"
11360 " non-standalone grapheme for a delimiter"
11362 PERL_ARGS_ASSERT_SCAN_STR;
11364 /* skip space before the delimiter */
11365 if (isSPACE(*s)) { /* skipspace can change the buffer 's' is in, so
11366 'start' also has to change */
11367 s = start = skipspace(s);
11370 /* mark where we are, in case we need to report errors */
11373 /* after skipping whitespace, the next character is the delimiter */
11374 if (! UTF || UTF8_IS_INVARIANT(*s)) {
11375 open_delim_code = (U8) *s;
11376 open_delim_str[0] = *s;
11377 delim_byte_len = 1;
11380 open_delim_code = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend,
11382 if (UNLIKELY(! is_grapheme((U8 *) start,
11387 yyerror(non_grapheme_msg);
11390 Copy(s, open_delim_str, delim_byte_len, char);
11392 open_delim_str[delim_byte_len] = '\0'; /* Only for safety */
11395 /* mark where we are */
11396 PL_multi_start = CopLINE(PL_curcop);
11397 PL_multi_open = open_delim_code;
11398 herelines = PL_parser->herelines;
11400 const char * legal_paired_opening_delims;
11401 const char * legal_paired_closing_delims;
11402 const char * deprecated_opening_delims;
11403 if (FEATURE_MORE_DELIMS_IS_ENABLED) {
11405 legal_paired_opening_delims = EXTRA_OPENING_UTF8_BRACKETS;
11406 legal_paired_closing_delims = EXTRA_CLOSING_UTF8_BRACKETS;
11408 /* We are deprecating using a closing delimiter as the opening, in
11409 * case we want in the future to accept them reversed. The string
11410 * may include ones that are legal, but the code below won't look
11411 * at this string unless it didn't find a legal opening one */
11412 deprecated_opening_delims = DEPRECATED_OPENING_UTF8_BRACKETS;
11415 legal_paired_opening_delims = EXTRA_OPENING_NON_UTF8_BRACKETS;
11416 legal_paired_closing_delims = EXTRA_CLOSING_NON_UTF8_BRACKETS;
11417 deprecated_opening_delims = DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11421 legal_paired_opening_delims = "([{<";
11422 legal_paired_closing_delims = ")]}>";
11423 deprecated_opening_delims = (UTF)
11424 ? DEPRECATED_OPENING_UTF8_BRACKETS
11425 : DEPRECATED_OPENING_NON_UTF8_BRACKETS;
11428 const char * legal_paired_opening_delims_end = legal_paired_opening_delims
11429 + strlen(legal_paired_opening_delims);
11430 const char * deprecated_delims_end = deprecated_opening_delims
11431 + strlen(deprecated_opening_delims);
11433 const char * close_delim_str = open_delim_str;
11434 UV close_delim_code = open_delim_code;
11436 /* If the delimiter has a mirror-image closing one, get it */
11437 const char *tmps = ninstr(legal_paired_opening_delims,
11438 legal_paired_opening_delims_end,
11439 open_delim_str, open_delim_str + delim_byte_len);
11441 /* Here, there is a paired delimiter, and tmps points to its position
11442 in the string of the accepted opening paired delimiters. The
11443 corresponding position in the string of closing ones is the
11444 beginning of the paired mate. Both contain the same number of
11446 close_delim_str = legal_paired_closing_delims
11447 + (tmps - legal_paired_opening_delims);
11449 /* The list of paired delimiters contains all the ASCII ones that have
11450 * always been legal, and no other ASCIIs. Don't raise a message if
11451 * using one of these */
11452 if (! isASCII(open_delim_code)) {
11453 Perl_ck_warner_d(aTHX_
11454 packWARN(WARN_EXPERIMENTAL__EXTRA_PAIRED_DELIMITERS),
11455 "Use of '%" UTF8f "' is experimental as a string delimiter",
11456 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11459 close_delim_code = (UTF)
11460 ? valid_utf8_to_uvchr((U8 *) close_delim_str, NULL)
11461 : * (U8 *) close_delim_str;
11463 else { /* Here, the delimiter isn't paired, hence the close is the same as
11464 the open; and has aready been set up. But make sure it isn't
11465 deprecated to use this particular delimiter, as we plan
11466 eventually to make it paired. */
11467 if (ninstr(deprecated_opening_delims, deprecated_delims_end,
11468 open_delim_str, open_delim_str + delim_byte_len))
11470 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11471 "Use of '%" UTF8f "' is deprecated as a string delimiter",
11472 UTF8fARG(UTF, delim_byte_len, open_delim_str));
11475 /* Note that a NUL may be used as a delimiter, and this happens when
11476 * delimitting an empty string, and no special handling for it is
11477 * needed, as ninstr() calls are used */
11480 PL_multi_close = close_delim_code;
11482 if (PL_multi_open == PL_multi_close) {
11483 keep_bracketed_quoted = FALSE;
11486 /* create a new SV to hold the contents. 79 is the SV's initial length.
11487 What a random number. */
11488 sv = newSV_type(SVt_PVIV);
11490 SvIV_set(sv, close_delim_code);
11491 (void)SvPOK_only(sv); /* validate pointer */
11493 /* move past delimiter and try to read a complete string */
11495 sv_catpvn(sv, s, delim_byte_len);
11496 s += delim_byte_len;
11498 /* extend sv if need be */
11499 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11500 /* set 'to' to the next character in the sv's string */
11501 to = SvPVX(sv)+SvCUR(sv);
11503 /* read until we run out of string, or we find the closing delimiter */
11504 while (s < PL_bufend) {
11505 /* embedded newlines increment the line count */
11506 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11507 COPLINE_INC_WITH_HERELINES;
11509 /* backslashes can escape the closing delimiter */
11510 if ( *s == '\\' && s < PL_bufend - delim_byte_len
11512 /* ... but not if the delimiter itself is a backslash */
11513 && close_delim_code != '\\')
11515 /* Here, we have an escaping backslash. If we're supposed to
11516 * discard those that escape the closing delimiter, just
11517 * discard this one */
11518 if ( ! keep_bracketed_quoted
11519 && ( memEQ(s + 1, open_delim_str, delim_byte_len)
11520 || ( PL_multi_open == PL_multi_close
11521 && re_reparse && s[1] == '\\')
11522 || memEQ(s + 1, close_delim_str, delim_byte_len)))
11526 else /* any other escapes are simply copied straight through */
11529 else if ( s < PL_bufend - (delim_byte_len - 1)
11530 && memEQ(s, close_delim_str, delim_byte_len)
11531 && --brackets <= 0)
11533 /* Found unescaped closing delimiter, unnested if we care about
11534 * that; so are done.
11536 * In the case of the opening and closing delimiters being
11537 * different, we have to deal with nesting; the conditional
11538 * above makes sure we don't get here until the nesting level,
11539 * 'brackets', is back down to zero. In the other case,
11540 * nesting isn't an issue, and 'brackets' never can get
11541 * incremented above 0, so will come here at the first closing
11544 * Only grapheme delimiters are legal. */
11545 if ( UTF /* All Non-UTF-8's are graphemes */
11546 && UNLIKELY(! is_grapheme((U8 *) start,
11549 close_delim_code)))
11551 yyerror(non_grapheme_msg);
11556 /* No nesting if open eq close */
11557 else if ( PL_multi_open != PL_multi_close
11558 && s < PL_bufend - (delim_byte_len - 1)
11559 && memEQ(s, open_delim_str, delim_byte_len))
11564 /* Here, still in the middle of the string; copy this character */
11565 if (! UTF || UTF8_IS_INVARIANT((U8) *s)) {
11569 size_t this_char_len = UTF8SKIP(s);
11570 Copy(s, to, this_char_len, char);
11571 s += this_char_len;
11572 to += this_char_len;
11576 } /* End of loop through buffer */
11578 /* Here, found end of the string, OR ran out of buffer: terminate the
11579 * copied string and update the sv's end-of-string */
11581 SvCUR_set(sv, to - SvPVX_const(sv));
11584 * this next chunk reads more into the buffer if we're not done yet
11588 break; /* handle case where we are done yet :-) */
11590 #ifndef PERL_STRICT_CR
11591 if (to - SvPVX_const(sv) >= 2) {
11592 if ( (to[-2] == '\r' && to[-1] == '\n')
11593 || (to[-2] == '\n' && to[-1] == '\r'))
11597 SvCUR_set(sv, to - SvPVX_const(sv));
11599 else if (to[-1] == '\r')
11602 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11606 /* if we're out of file, or a read fails, bail and reset the current
11607 line marker so we can report where the unterminated string began
11609 COPLINE_INC_WITH_HERELINES;
11610 PL_bufptr = PL_bufend;
11611 if (!lex_next_chunk(0)) {
11613 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11616 s = start = PL_bufptr;
11617 } /* End of infinite loop */
11619 /* at this point, we have successfully read the delimited string */
11622 sv_catpvn(sv, s, delim_byte_len);
11623 s += delim_byte_len;
11628 PL_multi_end = CopLINE(PL_curcop);
11629 CopLINE_set(PL_curcop, PL_multi_start);
11630 PL_parser->herelines = herelines;
11632 /* if we allocated too much space, give some back */
11633 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11634 SvLEN_set(sv, SvCUR(sv) + 1);
11635 SvPV_shrink_to_cur(sv);
11638 /* decide whether this is the first or second quoted string we've read
11643 PL_parser->lex_sub_repl = sv;
11646 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-delim_byte_len : s;
11652 takes: pointer to position in buffer
11653 returns: pointer to new position in buffer
11654 side-effects: builds ops for the constant in pl_yylval.op
11656 Read a number in any of the formats that Perl accepts:
11658 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11659 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11660 0b[01](_?[01])* binary integers
11661 0o?[0-7](_?[0-7])* octal integers
11662 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
11663 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
11665 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11668 If it reads a number without a decimal point or an exponent, it will
11669 try converting the number to an integer and see if it can do so
11670 without loss of precision.
11674 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11676 const char *s = start; /* current position in buffer */
11677 char *d; /* destination in temp buffer */
11678 char *e; /* end of temp buffer */
11679 NV nv; /* number read, as a double */
11680 SV *sv = NULL; /* place to put the converted number */
11681 bool floatit; /* boolean: int or float? */
11682 const char *lastub = NULL; /* position of last underbar */
11683 static const char* const number_too_long = "Number too long";
11684 bool warned_about_underscore = 0;
11685 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11686 #define WARN_ABOUT_UNDERSCORE() \
11688 if (!warned_about_underscore) { \
11689 warned_about_underscore = 1; \
11690 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11691 "Misplaced _ in number"); \
11694 /* Hexadecimal floating point.
11696 * In many places (where we have quads and NV is IEEE 754 double)
11697 * we can fit the mantissa bits of a NV into an unsigned quad.
11698 * (Note that UVs might not be quads even when we have quads.)
11699 * This will not work everywhere, though (either no quads, or
11700 * using long doubles), in which case we have to resort to NV,
11701 * which will probably mean horrible loss of precision due to
11702 * multiple fp operations. */
11703 bool hexfp = FALSE;
11704 int total_bits = 0;
11705 int significant_bits = 0;
11706 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11707 # define HEXFP_UQUAD
11708 Uquad_t hexfp_uquad = 0;
11709 int hexfp_frac_bits = 0;
11714 NV hexfp_mult = 1.0;
11715 UV high_non_zero = 0; /* highest digit */
11716 int non_zero_integer_digits = 0;
11717 bool new_octal = FALSE; /* octal with "0o" prefix */
11719 PERL_ARGS_ASSERT_SCAN_NUM;
11721 /* We use the first character to decide what type of number this is */
11725 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11727 /* if it starts with a 0, it could be an octal number, a decimal in
11728 0.13 disguise, or a hexadecimal number, or a binary number. */
11732 u holds the "number so far"
11733 overflowed was the number more than we can hold?
11735 Shift is used when we add a digit. It also serves as an "are
11736 we in octal/hex/binary?" indicator to disallow hex characters
11737 when in octal mode.
11741 bool overflowed = FALSE;
11742 bool just_zero = TRUE; /* just plain 0 or binary number? */
11743 bool has_digs = FALSE;
11744 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11745 static const char* const bases[5] =
11746 { "", "binary", "", "octal", "hexadecimal" };
11747 static const char* const Bases[5] =
11748 { "", "Binary", "", "Octal", "Hexadecimal" };
11749 static const char* const maxima[5] =
11751 "0b11111111111111111111111111111111",
11756 /* check for hex */
11757 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11761 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11766 /* check for a decimal in disguise */
11767 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11769 /* so it must be octal */
11773 if (isALPHA_FOLD_EQ(*s, 'o')) {
11781 WARN_ABOUT_UNDERSCORE();
11785 /* read the rest of the number */
11787 /* x is used in the overflow test,
11788 b is the digit we're adding on. */
11793 /* if we don't mention it, we're done */
11797 /* _ are ignored -- but warned about if consecutive */
11799 if (lastub && s == lastub + 1)
11800 WARN_ABOUT_UNDERSCORE();
11804 /* 8 and 9 are not octal */
11805 case '8': case '9':
11807 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11811 case '2': case '3': case '4':
11812 case '5': case '6': case '7':
11814 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11817 case '0': case '1':
11818 b = *s++ & 15; /* ASCII digit -> value of digit */
11822 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11823 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11824 /* make sure they said 0x */
11827 b = (*s++ & 7) + 9;
11829 /* Prepare to put the digit we have onto the end
11830 of the number so far. We check for overflows.
11837 assert(shift >= 0);
11838 x = u << shift; /* make room for the digit */
11840 total_bits += shift;
11842 if ((x >> shift) != u
11843 && !(PL_hints & HINT_NEW_BINARY)) {
11846 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11847 "Integer overflow in %s number",
11850 u = x | b; /* add the digit to the end */
11853 n *= nvshift[shift];
11854 /* If an NV has not enough bits in its
11855 * mantissa to represent an UV this summing of
11856 * small low-order numbers is a waste of time
11857 * (because the NV cannot preserve the
11858 * low-order bits anyway): we could just
11859 * remember when did we overflow and in the
11860 * end just multiply n by the right
11865 if (high_non_zero == 0 && b > 0)
11869 non_zero_integer_digits++;
11871 /* this could be hexfp, but peek ahead
11872 * to avoid matching ".." */
11873 if (UNLIKELY(HEXFP_PEEK(s))) {
11881 /* if we get here, we had success: make a scalar value from
11886 /* final misplaced underbar check */
11888 WARN_ABOUT_UNDERSCORE();
11890 if (UNLIKELY(HEXFP_PEEK(s))) {
11891 /* Do sloppy (on the underbars) but quick detection
11892 * (and value construction) for hexfp, the decimal
11893 * detection will shortly be more thorough with the
11894 * underbar checks. */
11896 significant_bits = non_zero_integer_digits * shift;
11899 #else /* HEXFP_NV */
11902 /* Ignore the leading zero bits of
11903 * the high (first) non-zero digit. */
11904 if (high_non_zero) {
11905 if (high_non_zero < 0x8)
11906 significant_bits--;
11907 if (high_non_zero < 0x4)
11908 significant_bits--;
11909 if (high_non_zero < 0x2)
11910 significant_bits--;
11917 bool accumulate = TRUE;
11919 int lim = 1 << shift;
11920 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11922 if (isXDIGIT(*h)) {
11923 significant_bits += shift;
11926 if (significant_bits < NV_MANT_DIG) {
11927 /* We are in the long "run" of xdigits,
11928 * accumulate the full four bits. */
11929 assert(shift >= 0);
11930 hexfp_uquad <<= shift;
11932 hexfp_frac_bits += shift;
11933 } else if (significant_bits - shift < NV_MANT_DIG) {
11934 /* We are at a hexdigit either at,
11935 * or straddling, the edge of mantissa.
11936 * We will try grabbing as many as
11937 * possible bits. */
11939 significant_bits - NV_MANT_DIG;
11943 hexfp_uquad <<= tail;
11944 assert((shift - tail) >= 0);
11945 hexfp_uquad |= b >> (shift - tail);
11946 hexfp_frac_bits += tail;
11948 /* Ignore the trailing zero bits
11949 * of the last non-zero xdigit.
11951 * The assumption here is that if
11952 * one has input of e.g. the xdigit
11953 * eight (0x8), there is only one
11954 * bit being input, not the full
11955 * four bits. Conversely, if one
11956 * specifies a zero xdigit, the
11957 * assumption is that one really
11958 * wants all those bits to be zero. */
11960 if ((b & 0x1) == 0x0) {
11961 significant_bits--;
11962 if ((b & 0x2) == 0x0) {
11963 significant_bits--;
11964 if ((b & 0x4) == 0x0) {
11965 significant_bits--;
11971 accumulate = FALSE;
11974 /* Keep skipping the xdigits, and
11975 * accumulating the significant bits,
11976 * but do not shift the uquad
11977 * (which would catastrophically drop
11978 * high-order bits) or accumulate the
11979 * xdigits anymore. */
11981 #else /* HEXFP_NV */
11983 nv_mult /= nvshift[shift];
11985 hexfp_nv += b * nv_mult;
11987 accumulate = FALSE;
11991 if (significant_bits >= NV_MANT_DIG)
11992 accumulate = FALSE;
11996 if ((total_bits > 0 || significant_bits > 0) &&
11997 isALPHA_FOLD_EQ(*h, 'p')) {
11998 bool negexp = FALSE;
12002 else if (*h == '-') {
12008 while (isDIGIT(*h) || *h == '_') {
12011 hexfp_exp += *h - '0';
12014 && -hexfp_exp < NV_MIN_EXP - 1) {
12015 /* NOTE: this means that the exponent
12016 * underflow warning happens for
12017 * the IEEE 754 subnormals (denormals),
12018 * because DBL_MIN_EXP etc are the lowest
12019 * possible binary (or, rather, DBL_RADIX-base)
12020 * exponent for normals, not subnormals.
12022 * This may or may not be a good thing. */
12023 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12024 "Hexadecimal float: exponent underflow");
12030 && hexfp_exp > NV_MAX_EXP - 1) {
12031 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12032 "Hexadecimal float: exponent overflow");
12040 hexfp_exp = -hexfp_exp;
12042 hexfp_exp -= hexfp_frac_bits;
12044 hexfp_mult = Perl_pow(2.0, hexfp_exp);
12051 if (!just_zero && !has_digs) {
12052 /* 0x, 0o or 0b with no digits, treat it as an error.
12053 Originally this backed up the parse before the b or
12054 x, but that has the potential for silent changes in
12055 behaviour, like for: "0x.3" and "0x+$foo".
12058 char *oldbp = PL_bufptr;
12059 if (*d) ++d; /* so the user sees the bad non-digit */
12060 PL_bufptr = (char *)d; /* so yyerror reports the context */
12061 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
12067 if (n > 4294967295.0)
12068 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12069 "%s number > %s non-portable",
12071 new_octal ? "0o37777777777" : maxima[shift]);
12076 if (u > 0xffffffff)
12077 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12078 "%s number > %s non-portable",
12080 new_octal ? "0o37777777777" : maxima[shift]);
12084 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
12085 sv = new_constant(start, s - start, "integer",
12086 sv, NULL, NULL, 0, NULL);
12087 else if (PL_hints & HINT_NEW_BINARY)
12088 sv = new_constant(start, s - start, "binary",
12089 sv, NULL, NULL, 0, NULL);
12094 handle decimal numbers.
12095 we're also sent here when we read a 0 as the first digit
12097 case '1': case '2': case '3': case '4': case '5':
12098 case '6': case '7': case '8': case '9': case '.':
12101 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
12124 NOT_REACHED; /* NOTREACHED */
12128 /* read next group of digits and _ and copy into d */
12131 || UNLIKELY(hexfp && isXDIGIT(*s)))
12133 /* skip underscores, checking for misplaced ones
12137 if (lastub && s == lastub + 1)
12138 WARN_ABOUT_UNDERSCORE();
12142 /* check for end of fixed-length buffer */
12144 Perl_croak(aTHX_ "%s", number_too_long);
12145 /* if we're ok, copy the character */
12150 /* final misplaced underbar check */
12151 if (lastub && s == lastub + 1)
12152 WARN_ABOUT_UNDERSCORE();
12154 /* read a decimal portion if there is one. avoid
12155 3..5 being interpreted as the number 3. followed
12158 if (*s == '.' && s[1] != '.') {
12163 WARN_ABOUT_UNDERSCORE();
12167 /* copy, ignoring underbars, until we run out of digits.
12171 || UNLIKELY(hexfp && isXDIGIT(*s));
12174 /* fixed length buffer check */
12176 Perl_croak(aTHX_ "%s", number_too_long);
12178 if (lastub && s == lastub + 1)
12179 WARN_ABOUT_UNDERSCORE();
12185 /* fractional part ending in underbar? */
12187 WARN_ABOUT_UNDERSCORE();
12188 if (*s == '.' && isDIGIT(s[1])) {
12189 /* oops, it's really a v-string, but without the "v" */
12195 /* read exponent part, if present */
12196 if ((isALPHA_FOLD_EQ(*s, 'e')
12197 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
12198 && memCHRs("+-0123456789_", s[1]))
12200 int exp_digits = 0;
12201 const char *save_s = s;
12204 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
12205 ditto for p (hexfloats) */
12206 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
12207 /* At least some Mach atof()s don't grok 'E' */
12210 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
12217 /* stray preinitial _ */
12219 WARN_ABOUT_UNDERSCORE();
12223 /* allow positive or negative exponent */
12224 if (*s == '+' || *s == '-')
12227 /* stray initial _ */
12229 WARN_ABOUT_UNDERSCORE();
12233 /* read digits of exponent */
12234 while (isDIGIT(*s) || *s == '_') {
12238 Perl_croak(aTHX_ "%s", number_too_long);
12242 if (((lastub && s == lastub + 1)
12243 || (!isDIGIT(s[1]) && s[1] != '_')))
12244 WARN_ABOUT_UNDERSCORE();
12250 /* no exponent digits, the [eEpP] could be for something else,
12251 * though in practice we don't get here for p since that's preparsed
12252 * earlier, and results in only the 0xX being consumed, so behave similarly
12253 * for decimal floats and consume only the D.DD, leaving the [eE] to the
12266 We try to do an integer conversion first if no characters
12267 indicating "float" have been found.
12272 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
12274 if (flags == IS_NUMBER_IN_UV) {
12276 sv = newSViv(uv); /* Prefer IVs over UVs. */
12279 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12280 if (uv <= (UV) IV_MIN)
12281 sv = newSViv(-(IV)uv);
12288 /* terminate the string */
12290 if (UNLIKELY(hexfp)) {
12291 # ifdef NV_MANT_DIG
12292 if (significant_bits > NV_MANT_DIG)
12293 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12294 "Hexadecimal float: mantissa overflow");
12297 nv = hexfp_uquad * hexfp_mult;
12298 #else /* HEXFP_NV */
12299 nv = hexfp_nv * hexfp_mult;
12302 nv = Atof(PL_tokenbuf);
12308 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12309 const char *const key = floatit ? "float" : "integer";
12310 const STRLEN keylen = floatit ? 5 : 7;
12311 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12312 key, keylen, sv, NULL, NULL, 0, NULL);
12316 /* if it starts with a v, it could be a v-string */
12319 sv = newSV(5); /* preallocate storage space */
12320 ENTER_with_name("scan_vstring");
12322 s = scan_vstring(s, PL_bufend, sv);
12323 SvREFCNT_inc_simple_void_NN(sv);
12324 LEAVE_with_name("scan_vstring");
12328 /* make the op for the constant and return */
12331 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12333 lvalp->opval = NULL;
12339 S_scan_formline(pTHX_ char *s)
12341 SV * const stuff = newSVpvs("");
12342 bool needargs = FALSE;
12343 bool eofmt = FALSE;
12345 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12347 while (!needargs) {
12351 #ifdef PERL_STRICT_CR
12352 while (SPACE_OR_TAB(*t))
12355 while (SPACE_OR_TAB(*t) || *t == '\r')
12358 if (*t == '\n' || t == PL_bufend) {
12363 eol = (char *) memchr(s,'\n',PL_bufend-s);
12372 for (t = s; t < eol; t++) {
12373 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12375 goto enough; /* ~~ must be first line in formline */
12377 if (*t == '@' || *t == '^')
12381 sv_catpvn(stuff, s, eol-s);
12382 #ifndef PERL_STRICT_CR
12383 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12384 char *end = SvPVX(stuff) + SvCUR(stuff);
12387 SvCUR_set(stuff, SvCUR(stuff) - 1);
12395 if ((PL_rsfp || PL_parser->filtered)
12396 && PL_parser->form_lex_state == LEX_NORMAL) {
12398 PL_bufptr = PL_bufend;
12399 COPLINE_INC_WITH_HERELINES;
12400 got_some = lex_next_chunk(0);
12401 CopLINE_dec(PL_curcop);
12406 incline(s, PL_bufend);
12409 if (!SvCUR(stuff) || needargs)
12410 PL_lex_state = PL_parser->form_lex_state;
12411 if (SvCUR(stuff)) {
12412 PL_expect = XSTATE;
12414 const char *s2 = s;
12415 while (isSPACE(*s2) && *s2 != '\n')
12418 PL_expect = XTERMBLOCK;
12419 NEXTVAL_NEXTTOKE.ival = 0;
12422 NEXTVAL_NEXTTOKE.ival = 0;
12423 force_next(FORMLBRACK);
12426 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12429 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12433 SvREFCNT_dec(stuff);
12435 PL_lex_formbrack = 0;
12441 =for apidoc start_subparse
12443 Set things up for parsing a subroutine.
12445 If C<is_format> is non-zero, the input is to be considered a format sub
12446 (a specialised sub used to implement perl's C<format> feature); else a
12449 C<flags> are added to the flags for C<PL_compcv>.
12451 This returns the value of C<PL_savestack_ix> that was in effect upon entry to
12458 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12460 const I32 oldsavestack_ix = PL_savestack_ix;
12461 CV* const outsidecv = PL_compcv;
12463 SAVEI32(PL_subline);
12464 save_item(PL_subname);
12465 SAVESPTR(PL_compcv);
12467 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12468 CvFLAGS(PL_compcv) |= flags;
12470 PL_subline = CopLINE(PL_curcop);
12471 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12472 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12473 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12474 if (outsidecv && CvPADLIST(outsidecv))
12475 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12477 return oldsavestack_ix;
12481 /* Do extra initialisation of a CV (typically one just created by
12482 * start_subparse()) if that CV is for a named sub
12486 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12488 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12490 if (nameop->op_type == OP_CONST) {
12491 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12492 if ( strEQ(name, "BEGIN")
12493 || strEQ(name, "END")
12494 || strEQ(name, "INIT")
12495 || strEQ(name, "CHECK")
12496 || strEQ(name, "UNITCHECK")
12501 /* State subs inside anonymous subs need to be
12502 clonable themselves. */
12503 if ( CvANON(CvOUTSIDE(cv))
12504 || CvCLONE(CvOUTSIDE(cv))
12505 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12507 ))[nameop->op_targ])
12514 S_yywarn(pTHX_ const char *const s, U32 flags)
12516 PERL_ARGS_ASSERT_YYWARN;
12518 PL_in_eval |= EVAL_WARNONLY;
12519 yyerror_pv(s, flags);
12524 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12526 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12529 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12532 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12534 NOT_REACHED; /* NOTREACHED */
12540 /* Called, after at least one error has been found, to abort the parse now,
12541 * instead of trying to forge ahead */
12543 yyerror_pvn(NULL, 0, 0);
12547 Perl_yyerror(pTHX_ const char *const s)
12549 PERL_ARGS_ASSERT_YYERROR;
12550 return yyerror_pvn(s, strlen(s), 0);
12554 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12556 PERL_ARGS_ASSERT_YYERROR_PV;
12557 return yyerror_pvn(s, strlen(s), flags);
12561 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12563 const char *context = NULL;
12566 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12567 int yychar = PL_parser->yychar;
12569 /* Output error message 's' with length 'len'. 'flags' are SV flags that
12570 * apply. If the number of errors found is large enough, it abandons
12571 * parsing. If 's' is NULL, there is no message, and it abandons
12572 * processing unconditionally */
12575 if (!yychar || (yychar == PERLY_SEMICOLON && !PL_rsfp))
12576 sv_catpvs(where_sv, "at EOF");
12577 else if ( PL_oldoldbufptr
12578 && PL_bufptr > PL_oldoldbufptr
12579 && PL_bufptr - PL_oldoldbufptr < 200
12580 && PL_oldoldbufptr != PL_oldbufptr
12581 && PL_oldbufptr != PL_bufptr)
12583 while (isSPACE(*PL_oldoldbufptr))
12585 context = PL_oldoldbufptr;
12586 contlen = PL_bufptr - PL_oldoldbufptr;
12588 else if ( PL_oldbufptr
12589 && PL_bufptr > PL_oldbufptr
12590 && PL_bufptr - PL_oldbufptr < 200
12591 && PL_oldbufptr != PL_bufptr)
12593 while (isSPACE(*PL_oldbufptr))
12595 context = PL_oldbufptr;
12596 contlen = PL_bufptr - PL_oldbufptr;
12598 else if (yychar > 255)
12599 sv_catpvs(where_sv, "next token ???");
12600 else if (yychar == YYEMPTY) {
12601 if (PL_lex_state == LEX_NORMAL)
12602 sv_catpvs(where_sv, "at end of line");
12603 else if (PL_lex_inpat)
12604 sv_catpvs(where_sv, "within pattern");
12606 sv_catpvs(where_sv, "within string");
12609 sv_catpvs(where_sv, "next char ");
12611 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12612 else if (isPRINT_LC(yychar)) {
12613 const char string = yychar;
12614 sv_catpvn(where_sv, &string, 1);
12617 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12619 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12620 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12621 OutCopFILE(PL_curcop),
12622 (IV)(PL_parser->preambling == NOLINE
12623 ? CopLINE(PL_curcop)
12624 : PL_parser->preambling));
12626 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12627 UTF8fARG(UTF, contlen, context));
12629 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12630 if ( PL_multi_start < PL_multi_end
12631 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12633 Perl_sv_catpvf(aTHX_ msg,
12634 " (Might be a runaway multi-line %c%c string starting on"
12635 " line %" IVdf ")\n",
12636 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12639 if (PL_in_eval & EVAL_WARNONLY) {
12640 PL_in_eval &= ~EVAL_WARNONLY;
12641 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12647 if (s == NULL || PL_error_count >= 10) {
12648 const char * msg = "";
12649 const char * const name = OutCopFILE(PL_curcop);
12652 SV * errsv = ERRSV;
12653 if (SvCUR(errsv)) {
12654 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12659 abort_execution(msg, name);
12662 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12666 PL_in_my_stash = NULL;
12671 S_swallow_bom(pTHX_ U8 *s)
12673 const STRLEN slen = SvCUR(PL_linestr);
12675 PERL_ARGS_ASSERT_SWALLOW_BOM;
12679 if (s[1] == 0xFE) {
12680 /* UTF-16 little-endian? (or UTF-32LE?) */
12681 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12682 /* diag_listed_as: Unsupported script encoding %s */
12683 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12684 #ifndef PERL_NO_UTF16_FILTER
12686 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12689 if (PL_bufend > (char*)s) {
12690 s = add_utf16_textfilter(s, TRUE);
12693 /* diag_listed_as: Unsupported script encoding %s */
12694 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12699 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12700 #ifndef PERL_NO_UTF16_FILTER
12702 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12705 if (PL_bufend > (char *)s) {
12706 s = add_utf16_textfilter(s, FALSE);
12709 /* diag_listed_as: Unsupported script encoding %s */
12710 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12714 case BOM_UTF8_FIRST_BYTE: {
12715 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12717 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12719 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
12726 if (s[2] == 0xFE && s[3] == 0xFF) {
12727 /* UTF-32 big-endian */
12728 /* diag_listed_as: Unsupported script encoding %s */
12729 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12732 else if (s[2] == 0 && s[3] != 0) {
12735 * are a good indicator of UTF-16BE. */
12736 #ifndef PERL_NO_UTF16_FILTER
12738 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12740 s = add_utf16_textfilter(s, FALSE);
12742 /* diag_listed_as: Unsupported script encoding %s */
12743 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12750 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12753 * are a good indicator of UTF-16LE. */
12754 #ifndef PERL_NO_UTF16_FILTER
12756 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12758 s = add_utf16_textfilter(s, TRUE);
12760 /* diag_listed_as: Unsupported script encoding %s */
12761 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12769 #ifndef PERL_NO_UTF16_FILTER
12771 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12773 SV *const filter = FILTER_DATA(idx);
12774 /* We re-use this each time round, throwing the contents away before we
12776 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12777 SV *const utf8_buffer = filter;
12778 IV status = IoPAGE(filter);
12779 const bool reverse = cBOOL(IoLINES(filter));
12782 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12784 /* As we're automatically added, at the lowest level, and hence only called
12785 from this file, we can be sure that we're not called in block mode. Hence
12786 don't bother writing code to deal with block mode. */
12788 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12791 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12793 DEBUG_P(PerlIO_printf(Perl_debug_log,
12794 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12795 FPTR2DPTR(void *, S_utf16_textfilter),
12796 reverse ? 'l' : 'b', idx, maxlen, status,
12797 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12804 /* First, look in our buffer of existing UTF-8 data: */
12805 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12809 } else if (status == 0) {
12811 IoPAGE(filter) = 0;
12812 nl = SvEND(utf8_buffer);
12815 STRLEN got = nl - SvPVX(utf8_buffer);
12816 /* Did we have anything to append? */
12818 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12819 /* Everything else in this code works just fine if SVp_POK isn't
12820 set. This, however, needs it, and we need it to work, else
12821 we loop infinitely because the buffer is never consumed. */
12822 sv_chop(utf8_buffer, nl);
12826 /* OK, not a complete line there, so need to read some more UTF-16.
12827 Read an extra octect if the buffer currently has an odd number. */
12831 if (SvCUR(utf16_buffer) >= 2) {
12832 /* Location of the high octet of the last complete code point.
12833 Gosh, UTF-16 is a pain. All the benefits of variable length,
12834 *coupled* with all the benefits of partial reads and
12836 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12837 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12839 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12843 /* We have the first half of a surrogate. Read more. */
12844 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12847 status = FILTER_READ(idx + 1, utf16_buffer,
12848 160 + (SvCUR(utf16_buffer) & 1));
12849 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12850 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12853 IoPAGE(filter) = status;
12858 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12859 * require 4 bytes per char */
12860 chars = SvCUR(utf16_buffer) >> 1;
12861 have = SvCUR(utf8_buffer);
12863 /* Assume the worst case size as noted by the functions: twice the
12864 * number of input bytes */
12865 SvGROW(utf8_buffer, have + chars * 4 + 1);
12868 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12869 (U8*)SvPVX_const(utf8_buffer) + have,
12870 chars * 2, &newlen);
12872 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12873 (U8*)SvPVX_const(utf8_buffer) + have,
12874 chars * 2, &newlen);
12876 SvCUR_set(utf8_buffer, have + newlen);
12879 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12880 it's private to us, and utf16_to_utf8{,reversed} take a
12881 (pointer,length) pair, rather than a NUL-terminated string. */
12882 if(SvCUR(utf16_buffer) & 1) {
12883 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12884 SvCUR_set(utf16_buffer, 1);
12886 SvCUR_set(utf16_buffer, 0);
12889 DEBUG_P(PerlIO_printf(Perl_debug_log,
12890 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12892 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12893 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12898 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12900 SV *filter = filter_add(S_utf16_textfilter, NULL);
12902 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12904 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12906 IoLINES(filter) = reversed;
12907 IoPAGE(filter) = 1; /* Not EOF */
12909 /* Sadly, we have to return a valid pointer, come what may, so we have to
12910 ignore any error return from this. */
12911 SvCUR_set(PL_linestr, 0);
12912 if (FILTER_READ(0, PL_linestr, 0)) {
12913 SvUTF8_on(PL_linestr);
12915 SvUTF8_on(PL_linestr);
12917 PL_bufend = SvEND(PL_linestr);
12918 return (U8*)SvPVX(PL_linestr);
12923 =for apidoc scan_vstring
12925 Returns a pointer to the next character after the parsed
12926 vstring, as well as updating the passed in sv.
12928 Function must be called like
12930 sv = sv_2mortal(newSV(5));
12931 s = scan_vstring(s,e,sv);
12933 where s and e are the start and end of the string.
12934 The sv should already be large enough to store the vstring
12935 passed in, for performance reasons.
12937 This function may croak if fatal warnings are enabled in the
12938 calling scope, hence the sv_2mortal in the example (to prevent
12939 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12946 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12948 const char *pos = s;
12949 const char *start = s;
12951 PERL_ARGS_ASSERT_SCAN_VSTRING;
12953 if (*pos == 'v') pos++; /* get past 'v' */
12954 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12956 if ( *pos != '.') {
12957 /* this may not be a v-string if followed by => */
12958 const char *next = pos;
12959 while (next < e && isSPACE(*next))
12961 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12962 /* return string not v-string */
12963 sv_setpvn(sv,(char *)s,pos-s);
12964 return (char *)pos;
12968 if (!isALPHA(*pos)) {
12969 U8 tmpbuf[UTF8_MAXBYTES+1];
12972 s++; /* get past 'v' */
12977 /* this is atoi() that tolerates underscores */
12980 const char *end = pos;
12982 while (--end >= s) {
12984 const UV orev = rev;
12985 rev += (*end - '0') * mult;
12988 /* diag_listed_as: Integer overflow in %s number */
12989 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12990 "Integer overflow in decimal number");
12994 /* Append native character for the rev point */
12995 tmpend = uvchr_to_utf8(tmpbuf, rev);
12996 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12997 if (!UVCHR_IS_INVARIANT(rev))
12999 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
13005 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
13009 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13016 Perl_keyword_plugin_standard(pTHX_
13017 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13019 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13020 PERL_UNUSED_CONTEXT;
13021 PERL_UNUSED_ARG(keyword_ptr);
13022 PERL_UNUSED_ARG(keyword_len);
13023 PERL_UNUSED_ARG(op_ptr);
13024 return KEYWORD_PLUGIN_DECLINE;
13028 =for apidoc_section $lexer
13029 =for apidoc wrap_keyword_plugin
13031 Puts a C function into the chain of keyword plugins. This is the
13032 preferred way to manipulate the L</PL_keyword_plugin> variable.
13033 C<new_plugin> is a pointer to the C function that is to be added to the
13034 keyword plugin chain, and C<old_plugin_p> points to the storage location
13035 where a pointer to the next function in the chain will be stored. The
13036 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
13037 while the value previously stored there is written to C<*old_plugin_p>.
13039 L</PL_keyword_plugin> is global to an entire process, and a module wishing
13040 to hook keyword parsing may find itself invoked more than once per
13041 process, typically in different threads. To handle that situation, this
13042 function is idempotent. The location C<*old_plugin_p> must initially
13043 (once per process) contain a null pointer. A C variable of static
13044 duration (declared at file scope, typically also marked C<static> to give
13045 it internal linkage) will be implicitly initialised appropriately, if it
13046 does not have an explicit initialiser. This function will only actually
13047 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
13048 function is also thread safe on the small scale. It uses appropriate
13049 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
13051 When this function is called, the function referenced by C<new_plugin>
13052 must be ready to be called, except for C<*old_plugin_p> being unfilled.
13053 In a threading situation, C<new_plugin> may be called immediately, even
13054 before this function has returned. C<*old_plugin_p> will always be
13055 appropriately set before C<new_plugin> is called. If C<new_plugin>
13056 decides not to do anything special with the identifier that it is given
13057 (which is the usual case for most calls to a keyword plugin), it must
13058 chain the plugin function referenced by C<*old_plugin_p>.
13060 Taken all together, XS code to install a keyword plugin should typically
13061 look something like this:
13063 static Perl_keyword_plugin_t next_keyword_plugin;
13064 static OP *my_keyword_plugin(pTHX_
13065 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13067 if (memEQs(keyword_ptr, keyword_len,
13068 "my_new_keyword")) {
13071 return next_keyword_plugin(aTHX_
13072 keyword_ptr, keyword_len, op_ptr);
13076 wrap_keyword_plugin(my_keyword_plugin,
13077 &next_keyword_plugin);
13079 Direct access to L</PL_keyword_plugin> should be avoided.
13085 Perl_wrap_keyword_plugin(pTHX_
13086 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
13089 PERL_UNUSED_CONTEXT;
13090 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
13091 if (*old_plugin_p) return;
13092 KEYWORD_PLUGIN_MUTEX_LOCK;
13093 if (!*old_plugin_p) {
13094 *old_plugin_p = PL_keyword_plugin;
13095 PL_keyword_plugin = new_plugin;
13097 KEYWORD_PLUGIN_MUTEX_UNLOCK;
13100 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
13102 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
13104 SAVEI32(PL_lex_brackets);
13105 if (PL_lex_brackets > 100)
13106 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13107 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13108 SAVEI32(PL_lex_allbrackets);
13109 PL_lex_allbrackets = 0;
13110 SAVEI8(PL_lex_fakeeof);
13111 PL_lex_fakeeof = (U8)fakeeof;
13112 if(yyparse(gramtype) && !PL_parser->error_count)
13113 qerror(Perl_mess(aTHX_ "Parse error"));
13116 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
13118 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
13122 SAVEVPTR(PL_eval_root);
13123 PL_eval_root = NULL;
13124 parse_recdescent(gramtype, fakeeof);
13130 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
13132 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
13135 if (flags & ~PARSE_OPTIONAL)
13136 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
13137 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
13138 if (!exprop && !(flags & PARSE_OPTIONAL)) {
13139 if (!PL_parser->error_count)
13140 qerror(Perl_mess(aTHX_ "Parse error"));
13141 exprop = newOP(OP_NULL, 0);
13147 =for apidoc parse_arithexpr
13149 Parse a Perl arithmetic expression. This may contain operators of precedence
13150 down to the bit shift operators. The expression must be followed (and thus
13151 terminated) either by a comparison or lower-precedence operator or by
13152 something that would normally terminate an expression such as semicolon.
13153 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13154 otherwise it is mandatory. It is up to the caller to ensure that the
13155 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13156 the source of the code to be parsed and the lexical context for the
13159 The op tree representing the expression is returned. If an optional
13160 expression is absent, a null pointer is returned, otherwise the pointer
13163 If an error occurs in parsing or compilation, in most cases a valid op
13164 tree is returned anyway. The error is reflected in the parser state,
13165 normally resulting in a single exception at the top level of parsing
13166 which covers all the compilation errors that occurred. Some compilation
13167 errors, however, will throw an exception immediately.
13169 =for apidoc Amnh||PARSE_OPTIONAL
13176 Perl_parse_arithexpr(pTHX_ U32 flags)
13178 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
13182 =for apidoc parse_termexpr
13184 Parse a Perl term expression. This may contain operators of precedence
13185 down to the assignment operators. The expression must be followed (and thus
13186 terminated) either by a comma or lower-precedence operator or by
13187 something that would normally terminate an expression such as semicolon.
13188 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13189 otherwise it is mandatory. It is up to the caller to ensure that the
13190 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13191 the source of the code to be parsed and the lexical context for the
13194 The op tree representing the expression is returned. If an optional
13195 expression is absent, a null pointer is returned, otherwise the pointer
13198 If an error occurs in parsing or compilation, in most cases a valid op
13199 tree is returned anyway. The error is reflected in the parser state,
13200 normally resulting in a single exception at the top level of parsing
13201 which covers all the compilation errors that occurred. Some compilation
13202 errors, however, will throw an exception immediately.
13208 Perl_parse_termexpr(pTHX_ U32 flags)
13210 return parse_expr(LEX_FAKEEOF_COMMA, flags);
13214 =for apidoc parse_listexpr
13216 Parse a Perl list expression. This may contain operators of precedence
13217 down to the comma operator. The expression must be followed (and thus
13218 terminated) either by a low-precedence logic operator such as C<or> or by
13219 something that would normally terminate an expression such as semicolon.
13220 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
13221 otherwise it is mandatory. It is up to the caller to ensure that the
13222 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
13223 the source of the code to be parsed and the lexical context for the
13226 The op tree representing the expression is returned. If an optional
13227 expression is absent, a null pointer is returned, otherwise the pointer
13230 If an error occurs in parsing or compilation, in most cases a valid op
13231 tree is returned anyway. The error is reflected in the parser state,
13232 normally resulting in a single exception at the top level of parsing
13233 which covers all the compilation errors that occurred. Some compilation
13234 errors, however, will throw an exception immediately.
13240 Perl_parse_listexpr(pTHX_ U32 flags)
13242 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
13246 =for apidoc parse_fullexpr
13248 Parse a single complete Perl expression. This allows the full
13249 expression grammar, including the lowest-precedence operators such
13250 as C<or>. The expression must be followed (and thus terminated) by a
13251 token that an expression would normally be terminated by: end-of-file,
13252 closing bracketing punctuation, semicolon, or one of the keywords that
13253 signals a postfix expression-statement modifier. If C<flags> has the
13254 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
13255 mandatory. It is up to the caller to ensure that the dynamic parser
13256 state (L</PL_parser> et al) is correctly set to reflect the source of
13257 the code to be parsed and the lexical context for the expression.
13259 The op tree representing the expression is returned. If an optional
13260 expression is absent, a null pointer is returned, otherwise the pointer
13263 If an error occurs in parsing or compilation, in most cases a valid op
13264 tree is returned anyway. The error is reflected in the parser state,
13265 normally resulting in a single exception at the top level of parsing
13266 which covers all the compilation errors that occurred. Some compilation
13267 errors, however, will throw an exception immediately.
13273 Perl_parse_fullexpr(pTHX_ U32 flags)
13275 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
13279 =for apidoc parse_block
13281 Parse a single complete Perl code block. This consists of an opening
13282 brace, a sequence of statements, and a closing brace. The block
13283 constitutes a lexical scope, so C<my> variables and various compile-time
13284 effects can be contained within it. It is up to the caller to ensure
13285 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13286 reflect the source of the code to be parsed and the lexical context for
13289 The op tree representing the code block is returned. This is always a
13290 real op, never a null pointer. It will normally be a C<lineseq> list,
13291 including C<nextstate> or equivalent ops. No ops to construct any kind
13292 of runtime scope are included by virtue of it being a block.
13294 If an error occurs in parsing or compilation, in most cases a valid op
13295 tree (most likely null) is returned anyway. The error is reflected in
13296 the parser state, normally resulting in a single exception at the top
13297 level of parsing which covers all the compilation errors that occurred.
13298 Some compilation errors, however, will throw an exception immediately.
13300 The C<flags> parameter is reserved for future use, and must always
13307 Perl_parse_block(pTHX_ U32 flags)
13310 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13311 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13315 =for apidoc parse_barestmt
13317 Parse a single unadorned Perl statement. This may be a normal imperative
13318 statement or a declaration that has compile-time effect. It does not
13319 include any label or other affixture. It is up to the caller to ensure
13320 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13321 reflect the source of the code to be parsed and the lexical context for
13324 The op tree representing the statement is returned. This may be a
13325 null pointer if the statement is null, for example if it was actually
13326 a subroutine definition (which has compile-time side effects). If not
13327 null, it will be ops directly implementing the statement, suitable to
13328 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13329 equivalent op (except for those embedded in a scope contained entirely
13330 within the statement).
13332 If an error occurs in parsing or compilation, in most cases a valid op
13333 tree (most likely null) is returned anyway. The error is reflected in
13334 the parser state, normally resulting in a single exception at the top
13335 level of parsing which covers all the compilation errors that occurred.
13336 Some compilation errors, however, will throw an exception immediately.
13338 The C<flags> parameter is reserved for future use, and must always
13345 Perl_parse_barestmt(pTHX_ U32 flags)
13348 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13349 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13353 =for apidoc parse_label
13355 Parse a single label, possibly optional, of the type that may prefix a
13356 Perl statement. It is up to the caller to ensure that the dynamic parser
13357 state (L</PL_parser> et al) is correctly set to reflect the source of
13358 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13359 label is optional, otherwise it is mandatory.
13361 The name of the label is returned in the form of a fresh scalar. If an
13362 optional label is absent, a null pointer is returned.
13364 If an error occurs in parsing, which can only occur if the label is
13365 mandatory, a valid label is returned anyway. The error is reflected in
13366 the parser state, normally resulting in a single exception at the top
13367 level of parsing which covers all the compilation errors that occurred.
13373 Perl_parse_label(pTHX_ U32 flags)
13375 if (flags & ~PARSE_OPTIONAL)
13376 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13378 PL_parser->yychar = yylex();
13379 if (PL_parser->yychar == LABEL) {
13380 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13381 PL_parser->yychar = YYEMPTY;
13382 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13383 op_free(pl_yylval.opval);
13391 STRLEN wlen, bufptr_pos;
13394 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13396 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13397 if (word_takes_any_delimiter(s, wlen))
13399 bufptr_pos = s - SvPVX(PL_linestr);
13401 lex_read_space(LEX_KEEP_PREVIOUS);
13403 s = SvPVX(PL_linestr) + bufptr_pos;
13404 if (t[0] == ':' && t[1] != ':') {
13405 PL_oldoldbufptr = PL_oldbufptr;
13408 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13412 if (flags & PARSE_OPTIONAL) {
13415 qerror(Perl_mess(aTHX_ "Parse error"));
13416 return newSVpvs("x");
13423 =for apidoc parse_fullstmt
13425 Parse a single complete Perl statement. This may be a normal imperative
13426 statement or a declaration that has compile-time effect, and may include
13427 optional labels. It is up to the caller to ensure that the dynamic
13428 parser state (L</PL_parser> et al) is correctly set to reflect the source
13429 of the code to be parsed and the lexical context for the statement.
13431 The op tree representing the statement is returned. This may be a
13432 null pointer if the statement is null, for example if it was actually
13433 a subroutine definition (which has compile-time side effects). If not
13434 null, it will be the result of a L</newSTATEOP> call, normally including
13435 a C<nextstate> or equivalent op.
13437 If an error occurs in parsing or compilation, in most cases a valid op
13438 tree (most likely null) is returned anyway. The error is reflected in
13439 the parser state, normally resulting in a single exception at the top
13440 level of parsing which covers all the compilation errors that occurred.
13441 Some compilation errors, however, will throw an exception immediately.
13443 The C<flags> parameter is reserved for future use, and must always
13450 Perl_parse_fullstmt(pTHX_ U32 flags)
13453 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13454 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13458 =for apidoc parse_stmtseq
13460 Parse a sequence of zero or more Perl statements. These may be normal
13461 imperative statements, including optional labels, or declarations
13462 that have compile-time effect, or any mixture thereof. The statement
13463 sequence ends when a closing brace or end-of-file is encountered in a
13464 place where a new statement could have validly started. It is up to
13465 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13466 is correctly set to reflect the source of the code to be parsed and the
13467 lexical context for the statements.
13469 The op tree representing the statement sequence is returned. This may
13470 be a null pointer if the statements were all null, for example if there
13471 were no statements or if there were only subroutine definitions (which
13472 have compile-time side effects). If not null, it will be a C<lineseq>
13473 list, normally including C<nextstate> or equivalent ops.
13475 If an error occurs in parsing or compilation, in most cases a valid op
13476 tree is returned anyway. The error is reflected in the parser state,
13477 normally resulting in a single exception at the top level of parsing
13478 which covers all the compilation errors that occurred. Some compilation
13479 errors, however, will throw an exception immediately.
13481 The C<flags> parameter is reserved for future use, and must always
13488 Perl_parse_stmtseq(pTHX_ U32 flags)
13493 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13494 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13495 c = lex_peek_unichar(0);
13496 if (c != -1 && c != /*{*/'}')
13497 qerror(Perl_mess(aTHX_ "Parse error"));
13502 =for apidoc parse_subsignature
13504 Parse a subroutine signature declaration. This is the contents of the
13505 parentheses following a named or anonymous subroutine declaration when the
13506 C<signatures> feature is enabled. Note that this function neither expects
13507 nor consumes the opening and closing parentheses around the signature; it
13508 is the caller's job to handle these.
13510 This function must only be called during parsing of a subroutine; after
13511 L</start_subparse> has been called. It might allocate lexical variables on
13512 the pad for the current subroutine.
13514 The op tree to unpack the arguments from the stack at runtime is returned.
13515 This op tree should appear at the beginning of the compiled function. The
13516 caller may wish to use L</op_append_list> to build their function body
13517 after it, or splice it together with the body before calling L</newATTRSUB>.
13519 The C<flags> parameter is reserved for future use, and must always
13526 Perl_parse_subsignature(pTHX_ U32 flags)
13529 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13530 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13534 * ex: set ts=8 sts=4 sw=4 et: