3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_inline.h"
42 #include "invlist_inline.h"
44 #define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_dojoin (PL_parser->lex_dojoin)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_preambled (PL_parser->preambled)
68 #define PL_linestr (PL_parser->linestr)
69 #define PL_expect (PL_parser->expect)
70 #define PL_copline (PL_parser->copline)
71 #define PL_bufptr (PL_parser->bufptr)
72 #define PL_oldbufptr (PL_parser->oldbufptr)
73 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
74 #define PL_linestart (PL_parser->linestart)
75 #define PL_bufend (PL_parser->bufend)
76 #define PL_last_uni (PL_parser->last_uni)
77 #define PL_last_lop (PL_parser->last_lop)
78 #define PL_last_lop_op (PL_parser->last_lop_op)
79 #define PL_lex_state (PL_parser->lex_state)
80 #define PL_rsfp (PL_parser->rsfp)
81 #define PL_rsfp_filters (PL_parser->rsfp_filters)
82 #define PL_in_my (PL_parser->in_my)
83 #define PL_in_my_stash (PL_parser->in_my_stash)
84 #define PL_tokenbuf (PL_parser->tokenbuf)
85 #define PL_multi_end (PL_parser->multi_end)
86 #define PL_error_count (PL_parser->error_count)
88 # define PL_nexttoke (PL_parser->nexttoke)
89 # define PL_nexttype (PL_parser->nexttype)
90 # define PL_nextval (PL_parser->nextval)
93 #define SvEVALED(sv) \
94 (SvTYPE(sv) >= SVt_PVNV \
95 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
97 static const char* const ident_too_long = "Identifier too long";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
152 static const char* const lex_state_names[] = {
167 #include "keywords.h"
169 /* CLINE is a macro that ensures PL_copline has a sane value */
171 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
174 * Convenience functions to return different tokens and prime the
175 * lexer for the next token. They all take an argument.
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
185 * LOOPX : loop exiting command (goto, last, dump, etc)
186 * FTST : file test operator
187 * FUN0 : zero-argument function
188 * FUN0OP : zero-argument function, with its op created in this file
189 * FUN1 : not used, except for not, which isn't a UNIOP
190 * BOop : bitwise or or xor
192 * BCop : bitwise complement
193 * SHop : shift operator
194 * PWop : power operator
195 * PMop : pattern-matching operator
196 * Aop : addition-level operator
197 * AopNOASSIGN : addition-level operator that is never part of .=
198 * Mop : multiplication-level operator
199 * Eop : equality-testing operator
200 * Rop : relational operator <= != gt
202 * Also see LOP and lop() below.
205 #ifdef DEBUGGING /* Serve -DT. */
206 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
208 # define REPORT(retval) (retval)
211 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
212 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
213 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
214 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
215 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
216 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
217 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
218 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
219 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
221 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
223 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
224 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
225 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
226 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
227 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
228 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
229 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
231 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
232 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
233 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
234 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
235 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
236 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
237 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
238 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
240 /* This bit of chicanery makes a unary function followed by
241 * a parenthesis into a function with one argument, highest precedence.
242 * The UNIDOR macro is for unary functions that can be followed by the //
243 * operator (such as C<shift // 0>).
245 #define UNI3(f,x,have_x) { \
246 pl_yylval.ival = f; \
247 if (have_x) PL_expect = x; \
249 PL_last_uni = PL_oldbufptr; \
250 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
252 return REPORT( (int)FUNC1 ); \
254 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
256 #define UNI(f) UNI3(f,XTERM,1)
257 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
258 #define UNIPROTO(f,optional) { \
259 if (optional) PL_last_uni = PL_oldbufptr; \
263 #define UNIBRACK(f) UNI3(f,0,0)
265 /* grandfather return to old style */
268 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
269 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
270 pl_yylval.ival = (f); \
276 #define COPLINE_INC_WITH_HERELINES \
278 CopLINE_inc(PL_curcop); \
279 if (PL_parser->herelines) \
280 CopLINE(PL_curcop) += PL_parser->herelines, \
281 PL_parser->herelines = 0; \
283 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
284 * is no sublex_push to follow. */
285 #define COPLINE_SET_FROM_MULTI_END \
287 CopLINE_set(PL_curcop, PL_multi_end); \
288 if (PL_multi_end != PL_multi_start) \
289 PL_parser->herelines = 0; \
295 /* how to interpret the pl_yylval associated with the token */
299 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
304 static struct debug_tokens {
306 enum token_type type;
308 } const debug_tokens[] =
310 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
311 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
312 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
313 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
314 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
315 { ARROW, TOKENTYPE_NONE, "ARROW" },
316 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
317 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
318 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
319 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
320 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
321 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
322 { DO, TOKENTYPE_NONE, "DO" },
323 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
324 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
325 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
326 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
327 { ELSE, TOKENTYPE_NONE, "ELSE" },
328 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
329 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
330 { FOR, TOKENTYPE_IVAL, "FOR" },
331 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
332 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
333 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
334 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
335 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
336 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
337 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
338 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
339 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
340 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
341 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
342 { IF, TOKENTYPE_IVAL, "IF" },
343 { LABEL, TOKENTYPE_PVAL, "LABEL" },
344 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
345 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
346 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
347 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
348 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
349 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
350 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
351 { MY, TOKENTYPE_IVAL, "MY" },
352 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
353 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
354 { OROP, TOKENTYPE_IVAL, "OROP" },
355 { OROR, TOKENTYPE_NONE, "OROR" },
356 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
357 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
358 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
359 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
360 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
361 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
362 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
363 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
364 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
365 { PREINC, TOKENTYPE_NONE, "PREINC" },
366 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
367 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
368 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
369 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
370 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
371 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
372 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
385 { 0, TOKENTYPE_NONE, NULL }
388 /* dump the returned token in rv, plus any optional arg in pl_yylval */
391 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
393 PERL_ARGS_ASSERT_TOKEREPORT;
396 const char *name = NULL;
397 enum token_type type = TOKENTYPE_NONE;
398 const struct debug_tokens *p;
399 SV* const report = newSVpvs("<== ");
401 for (p = debug_tokens; p->token; p++) {
402 if (p->token == (int)rv) {
409 Perl_sv_catpv(aTHX_ report, name);
410 else if (isGRAPH(rv))
412 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 sv_catpvs(report, " (pending identifier)");
417 sv_catpvs(report, "EOF");
419 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
424 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
426 case TOKENTYPE_OPNUM:
427 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
428 PL_op_name[lvalp->ival]);
431 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
433 case TOKENTYPE_OPVAL:
435 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
436 PL_op_name[lvalp->opval->op_type]);
437 if (lvalp->opval->op_type == OP_CONST) {
438 Perl_sv_catpvf(aTHX_ report, " %s",
439 SvPEEK(cSVOPx_sv(lvalp->opval)));
444 sv_catpvs(report, "(opval=null)");
447 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
453 /* print the buffer with suitable escapes */
456 S_printbuf(pTHX_ const char *const fmt, const char *const s)
458 SV* const tmp = newSVpvs("");
460 PERL_ARGS_ASSERT_PRINTBUF;
462 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
463 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
464 GCC_DIAG_RESTORE_STMT;
473 * This subroutine looks for an '=' next to the operator that has just been
474 * parsed and turns it into an ASSIGNOP if it finds one.
478 S_ao(pTHX_ int toketype)
480 if (*PL_bufptr == '=') {
482 if (toketype == ANDAND)
483 pl_yylval.ival = OP_ANDASSIGN;
484 else if (toketype == OROR)
485 pl_yylval.ival = OP_ORASSIGN;
486 else if (toketype == DORDOR)
487 pl_yylval.ival = OP_DORASSIGN;
490 return REPORT(toketype);
495 * When Perl expects an operator and finds something else, no_op
496 * prints the warning. It always prints "<something> found where
497 * operator expected. It prints "Missing semicolon on previous line?"
498 * if the surprise occurs at the start of the line. "do you need to
499 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
500 * where the compiler doesn't know if foo is a method call or a function.
501 * It prints "Missing operator before end of line" if there's nothing
502 * after the missing operator, or "... before <...>" if there is something
503 * after the missing operator.
505 * PL_bufptr is expected to point to the start of the thing that was found,
506 * and s after the next token or partial token.
510 S_no_op(pTHX_ const char *const what, char *s)
512 char * const oldbp = PL_bufptr;
513 const bool is_first = (PL_oldbufptr == PL_linestart);
515 PERL_ARGS_ASSERT_NO_OP;
521 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
522 if (ckWARN_d(WARN_SYNTAX)) {
524 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
525 "\t(Missing semicolon on previous line?)\n");
526 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
531 for (t = PL_oldoldbufptr;
532 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
533 t += UTF ? UTF8SKIP(t) : 1)
537 if (t < PL_bufptr && isSPACE(*t))
538 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
539 "\t(Do you need to predeclare %" UTF8f "?)\n",
540 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
544 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
545 "\t(Missing operator before %" UTF8f "?)\n",
546 UTF8fARG(UTF, s - oldbp, oldbp));
554 * Complain about missing quote/regexp/heredoc terminator.
555 * If it's called with NULL then it cauterizes the line buffer.
556 * If we're in a delimited string and the delimiter is a control
557 * character, it's reformatted into a two-char sequence like ^C.
562 S_missingterm(pTHX_ char *s, STRLEN len)
564 char tmpbuf[UTF8_MAXBYTES + 1];
569 char * const nl = (char *) my_memrchr(s, '\n', len);
576 else if (PL_multi_close < 32) {
578 tmpbuf[1] = (char)toCTRL(PL_multi_close);
584 if (LIKELY(PL_multi_close < 256)) {
585 *tmpbuf = (char)PL_multi_close;
590 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
597 q = memchr(s, '"', len) ? '\'' : '"';
598 sv = sv_2mortal(newSVpvn(s, len));
601 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
602 " anywhere before EOF", q, SVfARG(sv), q);
608 * Check whether the named feature is enabled.
611 Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
613 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
615 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
617 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
619 if (namelen > MAX_FEATURE_LEN)
621 memcpy(&he_name[8], name, namelen);
623 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
624 REFCOUNTED_HE_EXISTS));
628 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
629 * utf16-to-utf8-reversed.
632 #ifdef PERL_CR_FILTER
636 const char *s = SvPVX_const(sv);
637 const char * const e = s + SvCUR(sv);
639 PERL_ARGS_ASSERT_STRIP_RETURN;
641 /* outer loop optimized to do nothing if there are no CR-LFs */
643 if (*s++ == '\r' && *s == '\n') {
644 /* hit a CR-LF, need to copy the rest */
648 if (*s == '\r' && s[1] == '\n')
659 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
661 const I32 count = FILTER_READ(idx+1, sv, maxlen);
662 if (count > 0 && !maxlen)
669 =for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
671 Creates and initialises a new lexer/parser state object, supplying
672 a context in which to lex and parse from a new source of Perl code.
673 A pointer to the new state object is placed in L</PL_parser>. An entry
674 is made on the save stack so that upon unwinding, the new state object
675 will be destroyed and the former value of L</PL_parser> will be restored.
676 Nothing else need be done to clean up the parsing context.
678 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
679 non-null, provides a string (in SV form) containing code to be parsed.
680 A copy of the string is made, so subsequent modification of C<line>
681 does not affect parsing. C<rsfp>, if non-null, provides an input stream
682 from which code will be read to be parsed. If both are non-null, the
683 code in C<line> comes first and must consist of complete lines of input,
684 and C<rsfp> supplies the remainder of the source.
686 The C<flags> parameter is reserved for future use. Currently it is only
687 used by perl internally, so extensions should always pass zero.
692 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
693 can share filters with the current parser.
694 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
695 caller, hence isn't owned by the parser, so shouldn't be closed on parser
696 destruction. This is used to handle the case of defaulting to reading the
697 script from the standard input because no filename was given on the command
698 line (without getting confused by situation where STDIN has been closed, so
699 the script handle is opened on fd 0) */
702 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
704 const char *s = NULL;
705 yy_parser *parser, *oparser;
707 if (flags && flags & ~LEX_START_FLAGS)
708 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
710 /* create and initialise a parser */
712 Newxz(parser, 1, yy_parser);
713 parser->old_parser = oparser = PL_parser;
716 parser->stack = NULL;
717 parser->stack_max1 = NULL;
720 /* on scope exit, free this parser and restore any outer one */
722 parser->saved_curcop = PL_curcop;
724 /* initialise lexer state */
726 parser->nexttoke = 0;
727 parser->error_count = oparser ? oparser->error_count : 0;
728 parser->copline = parser->preambling = NOLINE;
729 parser->lex_state = LEX_NORMAL;
730 parser->expect = XSTATE;
732 parser->recheck_utf8_validity = FALSE;
733 parser->rsfp_filters =
734 !(flags & LEX_START_SAME_FILTER) || !oparser
736 : MUTABLE_AV(SvREFCNT_inc(
737 oparser->rsfp_filters
738 ? oparser->rsfp_filters
739 : (oparser->rsfp_filters = newAV())
742 Newx(parser->lex_brackstack, 120, char);
743 Newx(parser->lex_casestack, 12, char);
744 *parser->lex_casestack = '\0';
745 Newxz(parser->lex_shared, 1, LEXSHARED);
749 const U8* first_bad_char_loc;
751 s = SvPV_const(line, len);
754 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
756 &first_bad_char_loc)))
758 _force_out_malformed_utf8_message(first_bad_char_loc,
759 (U8 *) s + SvCUR(line),
761 1 /* 1 means die */ );
762 NOT_REACHED; /* NOTREACHED */
765 parser->linestr = flags & LEX_START_COPIED
766 ? SvREFCNT_inc_simple_NN(line)
767 : newSVpvn_flags(s, len, SvUTF8(line));
769 sv_catpvs(parser->linestr, "\n;");
771 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
774 parser->oldoldbufptr =
777 parser->linestart = SvPVX(parser->linestr);
778 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
779 parser->last_lop = parser->last_uni = NULL;
781 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
782 |LEX_DONT_CLOSE_RSFP));
783 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
784 |LEX_DONT_CLOSE_RSFP));
786 parser->in_pod = parser->filtered = 0;
790 /* delete a parser object */
793 Perl_parser_free(pTHX_ const yy_parser *parser)
795 PERL_ARGS_ASSERT_PARSER_FREE;
797 PL_curcop = parser->saved_curcop;
798 SvREFCNT_dec(parser->linestr);
800 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
801 PerlIO_clearerr(parser->rsfp);
802 else if (parser->rsfp && (!parser->old_parser
803 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
804 PerlIO_close(parser->rsfp);
805 SvREFCNT_dec(parser->rsfp_filters);
806 SvREFCNT_dec(parser->lex_stuff);
807 SvREFCNT_dec(parser->lex_sub_repl);
809 Safefree(parser->lex_brackstack);
810 Safefree(parser->lex_casestack);
811 Safefree(parser->lex_shared);
812 PL_parser = parser->old_parser;
817 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
819 I32 nexttoke = parser->nexttoke;
820 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
822 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
823 && parser->nextval[nexttoke].opval
824 && parser->nextval[nexttoke].opval->op_slabbed
825 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
826 op_free(parser->nextval[nexttoke].opval);
827 parser->nextval[nexttoke].opval = NULL;
834 =for apidoc AmxU|SV *|PL_parser-E<gt>linestr
836 Buffer scalar containing the chunk currently under consideration of the
837 text currently being lexed. This is always a plain string scalar (for
838 which C<SvPOK> is true). It is not intended to be used as a scalar by
839 normal scalar means; instead refer to the buffer directly by the pointer
840 variables described below.
842 The lexer maintains various C<char*> pointers to things in the
843 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
844 reallocated, all of these pointers must be updated. Don't attempt to
845 do this manually, but rather use L</lex_grow_linestr> if you need to
846 reallocate the buffer.
848 The content of the text chunk in the buffer is commonly exactly one
849 complete line of input, up to and including a newline terminator,
850 but there are situations where it is otherwise. The octets of the
851 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
852 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
853 flag on this scalar, which may disagree with it.
855 For direct examination of the buffer, the variable
856 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
857 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
858 of these pointers is usually preferable to examination of the scalar
859 through normal scalar means.
861 =for apidoc AmxU|char *|PL_parser-E<gt>bufend
863 Direct pointer to the end of the chunk of text currently being lexed, the
864 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
865 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
866 always located at the end of the buffer, and does not count as part of
867 the buffer's contents.
869 =for apidoc AmxU|char *|PL_parser-E<gt>bufptr
871 Points to the current position of lexing inside the lexer buffer.
872 Characters around this point may be freely examined, within
873 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
874 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
875 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
877 Lexing code (whether in the Perl core or not) moves this pointer past
878 the characters that it consumes. It is also expected to perform some
879 bookkeeping whenever a newline character is consumed. This movement
880 can be more conveniently performed by the function L</lex_read_to>,
881 which handles newlines appropriately.
883 Interpretation of the buffer's octets can be abstracted out by
884 using the slightly higher-level functions L</lex_peek_unichar> and
885 L</lex_read_unichar>.
887 =for apidoc AmxU|char *|PL_parser-E<gt>linestart
889 Points to the start of the current line inside the lexer buffer.
890 This is useful for indicating at which column an error occurred, and
891 not much else. This must be updated by any lexing code that consumes
892 a newline; the function L</lex_read_to> handles this detail.
898 =for apidoc Amx|bool|lex_bufutf8
900 Indicates whether the octets in the lexer buffer
901 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
902 of Unicode characters. If not, they should be interpreted as Latin-1
903 characters. This is analogous to the C<SvUTF8> flag for scalars.
905 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
906 contains valid UTF-8. Lexing code must be robust in the face of invalid
909 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
910 is significant, but not the whole story regarding the input character
911 encoding. Normally, when a file is being read, the scalar contains octets
912 and its C<SvUTF8> flag is off, but the octets should be interpreted as
913 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
914 however, the scalar may have the C<SvUTF8> flag on, and in this case its
915 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
916 is in effect. This logic may change in the future; use this function
917 instead of implementing the logic yourself.
923 Perl_lex_bufutf8(pTHX)
929 =for apidoc Amx|char *|lex_grow_linestr|STRLEN len
931 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
932 at least C<len> octets (including terminating C<NUL>). Returns a
933 pointer to the reallocated buffer. This is necessary before making
934 any direct modification of the buffer that would increase its length.
935 L</lex_stuff_pvn> provides a more convenient way to insert text into
938 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
939 this function updates all of the lexer's variables that point directly
946 Perl_lex_grow_linestr(pTHX_ STRLEN len)
950 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
951 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
954 linestr = PL_parser->linestr;
955 buf = SvPVX(linestr);
956 if (len <= SvLEN(linestr))
959 /* Is the lex_shared linestr SV the same as the current linestr SV?
960 * Only in this case does re_eval_start need adjusting, since it
961 * points within lex_shared->ls_linestr's buffer */
962 current = ( !PL_parser->lex_shared->ls_linestr
963 || linestr == PL_parser->lex_shared->ls_linestr);
965 bufend_pos = PL_parser->bufend - buf;
966 bufptr_pos = PL_parser->bufptr - buf;
967 oldbufptr_pos = PL_parser->oldbufptr - buf;
968 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
969 linestart_pos = PL_parser->linestart - buf;
970 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
971 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
972 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
973 PL_parser->lex_shared->re_eval_start - buf : 0;
975 buf = sv_grow(linestr, len);
977 PL_parser->bufend = buf + bufend_pos;
978 PL_parser->bufptr = buf + bufptr_pos;
979 PL_parser->oldbufptr = buf + oldbufptr_pos;
980 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
981 PL_parser->linestart = buf + linestart_pos;
982 if (PL_parser->last_uni)
983 PL_parser->last_uni = buf + last_uni_pos;
984 if (PL_parser->last_lop)
985 PL_parser->last_lop = buf + last_lop_pos;
986 if (current && PL_parser->lex_shared->re_eval_start)
987 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
992 =for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
994 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
995 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
996 reallocating the buffer if necessary. This means that lexing code that
997 runs later will see the characters as if they had appeared in the input.
998 It is not recommended to do this as part of normal parsing, and most
999 uses of this facility run the risk of the inserted characters being
1000 interpreted in an unintended manner.
1002 The string to be inserted is represented by C<len> octets starting
1003 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1004 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1005 The characters are recoded for the lexer buffer, according to how the
1006 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1007 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1008 function is more convenient.
1014 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1018 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1019 if (flags & ~(LEX_STUFF_UTF8))
1020 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1022 if (flags & LEX_STUFF_UTF8) {
1025 STRLEN highhalf = 0; /* Count of variants */
1026 const char *p, *e = pv+len;
1027 for (p = pv; p != e; p++) {
1028 if (! UTF8_IS_INVARIANT(*p)) {
1034 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1035 bufptr = PL_parser->bufptr;
1036 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1037 SvCUR_set(PL_parser->linestr,
1038 SvCUR(PL_parser->linestr) + len+highhalf);
1039 PL_parser->bufend += len+highhalf;
1040 for (p = pv; p != e; p++) {
1041 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1045 if (flags & LEX_STUFF_UTF8) {
1046 STRLEN highhalf = 0;
1047 const char *p, *e = pv+len;
1048 for (p = pv; p != e; p++) {
1050 if (UTF8_IS_ABOVE_LATIN1(c)) {
1051 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1052 "non-Latin-1 character into Latin-1 input");
1053 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1056 } else assert(UTF8_IS_INVARIANT(c));
1060 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1061 bufptr = PL_parser->bufptr;
1062 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1063 SvCUR_set(PL_parser->linestr,
1064 SvCUR(PL_parser->linestr) + len-highhalf);
1065 PL_parser->bufend += len-highhalf;
1068 if (UTF8_IS_INVARIANT(*p)) {
1074 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1080 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1081 bufptr = PL_parser->bufptr;
1082 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1083 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1084 PL_parser->bufend += len;
1085 Copy(pv, bufptr, len, char);
1091 =for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1093 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1094 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1095 reallocating the buffer if necessary. This means that lexing code that
1096 runs later will see the characters as if they had appeared in the input.
1097 It is not recommended to do this as part of normal parsing, and most
1098 uses of this facility run the risk of the inserted characters being
1099 interpreted in an unintended manner.
1101 The string to be inserted is represented by octets starting at C<pv>
1102 and continuing to the first nul. These octets are interpreted as either
1103 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1104 in C<flags>. The characters are recoded for the lexer buffer, according
1105 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1106 If it is not convenient to nul-terminate a string to be inserted, the
1107 L</lex_stuff_pvn> function is more appropriate.
1113 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1115 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1116 lex_stuff_pvn(pv, strlen(pv), flags);
1120 =for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1122 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1123 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1124 reallocating the buffer if necessary. This means that lexing code that
1125 runs later will see the characters as if they had appeared in the input.
1126 It is not recommended to do this as part of normal parsing, and most
1127 uses of this facility run the risk of the inserted characters being
1128 interpreted in an unintended manner.
1130 The string to be inserted is the string value of C<sv>. The characters
1131 are recoded for the lexer buffer, according to how the buffer is currently
1132 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1133 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1134 need to construct a scalar.
1140 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1144 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1148 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1152 =for apidoc Amx|void|lex_unstuff|char *ptr
1154 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1155 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1156 This hides the discarded text from any lexing code that runs later,
1157 as if the text had never appeared.
1159 This is not the normal way to consume lexed text. For that, use
1166 Perl_lex_unstuff(pTHX_ char *ptr)
1170 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1171 buf = PL_parser->bufptr;
1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176 bufend = PL_parser->bufend;
1178 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1179 unstuff_len = ptr - buf;
1180 Move(ptr, buf, bufend+1-ptr, char);
1181 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1182 PL_parser->bufend = bufend - unstuff_len;
1186 =for apidoc Amx|void|lex_read_to|char *ptr
1188 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1189 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1190 performing the correct bookkeeping whenever a newline character is passed.
1191 This is the normal way to consume lexed text.
1193 Interpretation of the buffer's octets can be abstracted out by
1194 using the slightly higher-level functions L</lex_peek_unichar> and
1195 L</lex_read_unichar>.
1201 Perl_lex_read_to(pTHX_ char *ptr)
1204 PERL_ARGS_ASSERT_LEX_READ_TO;
1205 s = PL_parser->bufptr;
1206 if (ptr < s || ptr > PL_parser->bufend)
1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1208 for (; s != ptr; s++)
1210 COPLINE_INC_WITH_HERELINES;
1211 PL_parser->linestart = s+1;
1213 PL_parser->bufptr = ptr;
1217 =for apidoc Amx|void|lex_discard_to|char *ptr
1219 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1220 up to C<ptr>. The remaining content of the buffer will be moved, and
1221 all pointers into the buffer updated appropriately. C<ptr> must not
1222 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1223 it is not permitted to discard text that has yet to be lexed.
1225 Normally it is not necessarily to do this directly, because it suffices to
1226 use the implicit discarding behaviour of L</lex_next_chunk> and things
1227 based on it. However, if a token stretches across multiple lines,
1228 and the lexing code has kept multiple lines of text in the buffer for
1229 that purpose, then after completion of the token it would be wise to
1230 explicitly discard the now-unneeded earlier lines, to avoid future
1231 multi-line tokens growing the buffer without bound.
1237 Perl_lex_discard_to(pTHX_ char *ptr)
1241 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1242 buf = SvPVX(PL_parser->linestr);
1244 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1247 if (ptr > PL_parser->bufptr)
1248 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1249 discard_len = ptr - buf;
1250 if (PL_parser->oldbufptr < ptr)
1251 PL_parser->oldbufptr = ptr;
1252 if (PL_parser->oldoldbufptr < ptr)
1253 PL_parser->oldoldbufptr = ptr;
1254 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1255 PL_parser->last_uni = NULL;
1256 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1257 PL_parser->last_lop = NULL;
1258 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1259 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1260 PL_parser->bufend -= discard_len;
1261 PL_parser->bufptr -= discard_len;
1262 PL_parser->oldbufptr -= discard_len;
1263 PL_parser->oldoldbufptr -= discard_len;
1264 if (PL_parser->last_uni)
1265 PL_parser->last_uni -= discard_len;
1266 if (PL_parser->last_lop)
1267 PL_parser->last_lop -= discard_len;
1271 Perl_notify_parser_that_changed_to_utf8(pTHX)
1273 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1274 * off to on. At compile time, this has the effect of entering a 'use
1275 * utf8' section. This means that any input was not previously checked for
1276 * UTF-8 (because it was off), but now we do need to check it, or our
1277 * assumptions about the input being sane could be wrong, and we could
1278 * segfault. This routine just sets a flag so that the next time we look
1279 * at the input we do the well-formed UTF-8 check. If we aren't in the
1280 * proper phase, there may not be a parser object, but if there is, setting
1281 * the flag is harmless */
1284 PL_parser->recheck_utf8_validity = TRUE;
1289 =for apidoc Amx|bool|lex_next_chunk|U32 flags
1291 Reads in the next chunk of text to be lexed, appending it to
1292 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1293 looked to the end of the current chunk and wants to know more. It is
1294 usual, but not necessary, for lexing to have consumed the entirety of
1295 the current chunk at this time.
1297 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1298 chunk (i.e., the current chunk has been entirely consumed), normally the
1299 current chunk will be discarded at the same time that the new chunk is
1300 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1301 will not be discarded. If the current chunk has not been entirely
1302 consumed, then it will not be discarded regardless of the flag.
1304 Returns true if some new text was added to the buffer, or false if the
1305 buffer has reached the end of the input text.
1310 #define LEX_FAKE_EOF 0x80000000
1311 #define LEX_NO_TERM 0x40000000 /* here-doc */
1314 Perl_lex_next_chunk(pTHX_ U32 flags)
1318 STRLEN old_bufend_pos, new_bufend_pos;
1319 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1320 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1321 bool got_some_for_debugger = 0;
1324 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1325 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1326 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1328 linestr = PL_parser->linestr;
1329 buf = SvPVX(linestr);
1330 if (!(flags & LEX_KEEP_PREVIOUS)
1331 && PL_parser->bufptr == PL_parser->bufend)
1333 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1335 if (PL_parser->last_uni != PL_parser->bufend)
1336 PL_parser->last_uni = NULL;
1337 if (PL_parser->last_lop != PL_parser->bufend)
1338 PL_parser->last_lop = NULL;
1339 last_uni_pos = last_lop_pos = 0;
1343 old_bufend_pos = PL_parser->bufend - buf;
1344 bufptr_pos = PL_parser->bufptr - buf;
1345 oldbufptr_pos = PL_parser->oldbufptr - buf;
1346 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1347 linestart_pos = PL_parser->linestart - buf;
1348 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1349 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1351 if (flags & LEX_FAKE_EOF) {
1353 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1355 } else if (filter_gets(linestr, old_bufend_pos)) {
1357 got_some_for_debugger = 1;
1358 } else if (flags & LEX_NO_TERM) {
1361 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1364 /* End of real input. Close filehandle (unless it was STDIN),
1365 * then add implicit termination.
1367 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1368 PerlIO_clearerr(PL_parser->rsfp);
1369 else if (PL_parser->rsfp)
1370 (void)PerlIO_close(PL_parser->rsfp);
1371 PL_parser->rsfp = NULL;
1372 PL_parser->in_pod = PL_parser->filtered = 0;
1373 if (!PL_in_eval && PL_minus_p) {
1375 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1376 PL_minus_n = PL_minus_p = 0;
1377 } else if (!PL_in_eval && PL_minus_n) {
1378 sv_catpvs(linestr, /*{*/";}");
1381 sv_catpvs(linestr, ";");
1384 buf = SvPVX(linestr);
1385 new_bufend_pos = SvCUR(linestr);
1386 PL_parser->bufend = buf + new_bufend_pos;
1387 PL_parser->bufptr = buf + bufptr_pos;
1390 const U8* first_bad_char_loc;
1391 if (UNLIKELY(! is_utf8_string_loc(
1392 (U8 *) PL_parser->bufptr,
1393 PL_parser->bufend - PL_parser->bufptr,
1394 &first_bad_char_loc)))
1396 _force_out_malformed_utf8_message(first_bad_char_loc,
1397 (U8 *) PL_parser->bufend,
1399 1 /* 1 means die */ );
1400 NOT_REACHED; /* NOTREACHED */
1404 PL_parser->oldbufptr = buf + oldbufptr_pos;
1405 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1406 PL_parser->linestart = buf + linestart_pos;
1407 if (PL_parser->last_uni)
1408 PL_parser->last_uni = buf + last_uni_pos;
1409 if (PL_parser->last_lop)
1410 PL_parser->last_lop = buf + last_lop_pos;
1411 if (PL_parser->preambling != NOLINE) {
1412 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1413 PL_parser->preambling = NOLINE;
1415 if ( got_some_for_debugger
1416 && PERLDB_LINE_OR_SAVESRC
1417 && PL_curstash != PL_debstash)
1419 /* debugger active and we're not compiling the debugger code,
1420 * so store the line into the debugger's array of lines
1422 update_debugger_info(NULL, buf+old_bufend_pos,
1423 new_bufend_pos-old_bufend_pos);
1429 =for apidoc Amx|I32|lex_peek_unichar|U32 flags
1431 Looks ahead one (Unicode) character in the text currently being lexed.
1432 Returns the codepoint (unsigned integer value) of the next character,
1433 or -1 if lexing has reached the end of the input text. To consume the
1434 peeked character, use L</lex_read_unichar>.
1436 If the next character is in (or extends into) the next chunk of input
1437 text, the next chunk will be read in. Normally the current chunk will be
1438 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1439 bit set, then the current chunk will not be discarded.
1441 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1442 is encountered, an exception is generated.
1448 Perl_lex_peek_unichar(pTHX_ U32 flags)
1452 if (flags & ~(LEX_KEEP_PREVIOUS))
1453 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1454 s = PL_parser->bufptr;
1455 bufend = PL_parser->bufend;
1461 if (!lex_next_chunk(flags))
1463 s = PL_parser->bufptr;
1464 bufend = PL_parser->bufend;
1467 if (UTF8_IS_INVARIANT(head))
1469 if (UTF8_IS_START(head)) {
1470 len = UTF8SKIP(&head);
1471 while ((STRLEN)(bufend-s) < len) {
1472 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1474 s = PL_parser->bufptr;
1475 bufend = PL_parser->bufend;
1478 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1479 if (retlen == (STRLEN)-1) {
1480 _force_out_malformed_utf8_message((U8 *) s,
1483 1 /* 1 means die */ );
1484 NOT_REACHED; /* NOTREACHED */
1489 if (!lex_next_chunk(flags))
1491 s = PL_parser->bufptr;
1498 =for apidoc Amx|I32|lex_read_unichar|U32 flags
1500 Reads the next (Unicode) character in the text currently being lexed.
1501 Returns the codepoint (unsigned integer value) of the character read,
1502 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1503 if lexing has reached the end of the input text. To non-destructively
1504 examine the next character, use L</lex_peek_unichar> instead.
1506 If the next character is in (or extends into) the next chunk of input
1507 text, the next chunk will be read in. Normally the current chunk will be
1508 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1509 bit set, then the current chunk will not be discarded.
1511 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1512 is encountered, an exception is generated.
1518 Perl_lex_read_unichar(pTHX_ U32 flags)
1521 if (flags & ~(LEX_KEEP_PREVIOUS))
1522 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1523 c = lex_peek_unichar(flags);
1526 COPLINE_INC_WITH_HERELINES;
1528 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1530 ++(PL_parser->bufptr);
1536 =for apidoc Amx|void|lex_read_space|U32 flags
1538 Reads optional spaces, in Perl style, in the text currently being
1539 lexed. The spaces may include ordinary whitespace characters and
1540 Perl-style comments. C<#line> directives are processed if encountered.
1541 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1542 at a non-space character (or the end of the input text).
1544 If spaces extend into the next chunk of input text, the next chunk will
1545 be read in. Normally the current chunk will be discarded at the same
1546 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1547 chunk will not be discarded.
1552 #define LEX_NO_INCLINE 0x40000000
1553 #define LEX_NO_NEXT_CHUNK 0x80000000
1556 Perl_lex_read_space(pTHX_ U32 flags)
1559 const bool can_incline = !(flags & LEX_NO_INCLINE);
1560 bool need_incline = 0;
1561 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1562 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1563 s = PL_parser->bufptr;
1564 bufend = PL_parser->bufend;
1570 } while (!(c == '\n' || (c == 0 && s == bufend)));
1571 } else if (c == '\n') {
1574 PL_parser->linestart = s;
1580 } else if (isSPACE(c)) {
1582 } else if (c == 0 && s == bufend) {
1585 if (flags & LEX_NO_NEXT_CHUNK)
1587 PL_parser->bufptr = s;
1588 l = CopLINE(PL_curcop);
1589 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1590 got_more = lex_next_chunk(flags);
1591 CopLINE_set(PL_curcop, l);
1592 s = PL_parser->bufptr;
1593 bufend = PL_parser->bufend;
1596 if (can_incline && need_incline && PL_parser->rsfp) {
1606 PL_parser->bufptr = s;
1611 =for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1613 This function performs syntax checking on a prototype, C<proto>.
1614 If C<warn> is true, any illegal characters or mismatched brackets
1615 will trigger illegalproto warnings, declaring that they were
1616 detected in the prototype for C<name>.
1618 The return value is C<true> if this is a valid prototype, and
1619 C<false> if it is not, regardless of whether C<warn> was C<true> or
1622 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1629 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1631 STRLEN len, origlen;
1633 bool bad_proto = FALSE;
1634 bool in_brackets = FALSE;
1635 bool after_slash = FALSE;
1636 char greedy_proto = ' ';
1637 bool proto_after_greedy_proto = FALSE;
1638 bool must_be_last = FALSE;
1639 bool underscore = FALSE;
1640 bool bad_proto_after_underscore = FALSE;
1642 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1647 p = SvPV(proto, len);
1649 for (; len--; p++) {
1652 proto_after_greedy_proto = TRUE;
1654 if (!strchr(";@%", *p))
1655 bad_proto_after_underscore = TRUE;
1658 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1665 in_brackets = FALSE;
1666 else if ((*p == '@' || *p == '%')
1670 must_be_last = TRUE;
1679 after_slash = FALSE;
1684 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1687 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1688 origlen, UNI_DISPLAY_ISPRINT)
1689 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1691 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1692 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1693 sv_catpvs(name2, "::");
1694 sv_catsv(name2, (SV *)name);
1698 if (proto_after_greedy_proto)
1699 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1700 "Prototype after '%c' for %" SVf " : %s",
1701 greedy_proto, SVfARG(name), p);
1703 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1704 "Missing ']' in prototype for %" SVf " : %s",
1707 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1708 "Illegal character in prototype for %" SVf " : %s",
1710 if (bad_proto_after_underscore)
1711 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1712 "Illegal character after '_' in prototype for %" SVf " : %s",
1716 return (! (proto_after_greedy_proto || bad_proto) );
1721 * This subroutine has nothing to do with tilting, whether at windmills
1722 * or pinball tables. Its name is short for "increment line". It
1723 * increments the current line number in CopLINE(PL_curcop) and checks
1724 * to see whether the line starts with a comment of the form
1725 * # line 500 "foo.pm"
1726 * If so, it sets the current line number and file to the values in the comment.
1730 S_incline(pTHX_ const char *s, const char *end)
1738 PERL_ARGS_ASSERT_INCLINE;
1742 COPLINE_INC_WITH_HERELINES;
1743 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1744 && s+1 == PL_bufend && *s == ';') {
1745 /* fake newline in string eval */
1746 CopLINE_dec(PL_curcop);
1751 while (SPACE_OR_TAB(*s))
1753 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1754 s += sizeof("line") - 1;
1757 if (SPACE_OR_TAB(*s))
1761 while (SPACE_OR_TAB(*s))
1769 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1771 while (SPACE_OR_TAB(*s))
1773 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1779 while (*t && !isSPACE(*t))
1783 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1785 if (*e != '\n' && *e != '\0')
1786 return; /* false alarm */
1788 if (!grok_atoUV(n, &uv, &e))
1790 line_num = ((line_t)uv) - 1;
1793 const STRLEN len = t - s;
1795 if (!PL_rsfp && !PL_parser->filtered) {
1796 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1797 * to *{"::_<newfilename"} */
1798 /* However, the long form of evals is only turned on by the
1799 debugger - usually they're "(eval %lu)" */
1800 GV * const cfgv = CopFILEGV(PL_curcop);
1803 STRLEN tmplen2 = len;
1807 if (tmplen2 + 2 <= sizeof smallbuf)
1810 Newx(tmpbuf2, tmplen2 + 2, char);
1815 memcpy(tmpbuf2 + 2, s, tmplen2);
1818 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1820 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1821 /* adjust ${"::_<newfilename"} to store the new file name */
1822 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1823 /* The line number may differ. If that is the case,
1824 alias the saved lines that are in the array.
1825 Otherwise alias the whole array. */
1826 if (CopLINE(PL_curcop) == line_num) {
1827 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1828 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1830 else if (GvAV(cfgv)) {
1831 AV * const av = GvAV(cfgv);
1832 const line_t start = CopLINE(PL_curcop)+1;
1833 SSize_t items = AvFILLp(av) - start;
1835 AV * const av2 = GvAVn(gv2);
1836 SV **svp = AvARRAY(av) + start;
1837 Size_t l = line_num+1;
1838 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1839 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1844 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1847 CopFILE_free(PL_curcop);
1848 CopFILE_setn(PL_curcop, s, len);
1850 CopLINE_set(PL_curcop, line_num);
1854 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1856 AV *av = CopFILEAVx(PL_curcop);
1859 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1861 sv = *av_fetch(av, 0, 1);
1862 SvUPGRADE(sv, SVt_PVMG);
1864 if (!SvPOK(sv)) SvPVCLEAR(sv);
1866 sv_catsv(sv, orig_sv);
1868 sv_catpvn(sv, buf, len);
1873 if (PL_parser->preambling == NOLINE)
1874 av_store(av, CopLINE(PL_curcop), sv);
1880 * Called to gobble the appropriate amount and type of whitespace.
1881 * Skips comments as well.
1882 * Returns the next character after the whitespace that is skipped.
1885 * Same thing, but look ahead without incrementing line numbers or
1886 * adjusting PL_linestart.
1889 #define skipspace(s) skipspace_flags(s, 0)
1890 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1893 S_skipspace_flags(pTHX_ char *s, U32 flags)
1895 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1896 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1897 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1900 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1902 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1903 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1904 LEX_NO_NEXT_CHUNK : 0));
1906 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1907 if (PL_linestart > PL_bufptr)
1908 PL_bufptr = PL_linestart;
1916 * Check the unary operators to ensure there's no ambiguity in how they're
1917 * used. An ambiguous piece of code would be:
1919 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1920 * the +5 is its argument.
1928 if (PL_oldoldbufptr != PL_last_uni)
1930 while (isSPACE(*PL_last_uni))
1933 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1934 s += UTF ? UTF8SKIP(s) : 1;
1935 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1938 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1939 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1940 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1944 * LOP : macro to build a list operator. Its behaviour has been replaced
1945 * with a subroutine, S_lop() for which LOP is just another name.
1948 #define LOP(f,x) return lop(f,x,s)
1952 * Build a list operator (or something that might be one). The rules:
1953 * - if we have a next token, then it's a list operator (no parens) for
1954 * which the next token has already been parsed; e.g.,
1957 * - if the next thing is an opening paren, then it's a function
1958 * - else it's a list operator
1962 S_lop(pTHX_ I32 f, U8 x, char *s)
1964 PERL_ARGS_ASSERT_LOP;
1969 PL_last_lop = PL_oldbufptr;
1970 PL_last_lop_op = (OPCODE)f;
1975 return REPORT(FUNC);
1978 return REPORT(FUNC);
1981 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1982 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1983 return REPORT(LSTOP);
1989 * When the lexer realizes it knows the next token (for instance,
1990 * it is reordering tokens for the parser) then it can call S_force_next
1991 * to know what token to return the next time the lexer is called. Caller
1992 * will need to set PL_nextval[] and possibly PL_expect to ensure
1993 * the lexer handles the token correctly.
1997 S_force_next(pTHX_ I32 type)
2001 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2002 tokereport(type, &NEXTVAL_NEXTTOKE);
2005 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2006 PL_nexttype[PL_nexttoke] = type;
2013 * This subroutine handles postfix deref syntax after the arrow has already
2014 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2015 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2016 * only the first, leaving yylex to find the next.
2020 S_postderef(pTHX_ int const funny, char const next)
2022 assert(funny == DOLSHARP || strchr("$@%&*", funny));
2024 PL_expect = XOPERATOR;
2025 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2026 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2027 PL_lex_state = LEX_INTERPEND;
2029 force_next(POSTJOIN);
2035 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2036 && !PL_lex_brackets)
2038 PL_expect = XOPERATOR;
2047 int yyc = PL_parser->yychar;
2048 if (yyc != YYEMPTY) {
2050 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2051 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2052 PL_lex_allbrackets--;
2054 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2055 } else if (yyc == '('/*)*/) {
2056 PL_lex_allbrackets--;
2061 PL_parser->yychar = YYEMPTY;
2066 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2068 SV * const sv = newSVpvn_utf8(start, len,
2071 && is_utf8_non_invariant_string((const U8*)start, len));
2077 * When the lexer knows the next thing is a word (for instance, it has
2078 * just seen -> and it knows that the next char is a word char, then
2079 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2083 * char *start : buffer position (must be within PL_linestr)
2084 * int token : PL_next* will be this type of bare word
2085 * (e.g., METHOD,BAREWORD)
2086 * int check_keyword : if true, Perl checks to make sure the word isn't
2087 * a keyword (do this if the word is a label, e.g. goto FOO)
2088 * int allow_pack : if true, : characters will also be allowed (require,
2089 * use, etc. do this)
2093 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2098 PERL_ARGS_ASSERT_FORCE_WORD;
2100 start = skipspace(start);
2102 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2103 || (allow_pack && *s == ':' && s[1] == ':') )
2105 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2106 if (check_keyword) {
2107 char *s2 = PL_tokenbuf;
2109 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2110 s2 += sizeof("CORE::") - 1;
2111 len2 -= sizeof("CORE::") - 1;
2113 if (keyword(s2, len2, 0))
2116 if (token == METHOD) {
2121 PL_expect = XOPERATOR;
2124 NEXTVAL_NEXTTOKE.opval
2125 = newSVOP(OP_CONST,0,
2126 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2127 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2135 * Called when the lexer wants $foo *foo &foo etc, but the program
2136 * text only contains the "foo" portion. The first argument is a pointer
2137 * to the "foo", and the second argument is the type symbol to prefix.
2138 * Forces the next token to be a "BAREWORD".
2139 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2143 S_force_ident(pTHX_ const char *s, int kind)
2145 PERL_ARGS_ASSERT_FORCE_IDENT;
2148 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2149 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2150 UTF ? SVf_UTF8 : 0));
2151 NEXTVAL_NEXTTOKE.opval = o;
2152 force_next(BAREWORD);
2154 o->op_private = OPpCONST_ENTERED;
2155 /* XXX see note in pp_entereval() for why we forgo typo
2156 warnings if the symbol must be introduced in an eval.
2158 gv_fetchpvn_flags(s, len,
2159 (PL_in_eval ? GV_ADDMULTI
2160 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2161 kind == '$' ? SVt_PV :
2162 kind == '@' ? SVt_PVAV :
2163 kind == '%' ? SVt_PVHV :
2171 S_force_ident_maybe_lex(pTHX_ char pit)
2173 NEXTVAL_NEXTTOKE.ival = pit;
2178 Perl_str_to_version(pTHX_ SV *sv)
2183 const char *start = SvPV_const(sv,len);
2184 const char * const end = start + len;
2185 const bool utf = cBOOL(SvUTF8(sv));
2187 PERL_ARGS_ASSERT_STR_TO_VERSION;
2189 while (start < end) {
2193 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2198 retval += ((NV)n)/nshift;
2207 * Forces the next token to be a version number.
2208 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2209 * and if "guessing" is TRUE, then no new token is created (and the caller
2210 * must use an alternative parsing method).
2214 S_force_version(pTHX_ char *s, int guessing)
2219 PERL_ARGS_ASSERT_FORCE_VERSION;
2227 while (isDIGIT(*d) || *d == '_' || *d == '.')
2229 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2231 s = scan_num(s, &pl_yylval);
2232 version = pl_yylval.opval;
2233 ver = cSVOPx(version)->op_sv;
2234 if (SvPOK(ver) && !SvNIOK(ver)) {
2235 SvUPGRADE(ver, SVt_PVNV);
2236 SvNV_set(ver, str_to_version(ver));
2237 SvNOK_on(ver); /* hint that it is a version */
2240 else if (guessing) {
2245 /* NOTE: The parser sees the package name and the VERSION swapped */
2246 NEXTVAL_NEXTTOKE.opval = version;
2247 force_next(BAREWORD);
2253 * S_force_strict_version
2254 * Forces the next token to be a version number using strict syntax rules.
2258 S_force_strict_version(pTHX_ char *s)
2261 const char *errstr = NULL;
2263 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2265 while (isSPACE(*s)) /* leading whitespace */
2268 if (is_STRICT_VERSION(s,&errstr)) {
2270 s = (char *)scan_version(s, ver, 0);
2271 version = newSVOP(OP_CONST, 0, ver);
2273 else if ((*s != ';' && *s != '{' && *s != '}' )
2274 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2278 yyerror(errstr); /* version required */
2282 /* NOTE: The parser sees the package name and the VERSION swapped */
2283 NEXTVAL_NEXTTOKE.opval = version;
2284 force_next(BAREWORD);
2291 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2292 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2293 * unchanged, and a new SV containing the modified input is returned.
2297 S_tokeq(pTHX_ SV *sv)
2304 PERL_ARGS_ASSERT_TOKEQ;
2308 assert (!SvIsCOW(sv));
2309 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2313 /* This is relying on the SV being "well formed" with a trailing '\0' */
2314 while (s < send && !(*s == '\\' && s[1] == '\\'))
2319 if ( PL_hints & HINT_NEW_STRING ) {
2320 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2321 SVs_TEMP | SvUTF8(sv));
2325 if (s + 1 < send && (s[1] == '\\'))
2326 s++; /* all that, just for this */
2331 SvCUR_set(sv, d - SvPVX_const(sv));
2333 if ( PL_hints & HINT_NEW_STRING )
2334 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
2339 * Now come three functions related to double-quote context,
2340 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2341 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2342 * interact with PL_lex_state, and create fake ( ... ) argument lists
2343 * to handle functions and concatenation.
2347 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2352 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2354 * Pattern matching will set PL_lex_op to the pattern-matching op to
2355 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2357 * OP_CONST is easy--just make the new op and return.
2359 * Everything else becomes a FUNC.
2361 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2362 * had an OP_CONST. This just sets us up for a
2363 * call to S_sublex_push().
2367 S_sublex_start(pTHX)
2369 const I32 op_type = pl_yylval.ival;
2371 if (op_type == OP_NULL) {
2372 pl_yylval.opval = PL_lex_op;
2376 if (op_type == OP_CONST) {
2377 SV *sv = PL_lex_stuff;
2378 PL_lex_stuff = NULL;
2381 if (SvTYPE(sv) == SVt_PVIV) {
2382 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2384 const char * const p = SvPV_const(sv, len);
2385 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2389 pl_yylval.opval = newSVOP(op_type, 0, sv);
2393 PL_parser->lex_super_state = PL_lex_state;
2394 PL_parser->lex_sub_inwhat = (U16)op_type;
2395 PL_parser->lex_sub_op = PL_lex_op;
2396 PL_parser->sub_no_recover = FALSE;
2397 PL_parser->sub_error_count = PL_error_count;
2398 PL_lex_state = LEX_INTERPPUSH;
2402 pl_yylval.opval = PL_lex_op;
2412 * Create a new scope to save the lexing state. The scope will be
2413 * ended in S_sublex_done. Returns a '(', starting the function arguments
2414 * to the uc, lc, etc. found before.
2415 * Sets PL_lex_state to LEX_INTERPCONCAT.
2422 const bool is_heredoc = PL_multi_close == '<';
2425 PL_lex_state = PL_parser->lex_super_state;
2426 SAVEI8(PL_lex_dojoin);
2427 SAVEI32(PL_lex_brackets);
2428 SAVEI32(PL_lex_allbrackets);
2429 SAVEI32(PL_lex_formbrack);
2430 SAVEI8(PL_lex_fakeeof);
2431 SAVEI32(PL_lex_casemods);
2432 SAVEI32(PL_lex_starts);
2433 SAVEI8(PL_lex_state);
2434 SAVESPTR(PL_lex_repl);
2435 SAVEVPTR(PL_lex_inpat);
2436 SAVEI16(PL_lex_inwhat);
2439 SAVECOPLINE(PL_curcop);
2440 SAVEI32(PL_multi_end);
2441 SAVEI32(PL_parser->herelines);
2442 PL_parser->herelines = 0;
2444 SAVEIV(PL_multi_close);
2445 SAVEPPTR(PL_bufptr);
2446 SAVEPPTR(PL_bufend);
2447 SAVEPPTR(PL_oldbufptr);
2448 SAVEPPTR(PL_oldoldbufptr);
2449 SAVEPPTR(PL_last_lop);
2450 SAVEPPTR(PL_last_uni);
2451 SAVEPPTR(PL_linestart);
2452 SAVESPTR(PL_linestr);
2453 SAVEGENERICPV(PL_lex_brackstack);
2454 SAVEGENERICPV(PL_lex_casestack);
2455 SAVEGENERICPV(PL_parser->lex_shared);
2456 SAVEBOOL(PL_parser->lex_re_reparsing);
2457 SAVEI32(PL_copline);
2459 /* The here-doc parser needs to be able to peek into outer lexing
2460 scopes to find the body of the here-doc. So we put PL_linestr and
2461 PL_bufptr into lex_shared, to ‘share’ those values.
2463 PL_parser->lex_shared->ls_linestr = PL_linestr;
2464 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2466 PL_linestr = PL_lex_stuff;
2467 PL_lex_repl = PL_parser->lex_sub_repl;
2468 PL_lex_stuff = NULL;
2469 PL_parser->lex_sub_repl = NULL;
2471 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2472 set for an inner quote-like operator and then an error causes scope-
2473 popping. We must not have a PL_lex_stuff value left dangling, as
2474 that breaks assumptions elsewhere. See bug #123617. */
2475 SAVEGENERICSV(PL_lex_stuff);
2476 SAVEGENERICSV(PL_parser->lex_sub_repl);
2478 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2479 = SvPVX(PL_linestr);
2480 PL_bufend += SvCUR(PL_linestr);
2481 PL_last_lop = PL_last_uni = NULL;
2482 SAVEFREESV(PL_linestr);
2483 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2485 PL_lex_dojoin = FALSE;
2486 PL_lex_brackets = PL_lex_formbrack = 0;
2487 PL_lex_allbrackets = 0;
2488 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2489 Newx(PL_lex_brackstack, 120, char);
2490 Newx(PL_lex_casestack, 12, char);
2491 PL_lex_casemods = 0;
2492 *PL_lex_casestack = '\0';
2494 PL_lex_state = LEX_INTERPCONCAT;
2496 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2497 PL_copline = NOLINE;
2499 Newxz(shared, 1, LEXSHARED);
2500 shared->ls_prev = PL_parser->lex_shared;
2501 PL_parser->lex_shared = shared;
2503 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2504 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2505 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2506 PL_lex_inpat = PL_parser->lex_sub_op;
2508 PL_lex_inpat = NULL;
2510 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2511 PL_in_eval &= ~EVAL_RE_REPARSING;
2518 * Restores lexer state after a S_sublex_push.
2524 if (!PL_lex_starts++) {
2525 SV * const sv = newSVpvs("");
2526 if (SvUTF8(PL_linestr))
2528 PL_expect = XOPERATOR;
2529 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2533 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2534 PL_lex_state = LEX_INTERPCASEMOD;
2538 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2539 assert(PL_lex_inwhat != OP_TRANSR);
2541 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2542 PL_linestr = PL_lex_repl;
2544 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2545 PL_bufend += SvCUR(PL_linestr);
2546 PL_last_lop = PL_last_uni = NULL;
2547 PL_lex_dojoin = FALSE;
2548 PL_lex_brackets = 0;
2549 PL_lex_allbrackets = 0;
2550 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2551 PL_lex_casemods = 0;
2552 *PL_lex_casestack = '\0';
2554 if (SvEVALED(PL_lex_repl)) {
2555 PL_lex_state = LEX_INTERPNORMAL;
2557 /* we don't clear PL_lex_repl here, so that we can check later
2558 whether this is an evalled subst; that means we rely on the
2559 logic to ensure sublex_done() is called again only via the
2560 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2563 PL_lex_state = LEX_INTERPCONCAT;
2566 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2567 CopLINE(PL_curcop) +=
2568 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2569 + PL_parser->herelines;
2570 PL_parser->herelines = 0;
2575 const line_t l = CopLINE(PL_curcop);
2577 if (PL_parser->sub_error_count != PL_error_count) {
2578 if (PL_parser->sub_no_recover) {
2583 if (PL_multi_close == '<')
2584 PL_parser->herelines += l - PL_multi_end;
2585 PL_bufend = SvPVX(PL_linestr);
2586 PL_bufend += SvCUR(PL_linestr);
2587 PL_expect = XOPERATOR;
2593 S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2595 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2596 * interior, hence to the "}". Finds what the name resolves to, returning
2597 * an SV* containing it; NULL if no valid one found */
2600 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
2607 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2609 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2612 SvREFCNT_dec_NN(res);
2613 /* diag_listed_as: Unknown charname '%s' */
2614 yyerror("Unknown charname ''");
2618 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2619 /* include the <}> */
2620 e - backslash_ptr + 1);
2622 SvREFCNT_dec_NN(res);
2626 /* See if the charnames handler is the Perl core's, and if so, we can skip
2627 * the validation needed for a user-supplied one, as Perl's does its own
2629 table = GvHV(PL_hintgv); /* ^H */
2630 cvp = hv_fetchs(table, "charnames", FALSE);
2631 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2632 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2634 const char * const name = HvNAME(stash);
2635 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2640 /* Here, it isn't Perl's charname handler. We can't rely on a
2641 * user-supplied handler to validate the input name. For non-ut8 input,
2642 * look to see that the first character is legal. Then loop through the
2643 * rest checking that each is a continuation */
2645 /* This code makes the reasonable assumption that the only Latin1-range
2646 * characters that begin a character name alias are alphabetic, otherwise
2647 * would have to create a isCHARNAME_BEGIN macro */
2650 if (! isALPHAU(*s)) {
2655 if (! isCHARNAME_CONT(*s)) {
2658 if (*s == ' ' && *(s-1) == ' ') {
2665 /* Similarly for utf8. For invariants can check directly; for other
2666 * Latin1, can calculate their code point and check; otherwise use a
2668 if (UTF8_IS_INVARIANT(*s)) {
2669 if (! isALPHAU(*s)) {
2673 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2674 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2680 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2681 utf8_to_uvchr_buf((U8 *) s,
2691 if (UTF8_IS_INVARIANT(*s)) {
2692 if (! isCHARNAME_CONT(*s)) {
2695 if (*s == ' ' && *(s-1) == ' ') {
2700 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2701 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2708 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2709 utf8_to_uvchr_buf((U8 *) s,
2719 if (*(s-1) == ' ') {
2720 /* diag_listed_as: charnames alias definitions may not contain
2721 trailing white-space; marked by <-- HERE in %s
2725 "charnames alias definitions may not contain trailing "
2726 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2727 (int)(s - backslash_ptr + 1), backslash_ptr,
2728 (int)(e - s + 1), s + 1
2730 UTF ? SVf_UTF8 : 0);
2734 if (SvUTF8(res)) { /* Don't accept malformed input */
2735 const U8* first_bad_char_loc;
2737 const char* const str = SvPV_const(res, len);
2738 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2739 &first_bad_char_loc)))
2741 _force_out_malformed_utf8_message(first_bad_char_loc,
2742 (U8 *) PL_parser->bufend,
2744 0 /* 0 means don't die */ );
2745 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2746 immediately after '%s' */
2749 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2750 (int) (e - backslash_ptr + 1), backslash_ptr,
2751 (int) ((char *) first_bad_char_loc - str), str
2762 /* The final %.*s makes sure that should the trailing NUL be missing
2763 * that this print won't run off the end of the string */
2764 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2768 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2769 (int)(s - backslash_ptr + 1), backslash_ptr,
2770 (int)(e - s + 1), s + 1
2772 UTF ? SVf_UTF8 : 0);
2777 /* diag_listed_as: charnames alias definitions may not contain a
2778 sequence of multiple spaces; marked by <-- HERE
2782 "charnames alias definitions may not contain a sequence of "
2783 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2784 (int)(s - backslash_ptr + 1), backslash_ptr,
2785 (int)(e - s + 1), s + 1
2787 UTF ? SVf_UTF8 : 0);
2794 Extracts the next constant part of a pattern, double-quoted string,
2795 or transliteration. This is terrifying code.
2797 For example, in parsing the double-quoted string "ab\x63$d", it would
2798 stop at the '$' and return an OP_CONST containing 'abc'.
2800 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2801 processing a pattern (PL_lex_inpat is true), a transliteration
2802 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2804 Returns a pointer to the character scanned up to. If this is
2805 advanced from the start pointer supplied (i.e. if anything was
2806 successfully parsed), will leave an OP_CONST for the substring scanned
2807 in pl_yylval. Caller must intuit reason for not parsing further
2808 by looking at the next characters herself.
2812 \N{FOO} => \N{U+hex_for_character_FOO}
2813 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2816 all other \-char, including \N and \N{ apart from \N{ABC}
2819 @ and $ where it appears to be a var, but not for $ as tail anchor
2823 In transliterations:
2824 characters are VERY literal, except for - not at the start or end
2825 of the string, which indicates a range. However some backslash sequences
2826 are recognized: \r, \n, and the like
2827 \007 \o{}, \x{}, \N{}
2828 If all elements in the transliteration are below 256,
2829 scan_const expands the range to the full set of intermediate
2830 characters. If the range is in utf8, the hyphen is replaced with
2831 a certain range mark which will be handled by pmtrans() in op.c.
2833 In double-quoted strings:
2835 all those recognized in transliterations
2836 deprecated backrefs: \1 (in substitution replacements)
2837 case and quoting: \U \Q \E
2840 scan_const does *not* construct ops to handle interpolated strings.
2841 It stops processing as soon as it finds an embedded $ or @ variable
2842 and leaves it to the caller to work out what's going on.
2844 embedded arrays (whether in pattern or not) could be:
2845 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2847 $ in double-quoted strings must be the symbol of an embedded scalar.
2849 $ in pattern could be $foo or could be tail anchor. Assumption:
2850 it's a tail anchor if $ is the last thing in the string, or if it's
2851 followed by one of "()| \r\n\t"
2853 \1 (backreferences) are turned into $1 in substitutions
2855 The structure of the code is
2856 while (there's a character to process) {
2857 handle transliteration ranges
2858 skip regexp comments /(?#comment)/ and codes /(?{code})/
2859 skip #-initiated comments in //x patterns
2860 check for embedded arrays
2861 check for embedded scalars
2863 deprecate \1 in substitution replacements
2864 handle string-changing backslashes \l \U \Q \E, etc.
2865 switch (what was escaped) {
2866 handle \- in a transliteration (becomes a literal -)
2867 if a pattern and not \N{, go treat as regular character
2868 handle \132 (octal characters)
2869 handle \x15 and \x{1234} (hex characters)
2870 handle \N{name} (named characters, also \N{3,5} in a pattern)
2871 handle \cV (control characters)
2872 handle printf-style backslashes (\f, \r, \n, etc)
2875 } (end if backslash)
2876 handle regular character
2877 } (end while character to read)
2882 S_scan_const(pTHX_ char *start)
2884 char *send = PL_bufend; /* end of the constant */
2885 SV *sv = newSV(send - start); /* sv for the constant. See note below
2887 char *s = start; /* start of the constant */
2888 char *d = SvPVX(sv); /* destination for copies */
2889 bool dorange = FALSE; /* are we in a translit range? */
2890 bool didrange = FALSE; /* did we just finish a range? */
2891 bool in_charclass = FALSE; /* within /[...]/ */
2892 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2893 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2894 UTF8? But, this can show as true
2895 when the source isn't utf8, as for
2896 example when it is entirely composed
2898 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
2899 number of characters found so far
2900 that will expand (into 2 bytes)
2901 should we have to convert to
2903 SV *res; /* result from charnames */
2904 STRLEN offset_to_max = 0; /* The offset in the output to where the range
2905 high-end character is temporarily placed */
2907 /* Does something require special handling in tr/// ? This avoids extra
2908 * work in a less likely case. As such, khw didn't feel it was worth
2909 * adding any branches to the more mainline code to handle this, which
2910 * means that this doesn't get set in some circumstances when things like
2911 * \x{100} get expanded out. As a result there needs to be extra testing
2912 * done in the tr code */
2913 bool has_above_latin1 = FALSE;
2915 /* Note on sizing: The scanned constant is placed into sv, which is
2916 * initialized by newSV() assuming one byte of output for every byte of
2917 * input. This routine expects newSV() to allocate an extra byte for a
2918 * trailing NUL, which this routine will append if it gets to the end of
2919 * the input. There may be more bytes of input than output (eg., \N{LATIN
2920 * CAPITAL LETTER A}), or more output than input if the constant ends up
2921 * recoded to utf8, but each time a construct is found that might increase
2922 * the needed size, SvGROW() is called. Its size parameter each time is
2923 * based on the best guess estimate at the time, namely the length used so
2924 * far, plus the length the current construct will occupy, plus room for
2925 * the trailing NUL, plus one byte for every input byte still unscanned */
2927 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2930 int backslash_N = 0; /* ? was the character from \N{} */
2931 int non_portable_endpoint = 0; /* ? In a range is an endpoint
2932 platform-specific like \x65 */
2935 PERL_ARGS_ASSERT_SCAN_CONST;
2937 assert(PL_lex_inwhat != OP_TRANSR);
2938 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
2939 /* If we are doing a trans and we know we want UTF8 set expectation */
2940 has_utf8 = PL_parser->lex_sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2941 this_utf8 = PL_parser->lex_sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2944 /* Protect sv from errors and fatal warnings. */
2945 ENTER_with_name("scan_const");
2949 || dorange /* Handle tr/// range at right edge of input */
2952 /* get transliterations out of the way (they're most literal) */
2953 if (PL_lex_inwhat == OP_TRANS) {
2955 /* But there isn't any special handling necessary unless there is a
2956 * range, so for most cases we just drop down and handle the value
2957 * as any other. There are two exceptions.
2959 * 1. A hyphen indicates that we are actually going to have a
2960 * range. In this case, skip the '-', set a flag, then drop
2961 * down to handle what should be the end range value.
2962 * 2. After we've handled that value, the next time through, that
2963 * flag is set and we fix up the range.
2965 * Ranges entirely within Latin1 are expanded out entirely, in
2966 * order to make the transliteration a simple table look-up.
2967 * Ranges that extend above Latin1 have to be done differently, so
2968 * there is no advantage to expanding them here, so they are
2969 * stored here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte
2970 * signifies a hyphen without any possible ambiguity. On EBCDIC
2971 * machines, if the range is expressed as Unicode, the Latin1
2972 * portion is expanded out even if the range extends above
2973 * Latin1. This is because each code point in it has to be
2974 * processed here individually to get its native translation */
2978 /* Here, we don't think we're in a range. If the new character
2979 * is not a hyphen; or if it is a hyphen, but it's too close to
2980 * either edge to indicate a range, or if we haven't output any
2981 * characters yet then it's a regular character. */
2982 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv)) {
2984 /* A regular character. Process like any other, but first
2985 * clear any flags */
2989 non_portable_endpoint = 0;
2992 /* The tests here for being above Latin1 and similar ones
2993 * in the following 'else' suffice to find all such
2994 * occurences in the constant, except those added by a
2995 * backslash escape sequence, like \x{100}. Mostly, those
2996 * set 'has_above_latin1' as appropriate */
2997 if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
2998 has_above_latin1 = TRUE;
3001 /* Drops down to generic code to process current byte */
3003 else { /* Is a '-' in the context where it means a range */
3004 if (didrange) { /* Something like y/A-C-Z// */
3005 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3011 s++; /* Skip past the hyphen */
3013 /* d now points to where the end-range character will be
3014 * placed. Save it so won't have to go finding it later,
3015 * and drop down to get that character. (Actually we
3016 * instead save the offset, to handle the case where a
3017 * realloc in the meantime could change the actual
3018 * pointer). We'll finish processing the range the next
3019 * time through the loop */
3020 offset_to_max = d - SvPVX_const(sv);
3022 if (this_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3023 has_above_latin1 = TRUE;
3026 /* Drops down to generic code to process current byte */
3028 } /* End of not a range */
3030 /* Here we have parsed a range. Now must handle it. At this
3032 * 'sv' is a SV* that contains the output string we are
3033 * constructing. The final two characters in that string
3034 * are the range start and range end, in order.
3035 * 'd' points to just beyond the range end in the 'sv' string,
3036 * where we would next place something
3037 * 'offset_to_max' is the offset in 'sv' at which the character
3038 * (the range's maximum end point) before 'd' begins.
3040 char * max_ptr = SvPVX(sv) + offset_to_max;
3043 IV range_max; /* last character in range */
3045 Size_t offset_to_min = 0;
3048 bool convert_unicode;
3049 IV real_range_max = 0;
3051 /* Get the code point values of the range ends. */
3053 /* We know the utf8 is valid, because we just constructed
3054 * it ourselves in previous loop iterations */
3055 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3056 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3057 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3059 /* This compensates for not all code setting
3060 * 'has_above_latin1', so that we don't skip stuff that
3061 * should be executed */
3062 if (range_max > 255) {
3063 has_above_latin1 = TRUE;
3067 min_ptr = max_ptr - 1;
3068 range_min = * (U8*) min_ptr;
3069 range_max = * (U8*) max_ptr;
3072 /* If the range is just a single code point, like tr/a-a/.../,
3073 * that code point is already in the output, twice. We can
3074 * just back up over the second instance and avoid all the rest
3075 * of the work. But if it is a variant character, it's been
3076 * counted twice, so decrement. (This unlikely scenario is
3077 * special cased, like the one for a range of 2 code points
3078 * below, only because the main-line code below needs a range
3079 * of 3 or more to work without special casing. Might as well
3080 * get it out of the way now.) */
3081 if (UNLIKELY(range_max == range_min)) {
3083 if (! has_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3084 utf8_variant_count--;
3090 /* On EBCDIC platforms, we may have to deal with portable
3091 * ranges. These happen if at least one range endpoint is a
3092 * Unicode value (\N{...}), or if the range is a subset of
3093 * [A-Z] or [a-z], and both ends are literal characters,
3094 * like 'A', and not like \x{C1} */
3096 cBOOL(backslash_N) /* \N{} forces Unicode,
3097 hence portable range */
3098 || ( ! non_portable_endpoint
3099 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3100 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3101 if (convert_unicode) {
3103 /* Special handling is needed for these portable ranges.
3104 * They are defined to be in Unicode terms, which includes
3105 * all the Unicode code points between the end points.
3106 * Convert to Unicode to get the Unicode range. Later we
3107 * will convert each code point in the range back to
3109 range_min = NATIVE_TO_UNI(range_min);
3110 range_max = NATIVE_TO_UNI(range_max);
3114 if (range_min > range_max) {
3116 if (convert_unicode) {
3117 /* Need to convert back to native for meaningful
3118 * messages for this platform */
3119 range_min = UNI_TO_NATIVE(range_min);
3120 range_max = UNI_TO_NATIVE(range_max);
3123 /* Use the characters themselves for the error message if
3124 * ASCII printables; otherwise some visible representation
3126 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3128 "Invalid range \"%c-%c\" in transliteration operator",
3129 (char)range_min, (char)range_max);
3132 else if (convert_unicode) {
3133 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3135 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3136 UVXf "}\" in transliteration operator",
3137 range_min, range_max);
3141 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3143 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3144 " in transliteration operator",
3145 range_min, range_max);
3149 /* If the range is exactly two code points long, they are
3150 * already both in the output */
3151 if (UNLIKELY(range_min + 1 == range_max)) {
3155 /* Here the range contains at least 3 code points */
3159 /* If everything in the transliteration is below 256, we
3160 * can avoid special handling later. A translation table
3161 * for each of those bytes is created by op.c. So we
3162 * expand out all ranges to their constituent code points.
3163 * But if we've encountered something above 255, the
3164 * expanding won't help, so skip doing that. But if it's
3165 * EBCDIC, we may have to look at each character below 256
3166 * if we have to convert to/from Unicode values */
3167 if ( has_above_latin1
3169 && (range_min > 255 || ! convert_unicode)
3172 /* Move the high character one byte to the right; then
3173 * insert between it and the range begin, an illegal
3174 * byte which serves to indicate this is a range (using
3175 * a '-' would be ambiguous). */
3177 while (e-- > max_ptr) {
3180 *(e + 1) = (char) ILLEGAL_UTF8_BYTE;
3184 /* Here, we're going to expand out the range. For EBCDIC
3185 * the range can extend above 255 (not so in ASCII), so
3186 * for EBCDIC, split it into the parts above and below
3189 if (range_max > 255) {
3190 real_range_max = range_max;
3196 /* Here we need to expand out the string to contain each
3197 * character in the range. Grow the output to handle this.
3198 * For non-UTF8, we need a byte for each code point in the
3199 * range, minus the three that we've already allocated for: the
3200 * hyphen, the min, and the max. For UTF-8, we need this
3201 * plus an extra byte for each code point that occupies two
3202 * bytes (is variant) when in UTF-8 (except we've already
3203 * allocated for the end points, including if they are
3204 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3205 * platforms, it's easy to calculate a precise number. To
3206 * start, we count the variants in the range, which we need
3207 * elsewhere in this function anyway. (For the case where it
3208 * isn't easy to calculate, 'extras' has been initialized to 0,
3209 * and the calculation is done in a loop further down.) */
3211 if (convert_unicode)
3214 /* This is executed unconditionally on ASCII, and for
3215 * Unicode ranges on EBCDIC. Under these conditions, all
3216 * code points above a certain value are variant; and none
3217 * under that value are. We just need to find out how much
3218 * of the range is above that value. We don't count the
3219 * end points here, as they will already have been counted
3220 * as they were parsed. */
3221 if (range_min >= UTF_CONTINUATION_MARK) {
3223 /* The whole range is made up of variants */
3224 extras = (range_max - 1) - (range_min + 1) + 1;
3226 else if (range_max >= UTF_CONTINUATION_MARK) {
3228 /* Only the higher portion of the range is variants */
3229 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3232 utf8_variant_count += extras;
3235 /* The base growth is the number of code points in the range,
3236 * not including the endpoints, which have already been sized
3237 * for (and output). We don't subtract for the hyphen, as it
3238 * has been parsed but not output, and the SvGROW below is
3239 * based only on what's been output plus what's left to parse.
3241 grow = (range_max - 1) - (range_min + 1) + 1;
3245 /* In some cases in EBCDIC, we haven't yet calculated a
3246 * precise amount needed for the UTF-8 variants. Just
3247 * assume the worst case, that everything will expand by a
3249 if (! convert_unicode) {
3255 /* Otherwise we know exactly how many variants there
3256 * are in the range. */
3261 /* Grow, but position the output to overwrite the range min end
3262 * point, because in some cases we overwrite that */
3263 SvCUR_set(sv, d - SvPVX_const(sv));
3264 offset_to_min = min_ptr - SvPVX_const(sv);
3266 /* See Note on sizing above. */
3267 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3270 + 1 /* Trailing NUL */ );
3272 /* Now, we can expand out the range. */
3274 if (convert_unicode) {
3277 /* Recall that the min and max are now in Unicode terms, so
3278 * we have to convert each character to its native
3281 for (i = range_min; i <= range_max; i++) {
3282 append_utf8_from_native_byte(
3283 LATIN1_TO_NATIVE((U8) i),
3288 for (i = range_min; i <= range_max; i++) {
3289 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3295 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3297 /* Here, no conversions are necessary, which means that the
3298 * first character in the range is already in 'd' and
3299 * valid, so we can skip overwriting it */
3303 for (i = range_min + 1; i <= range_max; i++) {
3304 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3310 assert(range_min + 1 <= range_max);
3311 for (i = range_min + 1; i < range_max; i++) {
3313 /* In this case on EBCDIC, we haven't calculated
3314 * the variants. Do it here, as we go along */
3315 if (! UVCHR_IS_INVARIANT(i)) {
3316 utf8_variant_count++;
3322 /* The range_max is done outside the loop so as to
3323 * avoid having to special case not incrementing
3324 * 'utf8_variant_count' on EBCDIC (it's already been
3325 * counted when originally parsed) */
3326 *d++ = (char) range_max;
3331 /* If the original range extended above 255, add in that
3333 if (real_range_max) {
3334 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3335 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3336 if (real_range_max > 0x100) {
3337 if (real_range_max > 0x101) {
3338 *d++ = (char) ILLEGAL_UTF8_BYTE;
3340 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3346 /* mark the range as done, and continue */
3350 non_portable_endpoint = 0;
3354 } /* End of is a range */
3355 } /* End of transliteration. Joins main code after these else's */
3356 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3359 while (s1 >= start && *s1-- == '\\')
3362 in_charclass = TRUE;
3364 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3367 while (s1 >= start && *s1-- == '\\')
3370 in_charclass = FALSE;
3372 /* skip for regexp comments /(?#comment)/, except for the last
3373 * char, which will be done separately. Stop on (?{..}) and
3375 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3377 while (s+1 < send && *s != ')')
3380 else if (!PL_lex_casemods
3381 && ( s[2] == '{' /* This should match regcomp.c */
3382 || (s[2] == '?' && s[3] == '{')))
3387 /* likewise skip #-initiated comments in //x patterns */
3391 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3393 while (s < send && *s != '\n')
3396 /* no further processing of single-quoted regex */
3397 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3398 goto default_action;
3400 /* check for embedded arrays
3401 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3403 else if (*s == '@' && s[1]) {
3405 ? isIDFIRST_utf8_safe(s+1, send)
3406 : isWORDCHAR_A(s[1]))
3410 if (strchr(":'{$", s[1]))
3412 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3413 break; /* in regexp, neither @+ nor @- are interpolated */
3415 /* check for embedded scalars. only stop if we're sure it's a
3417 else if (*s == '$') {
3418 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3420 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3422 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3423 "Possible unintended interpolation of $\\ in regex");
3425 break; /* in regexp, $ might be tail anchor */
3429 /* End of else if chain - OP_TRANS rejoin rest */
3431 if (UNLIKELY(s >= send)) {
3437 if (*s == '\\' && s+1 < send) {
3438 char* e; /* Can be used for ending '}', etc. */
3442 /* warn on \1 - \9 in substitution replacements, but note that \11
3443 * is an octal; and \19 is \1 followed by '9' */
3444 if (PL_lex_inwhat == OP_SUBST
3450 /* diag_listed_as: \%d better written as $%d */
3451 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3456 /* string-change backslash escapes */
3457 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3461 /* In a pattern, process \N, but skip any other backslash escapes.
3462 * This is because we don't want to translate an escape sequence
3463 * into a meta symbol and have the regex compiler use the meta
3464 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3465 * in spite of this, we do have to process \N here while the proper
3466 * charnames handler is in scope. See bugs #56444 and #62056.
3468 * There is a complication because \N in a pattern may also stand
3469 * for 'match a non-nl', and not mean a charname, in which case its
3470 * processing should be deferred to the regex compiler. To be a
3471 * charname it must be followed immediately by a '{', and not look
3472 * like \N followed by a curly quantifier, i.e., not something like
3473 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3475 else if (PL_lex_inpat
3478 || regcurly(s + 1)))
3481 goto default_action;
3487 if ((isALPHANUMERIC(*s)))
3488 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3489 "Unrecognized escape \\%c passed through",
3491 /* default action is to copy the quoted character */
3492 goto default_action;
3495 /* eg. \132 indicates the octal constant 0132 */
3496 case '0': case '1': case '2': case '3':
3497 case '4': case '5': case '6': case '7':
3499 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3501 uv = grok_oct(s, &len, &flags, NULL);
3503 if (len < 3 && s < send && isDIGIT(*s)
3504 && ckWARN(WARN_MISC))
3506 Perl_warner(aTHX_ packWARN(WARN_MISC),
3507 "%s", form_short_octal_warning(s, len));
3510 goto NUM_ESCAPE_INSERT;
3512 /* eg. \o{24} indicates the octal constant \024 */
3517 bool valid = grok_bslash_o(&s, PL_bufend,
3519 TRUE, /* Output warning */
3520 FALSE, /* Not strict */
3521 TRUE, /* Output warnings for
3526 uv = 0; /* drop through to ensure range ends are set */
3528 goto NUM_ESCAPE_INSERT;
3531 /* eg. \x24 indicates the hex constant 0x24 */
3536 bool valid = grok_bslash_x(&s, PL_bufend,
3538 TRUE, /* Output warning */
3539 FALSE, /* Not strict */
3540 TRUE, /* Output warnings for
3545 uv = 0; /* drop through to ensure range ends are set */
3550 /* Insert oct or hex escaped character. */
3552 /* Here uv is the ordinal of the next character being added */
3553 if (UVCHR_IS_INVARIANT(uv)) {
3557 if (!has_utf8 && uv > 255) {
3559 /* Here, 'uv' won't fit unless we convert to UTF-8.
3560 * If we've only seen invariants so far, all we have to
3561 * do is turn on the flag */
3562 if (utf8_variant_count == 0) {
3566 SvCUR_set(sv, d - SvPVX_const(sv));
3570 sv_utf8_upgrade_flags_grow(
3572 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3574 /* Since we're having to grow here,
3575 * make sure we have enough room for
3576 * this escape and a NUL, so the
3577 * code immediately below won't have
3578 * to actually grow again */
3580 + (STRLEN)(send - s) + 1);
3581 d = SvPVX(sv) + SvCUR(sv);
3584 has_above_latin1 = TRUE;
3590 utf8_variant_count++;
3593 /* Usually, there will already be enough room in 'sv'
3594 * since such escapes are likely longer than any UTF-8
3595 * sequence they can end up as. This isn't the case on
3596 * EBCDIC where \x{40000000} contains 12 bytes, and the
3597 * UTF-8 for it contains 14. And, we have to allow for
3598 * a trailing NUL. It probably can't happen on ASCII
3599 * platforms, but be safe. See Note on sizing above. */
3600 const STRLEN needed = d - SvPVX(sv)
3604 if (UNLIKELY(needed > SvLEN(sv))) {
3605 SvCUR_set(sv, d - SvPVX_const(sv));
3606 d = SvCUR(sv) + SvGROW(sv, needed);
3609 d = (char*)uvchr_to_utf8((U8*)d, uv);
3610 if (PL_lex_inwhat == OP_TRANS
3611 && PL_parser->lex_sub_op)
3613 PL_parser->lex_sub_op->op_private |=
3614 (PL_lex_repl ? OPpTRANS_FROM_UTF
3620 non_portable_endpoint++;
3625 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3626 * named character, like \N{LATIN SMALL LETTER A}, or a named
3627 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3628 * GRAVE} (except y/// can't handle the latter, croaking). For
3629 * convenience all three forms are referred to as "named
3630 * characters" below.
3632 * For patterns, \N also can mean to match a non-newline. Code
3633 * before this 'switch' statement should already have handled
3634 * this situation, and hence this code only has to deal with
3635 * the named character cases.
3637 * For non-patterns, the named characters are converted to
3638 * their string equivalents. In patterns, named characters are
3639 * not converted to their ultimate forms for the same reasons
3640 * that other escapes aren't (mainly that the ultimate
3641 * character could be considered a meta-symbol by the regex
3642 * compiler). Instead, they are converted to the \N{U+...}
3643 * form to get the value from the charnames that is in effect
3644 * right now, while preserving the fact that it was a named
3645 * character, so that the regex compiler knows this.
3647 * The structure of this section of code (besides checking for
3648 * errors and upgrading to utf8) is:
3649 * If the named character is of the form \N{U+...}, pass it
3650 * through if a pattern; otherwise convert the code point
3652 * Otherwise must be some \N{NAME}: convert to
3653 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3655 * Transliteration is an exception. The conversion to utf8 is
3656 * only done if the code point requires it to be representable.
3658 * Here, 's' points to the 'N'; the test below is guaranteed to
3659 * succeed if we are being called on a pattern, as we already
3660 * know from a test above that the next character is a '{'. A
3661 * non-pattern \N must mean 'named character', which requires
3665 yyerror("Missing braces on \\N{}");
3671 /* If there is no matching '}', it is an error. */
3672 if (! (e = (char *) memchr(s, '}', send - s))) {
3673 if (! PL_lex_inpat) {
3674 yyerror("Missing right brace on \\N{}");
3676 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3678 yyquit(); /* Have exhausted the input. */
3681 /* Here it looks like a named character */
3683 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3684 s += 2; /* Skip to next char after the 'U+' */
3687 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3688 /* Check the syntax. */
3691 if (!isXDIGIT(*s)) {
3694 "Invalid hexadecimal number in \\N{U+...}"
3703 else if ((*s == '.' || *s == '_')
3709 /* Pass everything through unchanged.
3710 * +1 is for the '}' */
3711 Copy(orig_s, d, e - orig_s + 1, char);
3712 d += e - orig_s + 1;
3714 else { /* Not a pattern: convert the hex to string */
3715 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3716 | PERL_SCAN_SILENT_ILLDIGIT
3717 | PERL_SCAN_DISALLOW_PREFIX;
3719 uv = grok_hex(s, &len, &flags, NULL);
3720 if (len == 0 || (len != (STRLEN)(e - s)))
3723 /* For non-tr///, if the destination is not in utf8,
3724 * unconditionally recode it to be so. This is
3725 * because \N{} implies Unicode semantics, and scalars
3726 * have to be in utf8 to guarantee those semantics.
3727 * tr/// doesn't care about Unicode rules, so no need
3728 * there to upgrade to UTF-8 for small enough code
3730 if (! has_utf8 && ( uv > 0xFF
3731 || PL_lex_inwhat != OP_TRANS))
3733 /* See Note on sizing above. */
3734 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3736 SvCUR_set(sv, d - SvPVX_const(sv));
3740 if (utf8_variant_count == 0) {
3742 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3745 sv_utf8_upgrade_flags_grow(
3747 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3749 d = SvPVX(sv) + SvCUR(sv);
3753 has_above_latin1 = TRUE;
3756 /* Add the (Unicode) code point to the output. */
3757 if (! has_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3758 *d++ = (char) LATIN1_TO_NATIVE(uv);
3761 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3765 else /* Here is \N{NAME} but not \N{U+...}. */
3766 if ((res = get_and_check_backslash_N_name(s, e)))
3769 const char *str = SvPV_const(res, len);
3772 if (! len) { /* The name resolved to an empty string */
3773 Copy("\\N{}", d, 4, char);
3777 /* In order to not lose information for the regex
3778 * compiler, pass the result in the specially made
3779 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3780 * the code points in hex of each character
3781 * returned by charnames */
3783 const char *str_end = str + len;
3784 const STRLEN off = d - SvPVX_const(sv);
3786 if (! SvUTF8(res)) {
3787 /* For the non-UTF-8 case, we can determine the
3788 * exact length needed without having to parse
3789 * through the string. Each character takes up
3790 * 2 hex digits plus either a trailing dot or
3792 const char initial_text[] = "\\N{U+";
3793 const STRLEN initial_len = sizeof(initial_text)
3795 d = off + SvGROW(sv, off
3798 /* +1 for trailing NUL */
3801 + (STRLEN)(send - e));
3802 Copy(initial_text, d, initial_len, char);
3804 while (str < str_end) {
3807 my_snprintf(hex_string,
3811 /* The regex compiler is
3812 * expecting Unicode, not
3814 NATIVE_TO_LATIN1(*str));
3815 PERL_MY_SNPRINTF_POST_GUARD(len,
3816 sizeof(hex_string));
3817 Copy(hex_string, d, 3, char);
3821 d--; /* Below, we will overwrite the final
3822 dot with a right brace */
3825 STRLEN char_length; /* cur char's byte length */
3827 /* and the number of bytes after this is
3828 * translated into hex digits */
3829 STRLEN output_length;
3831 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3832 * for max('U+', '.'); and 1 for NUL */
3833 char hex_string[2 * UTF8_MAXBYTES + 5];
3835 /* Get the first character of the result. */
3836 U32 uv = utf8n_to_uvchr((U8 *) str,
3840 /* Convert first code point to Unicode hex,
3841 * including the boiler plate before it. */
3843 my_snprintf(hex_string, sizeof(hex_string),
3845 (unsigned int) NATIVE_TO_UNI(uv));
3847 /* Make sure there is enough space to hold it */
3848 d = off + SvGROW(sv, off
3850 + (STRLEN)(send - e)
3851 + 2); /* '}' + NUL */
3853 Copy(hex_string, d, output_length, char);
3856 /* For each subsequent character, append dot and
3857 * its Unicode code point in hex */
3858 while ((str += char_length) < str_end) {
3859 const STRLEN off = d - SvPVX_const(sv);
3860 U32 uv = utf8n_to_uvchr((U8 *) str,
3865 my_snprintf(hex_string,
3868 (unsigned int) NATIVE_TO_UNI(uv));
3870 d = off + SvGROW(sv, off
3872 + (STRLEN)(send - e)
3873 + 2); /* '}' + NUL */
3874 Copy(hex_string, d, output_length, char);
3879 *d++ = '}'; /* Done. Add the trailing brace */
3882 else { /* Here, not in a pattern. Convert the name to a
3885 if (PL_lex_inwhat == OP_TRANS) {
3886 str = SvPV_const(res, len);
3887 if (len > ((SvUTF8(res))
3891 yyerror(Perl_form(aTHX_
3892 "%.*s must not be a named sequence"
3893 " in transliteration operator",
3894 /* +1 to include the "}" */
3895 (int) (e + 1 - start), start));
3897 goto end_backslash_N;
3900 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3901 has_above_latin1 = TRUE;
3905 else if (! SvUTF8(res)) {
3906 /* Make sure \N{} return is UTF-8. This is because
3907 * \N{} implies Unicode semantics, and scalars have
3908 * to be in utf8 to guarantee those semantics; but
3909 * not needed in tr/// */
3910 sv_utf8_upgrade_flags(res, 0);
3911 str = SvPV_const(res, len);
3914 /* Upgrade destination to be utf8 if this new
3916 if (! has_utf8 && SvUTF8(res)) {
3917 /* See Note on sizing above. */
3918 const STRLEN extra = len + (send - s) + 1;
3920 SvCUR_set(sv, d - SvPVX_const(sv));
3924 if (utf8_variant_count == 0) {
3926 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3929 sv_utf8_upgrade_flags_grow(sv,
3930 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3932 d = SvPVX(sv) + SvCUR(sv);
3935 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3937 /* See Note on sizing above. (NOTE: SvCUR() is not
3938 * set correctly here). */
3939 const STRLEN extra = len + (send - e) + 1;
3940 const STRLEN off = d - SvPVX_const(sv);
3941 d = off + SvGROW(sv, off + extra);
3943 Copy(str, d, len, char);
3949 } /* End \N{NAME} */
3953 backslash_N++; /* \N{} is defined to be Unicode */
3955 s = e + 1; /* Point to just after the '}' */
3958 /* \c is a control character */
3962 *d++ = grok_bslash_c(*s, 1);
3965 yyerror("Missing control char name in \\c");
3966 yyquit(); /* Are at end of input, no sense continuing */
3969 non_portable_endpoint++;
3973 /* printf-style backslashes, formfeeds, newlines, etc */
3999 } /* end if (backslash) */
4002 /* Just copy the input to the output, though we may have to convert
4005 * If the input has the same representation in UTF-8 as not, it will be
4006 * a single byte, and we don't care about UTF8ness; just copy the byte */
4007 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4010 else if (! this_utf8 && ! has_utf8) {
4011 /* If neither source nor output is UTF-8, is also a single byte,
4012 * just copy it; but this byte counts should we later have to
4013 * convert to UTF-8 */
4015 utf8_variant_count++;
4017 else if (this_utf8 && has_utf8) { /* Both UTF-8, can just copy */
4018 const STRLEN len = UTF8SKIP(s);
4020 /* We expect the source to have already been checked for
4022 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4024 Copy(s, d, len, U8);
4028 else { /* UTF8ness matters and doesn't match, need to convert */
4030 const UV nextuv = (this_utf8)
4031 ? utf8n_to_uvchr((U8*)s, send - s, &len, 0)
4033 STRLEN need = UVCHR_SKIP(nextuv);
4036 SvCUR_set(sv, d - SvPVX_const(sv));
4040 /* See Note on sizing above. */
4041 need += (STRLEN)(send - s) + 1;
4043 if (utf8_variant_count == 0) {
4045 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4048 sv_utf8_upgrade_flags_grow(sv,
4049 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4051 d = SvPVX(sv) + SvCUR(sv);
4054 } else if (need > len) {
4055 /* encoded value larger than old, may need extra space (NOTE:
4056 * SvCUR() is not set correctly here). See Note on sizing
4058 const STRLEN extra = need + (send - s) + 1;
4059 const STRLEN off = d - SvPVX_const(sv);
4060 d = off + SvGROW(sv, off + extra);
4064 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
4066 } /* while loop to process each character */
4068 /* terminate the string and set up the sv */
4070 SvCUR_set(sv, d - SvPVX_const(sv));
4071 if (SvCUR(sv) >= SvLEN(sv))
4072 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %" UVuf
4073 " >= %" UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
4078 if (PL_lex_inwhat == OP_TRANS && PL_parser->lex_sub_op) {
4079 PL_parser->lex_sub_op->op_private |=
4080 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
4084 /* shrink the sv if we allocated more than we used */
4085 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4086 SvPV_shrink_to_cur(sv);
4089 /* return the substring (via pl_yylval) only if we parsed anything */
4092 for (; s2 < s; s2++) {
4094 COPLINE_INC_WITH_HERELINES;
4096 SvREFCNT_inc_simple_void_NN(sv);
4097 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4098 && ! PL_parser->lex_re_reparsing)
4100 const char *const key = PL_lex_inpat ? "qr" : "q";
4101 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4105 if (PL_lex_inwhat == OP_TRANS) {
4108 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4111 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4119 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4122 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4124 LEAVE_with_name("scan_const");
4129 * Returns TRUE if there's more to the expression (e.g., a subscript),
4132 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4134 * ->[ and ->{ return TRUE
4135 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4136 * { and [ outside a pattern are always subscripts, so return TRUE
4137 * if we're outside a pattern and it's not { or [, then return FALSE
4138 * if we're in a pattern and the first char is a {
4139 * {4,5} (any digits around the comma) returns FALSE
4140 * if we're in a pattern and the first char is a [
4142 * [SOMETHING] has a funky algorithm to decide whether it's a
4143 * character class or not. It has to deal with things like
4144 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4145 * anything else returns TRUE
4148 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4151 S_intuit_more(pTHX_ char *s, char *e)
4153 PERL_ARGS_ASSERT_INTUIT_MORE;
4155 if (PL_lex_brackets)
4157 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4159 if (*s == '-' && s[1] == '>'
4160 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4161 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4162 ||(s[2] == '@' && strchr("*[{",s[3])) ))
4164 if (*s != '{' && *s != '[')
4166 PL_parser->sub_no_recover = TRUE;
4170 /* In a pattern, so maybe we have {n,m}. */
4178 /* On the other hand, maybe we have a character class */
4181 if (*s == ']' || *s == '^')
4184 /* this is terrifying, and it works */
4187 const char * const send = (char *) memchr(s, ']', e - s);
4188 unsigned char un_char, last_un_char;
4189 char tmpbuf[sizeof PL_tokenbuf * 4];
4191 if (!send) /* has to be an expression */
4193 weight = 2; /* let's weigh the evidence */
4197 else if (isDIGIT(*s)) {
4199 if (isDIGIT(s[1]) && s[2] == ']')
4205 Zero(seen,256,char);
4207 for (; s < send; s++) {
4208 last_un_char = un_char;
4209 un_char = (unsigned char)*s;
4214 weight -= seen[un_char] * 10;
4215 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4217 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4218 len = (int)strlen(tmpbuf);
4219 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4220 UTF ? SVf_UTF8 : 0, SVt_PV))
4227 && strchr("[#!%*<>()-=",s[1]))
4229 if (/*{*/ strchr("])} =",s[2]))
4238 if (strchr("wds]",s[1]))
4240 else if (seen[(U8)'\''] || seen[(U8)'"'])
4242 else if (strchr("rnftbxcav",s[1]))
4244 else if (isDIGIT(s[1])) {
4246 while (s[1] && isDIGIT(s[1]))
4256 if (strchr("aA01! ",last_un_char))
4258 if (strchr("zZ79~",s[1]))
4260 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4261 weight -= 5; /* cope with negative subscript */
4264 if (!isWORDCHAR(last_un_char)
4265 && !(last_un_char == '$' || last_un_char == '@'
4266 || last_un_char == '&')
4267 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4271 if (keyword(d, s - d, 0))
4274 if (un_char == last_un_char + 1)
4276 weight -= seen[un_char];
4281 if (weight >= 0) /* probably a character class */
4291 * Does all the checking to disambiguate
4293 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4294 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4296 * First argument is the stuff after the first token, e.g. "bar".
4298 * Not a method if foo is a filehandle.
4299 * Not a method if foo is a subroutine prototyped to take a filehandle.
4300 * Not a method if it's really "Foo $bar"
4301 * Method if it's "foo $bar"
4302 * Not a method if it's really "print foo $bar"
4303 * Method if it's really "foo package::" (interpreted as package->foo)
4304 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4305 * Not a method if bar is a filehandle or package, but is quoted with
4310 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4312 char *s = start + (*start == '$');
4313 char tmpbuf[sizeof PL_tokenbuf];
4316 /* Mustn't actually add anything to a symbol table.
4317 But also don't want to "initialise" any placeholder
4318 constants that might already be there into full
4319 blown PVGVs with attached PVCV. */
4321 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4323 PERL_ARGS_ASSERT_INTUIT_METHOD;
4325 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4327 if (cv && SvPOK(cv)) {
4328 const char *proto = CvPROTO(cv);
4330 while (*proto && (isSPACE(*proto) || *proto == ';'))
4337 if (*start == '$') {
4338 SSize_t start_off = start - SvPVX(PL_linestr);
4339 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4340 || isUPPER(*PL_tokenbuf))
4342 /* this could be $# */
4345 PL_bufptr = SvPVX(PL_linestr) + start_off;
4347 return *s == '(' ? FUNCMETH : METHOD;
4350 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4351 /* start is the beginning of the possible filehandle/object,
4352 * and s is the end of it
4353 * tmpbuf is a copy of it (but with single quotes as double colons)
4356 if (!keyword(tmpbuf, len, 0)) {
4357 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4362 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4363 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4365 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4366 && (!isGV(indirgv) || GvCVu(indirgv)))
4368 /* filehandle or package name makes it a method */
4369 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4371 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4372 return 0; /* no assumptions -- "=>" quotes bareword */
4374 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4375 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4376 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4378 force_next(BAREWORD);
4380 return *s == '(' ? FUNCMETH : METHOD;
4386 /* Encoded script support. filter_add() effectively inserts a
4387 * 'pre-processing' function into the current source input stream.
4388 * Note that the filter function only applies to the current source file
4389 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4391 * The datasv parameter (which may be NULL) can be used to pass
4392 * private data to this instance of the filter. The filter function
4393 * can recover the SV using the FILTER_DATA macro and use it to
4394 * store private buffers and state information.
4396 * The supplied datasv parameter is upgraded to a PVIO type
4397 * and the IoDIRP/IoANY field is used to store the function pointer,
4398 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4399 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4400 * private use must be set using malloc'd pointers.
4404 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4412 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4413 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4415 if (!PL_rsfp_filters)
4416 PL_rsfp_filters = newAV();
4419 SvUPGRADE(datasv, SVt_PVIO);
4420 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4421 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4422 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4423 FPTR2DPTR(void *, IoANY(datasv)),
4424 SvPV_nolen(datasv)));
4425 av_unshift(PL_rsfp_filters, 1);
4426 av_store(PL_rsfp_filters, 0, datasv) ;
4428 !PL_parser->filtered
4429 && PL_parser->lex_flags & LEX_EVALBYTES
4430 && PL_bufptr < PL_bufend
4432 const char *s = PL_bufptr;
4433 while (s < PL_bufend) {
4435 SV *linestr = PL_parser->linestr;
4436 char *buf = SvPVX(linestr);
4437 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4438 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4439 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4440 STRLEN const linestart_pos = PL_parser->linestart - buf;
4441 STRLEN const last_uni_pos =
4442 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4443 STRLEN const last_lop_pos =
4444 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4445 av_push(PL_rsfp_filters, linestr);
4446 PL_parser->linestr =
4447 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4448 buf = SvPVX(PL_parser->linestr);
4449 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4450 PL_parser->bufptr = buf + bufptr_pos;
4451 PL_parser->oldbufptr = buf + oldbufptr_pos;
4452 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4453 PL_parser->linestart = buf + linestart_pos;
4454 if (PL_parser->last_uni)
4455 PL_parser->last_uni = buf + last_uni_pos;
4456 if (PL_parser->last_lop)
4457 PL_parser->last_lop = buf + last_lop_pos;
4458 SvLEN_set(linestr, SvCUR(linestr));
4459 SvCUR_set(linestr, s - SvPVX(linestr));
4460 PL_parser->filtered = 1;
4470 /* Delete most recently added instance of this filter function. */
4472 Perl_filter_del(pTHX_ filter_t funcp)
4476 PERL_ARGS_ASSERT_FILTER_DEL;
4479 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4480 FPTR2DPTR(void*, funcp)));
4482 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4484 /* if filter is on top of stack (usual case) just pop it off */
4485 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4486 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4487 sv_free(av_pop(PL_rsfp_filters));
4491 /* we need to search for the correct entry and clear it */
4492 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4496 /* Invoke the idxth filter function for the current rsfp. */
4497 /* maxlen 0 = read one text line */
4499 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4504 /* This API is bad. It should have been using unsigned int for maxlen.
4505 Not sure if we want to change the API, but if not we should sanity
4506 check the value here. */
4507 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4509 PERL_ARGS_ASSERT_FILTER_READ;
4511 if (!PL_parser || !PL_rsfp_filters)
4513 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4514 /* Provide a default input filter to make life easy. */
4515 /* Note that we append to the line. This is handy. */
4516 DEBUG_P(PerlIO_printf(Perl_debug_log,
4517 "filter_read %d: from rsfp\n", idx));
4518 if (correct_length) {
4521 const int old_len = SvCUR(buf_sv);
4523 /* ensure buf_sv is large enough */
4524 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4525 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4526 correct_length)) <= 0) {
4527 if (PerlIO_error(PL_rsfp))
4528 return -1; /* error */
4530 return 0 ; /* end of file */
4532 SvCUR_set(buf_sv, old_len + len) ;
4533 SvPVX(buf_sv)[old_len + len] = '\0';
4536 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4537 if (PerlIO_error(PL_rsfp))
4538 return -1; /* error */
4540 return 0 ; /* end of file */
4543 return SvCUR(buf_sv);
4545 /* Skip this filter slot if filter has been deleted */
4546 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4547 DEBUG_P(PerlIO_printf(Perl_debug_log,
4548 "filter_read %d: skipped (filter deleted)\n",
4550 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4552 if (SvTYPE(datasv) != SVt_PVIO) {
4553 if (correct_length) {
4555 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4556 if (!remainder) return 0; /* eof */
4557 if (correct_length > remainder) correct_length = remainder;
4558 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4559 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4562 const char *s = SvEND(datasv);
4563 const char *send = SvPVX(datasv) + SvLEN(datasv);
4571 if (s == send) return 0; /* eof */
4572 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4573 SvCUR_set(datasv, s-SvPVX(datasv));
4575 return SvCUR(buf_sv);
4577 /* Get function pointer hidden within datasv */
4578 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4579 DEBUG_P(PerlIO_printf(Perl_debug_log,
4580 "filter_read %d: via function %p (%s)\n",
4581 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4582 /* Call function. The function is expected to */
4583 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4584 /* Return: <0:error, =0:eof, >0:not eof */
4586 save_scalar(PL_errgv);
4587 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4593 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4595 PERL_ARGS_ASSERT_FILTER_GETS;
4597 #ifdef PERL_CR_FILTER
4598 if (!PL_rsfp_filters) {
4599 filter_add(S_cr_textfilter,NULL);
4602 if (PL_rsfp_filters) {
4604 SvCUR_set(sv, 0); /* start with empty line */
4605 if (FILTER_READ(0, sv, 0) > 0)
4606 return ( SvPVX(sv) ) ;
4611 return (sv_gets(sv, PL_rsfp, append));
4615 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4619 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4621 if (memEQs(pkgname, len, "__PACKAGE__"))
4625 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4626 && (gv = gv_fetchpvn_flags(pkgname,
4628 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4630 return GvHV(gv); /* Foo:: */
4633 /* use constant CLASS => 'MyClass' */
4634 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4635 if (gv && GvCV(gv)) {
4636 SV * const sv = cv_const_sv(GvCV(gv));
4638 return gv_stashsv(sv, 0);
4641 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4646 S_tokenize_use(pTHX_ int is_use, char *s) {
4647 PERL_ARGS_ASSERT_TOKENIZE_USE;
4649 if (PL_expect != XSTATE)
4650 /* diag_listed_as: "use" not allowed in expression */
4651 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4652 is_use ? "use" : "no"));
4655 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4656 s = force_version(s, TRUE);
4657 if (*s == ';' || *s == '}'
4658 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4659 NEXTVAL_NEXTTOKE.opval = NULL;
4660 force_next(BAREWORD);
4662 else if (*s == 'v') {
4663 s = force_word(s,BAREWORD,FALSE,TRUE);
4664 s = force_version(s, FALSE);
4668 s = force_word(s,BAREWORD,FALSE,TRUE);
4669 s = force_version(s, FALSE);
4671 pl_yylval.ival = is_use;
4675 static const char* const exp_name[] =
4676 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4677 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4678 "SIGVAR", "TERMORDORDOR"
4682 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4684 S_word_takes_any_delimiter(char *p, STRLEN len)
4686 return (len == 1 && strchr("msyq", p[0]))
4688 && ((p[0] == 't' && p[1] == 'r')
4689 || (p[0] == 'q' && strchr("qwxr", p[1]))));
4693 S_check_scalar_slice(pTHX_ char *s)
4696 while (SPACE_OR_TAB(*s)) s++;
4697 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4703 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4704 || (*s && strchr(" \t$#+-'\"", *s)))
4706 s += UTF ? UTF8SKIP(s) : 1;
4708 if (*s == '}' || *s == ']')
4709 pl_yylval.ival = OPpSLICEWARNING;
4712 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4714 S_lex_token_boundary(pTHX)
4716 PL_oldoldbufptr = PL_oldbufptr;
4717 PL_oldbufptr = PL_bufptr;
4720 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4722 S_vcs_conflict_marker(pTHX_ char *s)
4724 lex_token_boundary();
4726 yyerror("Version control conflict marker");
4727 while (s < PL_bufend && *s != '\n')
4735 Works out what to call the token just pulled out of the input
4736 stream. The yacc parser takes care of taking the ops we return and
4737 stitching them into a tree.
4740 The type of the next token
4743 Check if we have already built the token; if so, use it.
4744 Switch based on the current state:
4745 - if we have a case modifier in a string, deal with that
4746 - handle other cases of interpolation inside a string
4747 - scan the next line if we are inside a format
4748 In the normal state, switch on the next character:
4750 if alphabetic, go to key lookup
4751 unrecognized character - croak
4752 - 0/4/26: handle end-of-line or EOF
4753 - cases for whitespace
4754 - \n and #: handle comments and line numbers
4755 - various operators, brackets and sigils
4758 - 'v': vstrings (or go to key lookup)
4759 - 'x' repetition operator (or go to key lookup)
4760 - other ASCII alphanumerics (key lookup begins here):
4763 scan built-in keyword (but do nothing with it yet)
4764 check for statement label
4765 check for lexical subs
4766 goto just_a_word if there is one
4767 see whether built-in keyword is overridden
4768 switch on keyword number:
4769 - default: just_a_word:
4770 not a built-in keyword; handle bareword lookup
4771 disambiguate between method and sub call
4772 fall back to bareword
4773 - cases for built-in keywords
4781 char *s = PL_bufptr;
4785 const bool saw_infix_sigil = cBOOL(PL_parser->saw_infix_sigil);
4789 /* orig_keyword, gvp, and gv are initialized here because
4790 * jump to the label just_a_word_zero can bypass their
4791 * initialization later. */
4792 I32 orig_keyword = 0;
4796 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
4797 const U8* first_bad_char_loc;
4798 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
4799 PL_bufend - PL_bufptr,
4800 &first_bad_char_loc)))
4802 _force_out_malformed_utf8_message(first_bad_char_loc,
4805 1 /* 1 means die */ );
4806 NOT_REACHED; /* NOTREACHED */
4808 PL_parser->recheck_utf8_validity = FALSE;
4811 SV* tmp = newSVpvs("");
4812 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
4813 (IV)CopLINE(PL_curcop),
4814 lex_state_names[PL_lex_state],
4815 exp_name[PL_expect],
4816 pv_display(tmp, s, strlen(s), 0, 60));
4820 /* when we've already built the next token, just pull it out of the queue */
4823 pl_yylval = PL_nextval[PL_nexttoke];
4826 next_type = PL_nexttype[PL_nexttoke];
4827 if (next_type & (7<<24)) {
4828 if (next_type & (1<<24)) {
4829 if (PL_lex_brackets > 100)
4830 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4831 PL_lex_brackstack[PL_lex_brackets++] =
4832 (char) ((next_type >> 16) & 0xff);
4834 if (next_type & (2<<24))
4835 PL_lex_allbrackets++;
4836 if (next_type & (4<<24))
4837 PL_lex_allbrackets--;
4838 next_type &= 0xffff;
4840 return REPORT(next_type == 'p' ? pending_ident() : next_type);
4844 switch (PL_lex_state) {
4846 case LEX_INTERPNORMAL:
4849 /* interpolated case modifiers like \L \U, including \Q and \E.
4850 when we get here, PL_bufptr is at the \
4852 case LEX_INTERPCASEMOD:
4854 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
4856 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4857 PL_bufptr, PL_bufend, *PL_bufptr);
4859 /* handle \E or end of string */
4860 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
4862 if (PL_lex_casemods) {
4863 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
4864 PL_lex_casestack[PL_lex_casemods] = '\0';
4866 if (PL_bufptr != PL_bufend
4867 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4868 || oldmod == 'F')) {
4870 PL_lex_state = LEX_INTERPCONCAT;
4872 PL_lex_allbrackets--;
4875 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4876 /* Got an unpaired \E */
4877 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
4878 "Useless use of \\E");
4880 if (PL_bufptr != PL_bufend)
4882 PL_lex_state = LEX_INTERPCONCAT;
4886 DEBUG_T({ PerlIO_printf(Perl_debug_log,
4887 "### Saw case modifier\n"); });
4889 if (s[1] == '\\' && s[2] == 'E') {
4891 PL_lex_state = LEX_INTERPCONCAT;
4896 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
4897 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
4899 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
4901 if ((*s == 'L' || *s == 'U' || *s == 'F')
4902 && (strpbrk(PL_lex_casestack, "LUF")))
4904 PL_lex_casestack[--PL_lex_casemods] = '\0';
4905 PL_lex_allbrackets--;
4908 if (PL_lex_casemods > 10)
4909 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4910 PL_lex_casestack[PL_lex_casemods++] = *s;
4911 PL_lex_casestack[PL_lex_casemods] = '\0';
4912 PL_lex_state = LEX_INTERPCONCAT;
4913 NEXTVAL_NEXTTOKE.ival = 0;
4914 force_next((2<<24)|'(');
4916 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
4918 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
4920 NEXTVAL_NEXTTOKE.ival = OP_LC;
4922 NEXTVAL_NEXTTOKE.ival = OP_UC;
4924 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
4926 NEXTVAL_NEXTTOKE.ival = OP_FC;
4928 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
4932 if (PL_lex_starts) {
4935 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4936 if (PL_lex_casemods == 1 && PL_lex_inpat)
4939 AopNOASSIGN(OP_CONCAT);
4945 case LEX_INTERPPUSH:
4946 return REPORT(sublex_push());
4948 case LEX_INTERPSTART:
4949 if (PL_bufptr == PL_bufend)
4950 return REPORT(sublex_done());
4951 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
4952 "### Interpolated variable\n"); });
4954 /* for /@a/, we leave the joining for the regex engine to do
4955 * (unless we're within \Q etc) */
4956 PL_lex_dojoin = (*PL_bufptr == '@'
4957 && (!PL_lex_inpat || PL_lex_casemods));
4958 PL_lex_state = LEX_INTERPNORMAL;
4959 if (PL_lex_dojoin) {
4960 NEXTVAL_NEXTTOKE.ival = 0;
4962 force_ident("\"", '$');
4963 NEXTVAL_NEXTTOKE.ival = 0;
4965 NEXTVAL_NEXTTOKE.ival = 0;
4966 force_next((2<<24)|'(');
4967 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
4970 /* Convert (?{...}) and friends to 'do {...}' */
4971 if (PL_lex_inpat && *PL_bufptr == '(') {
4972 PL_parser->lex_shared->re_eval_start = PL_bufptr;
4974 if (*PL_bufptr != '{')
4976 PL_expect = XTERMBLOCK;
4980 if (PL_lex_starts++) {
4982 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4983 if (!PL_lex_casemods && PL_lex_inpat)
4986 AopNOASSIGN(OP_CONCAT);
4990 case LEX_INTERPENDMAYBE:
4991 if (intuit_more(PL_bufptr, PL_bufend)) {
4992 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
4998 if (PL_lex_dojoin) {
4999 const U8 dojoin_was = PL_lex_dojoin;
5000 PL_lex_dojoin = FALSE;
5001 PL_lex_state = LEX_INTERPCONCAT;
5002 PL_lex_allbrackets--;
5003 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
5005 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
5006 && SvEVALED(PL_lex_repl))
5008 if (PL_bufptr != PL_bufend)
5009 Perl_croak(aTHX_ "Bad evalled substitution pattern");
5012 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5013 re_eval_str. If the here-doc body’s length equals the previous
5014 value of re_eval_start, re_eval_start will now be null. So
5015 check re_eval_str as well. */
5016 if (PL_parser->lex_shared->re_eval_start
5017 || PL_parser->lex_shared->re_eval_str) {
5019 if (*PL_bufptr != ')')
5020 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5022 /* having compiled a (?{..}) expression, return the original
5023 * text too, as a const */
5024 if (PL_parser->lex_shared->re_eval_str) {
5025 sv = PL_parser->lex_shared->re_eval_str;
5026 PL_parser->lex_shared->re_eval_str = NULL;
5028 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5029 SvPV_shrink_to_cur(sv);
5031 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5032 PL_bufptr - PL_parser->lex_shared->re_eval_start);
5033 NEXTVAL_NEXTTOKE.opval =
5034 newSVOP(OP_CONST, 0,
5037 PL_parser->lex_shared->re_eval_start = NULL;
5043 case LEX_INTERPCONCAT:
5045 if (PL_lex_brackets)
5046 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5047 (long) PL_lex_brackets);
5049 if (PL_bufptr == PL_bufend)
5050 return REPORT(sublex_done());
5052 /* m'foo' still needs to be parsed for possible (?{...}) */
5053 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
5054 SV *sv = newSVsv(PL_linestr);
5056 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
5060 int save_error_count = PL_error_count;
5062 s = scan_const(PL_bufptr);
5064 /* Set flag if this was a pattern and there were errors. op.c will
5065 * refuse to compile a pattern with this flag set. Otherwise, we
5066 * could get segfaults, etc. */
5067 if (PL_lex_inpat && PL_error_count > save_error_count) {
5068 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
5071 PL_lex_state = LEX_INTERPCASEMOD;
5073 PL_lex_state = LEX_INTERPSTART;
5076 if (s != PL_bufptr) {
5077 NEXTVAL_NEXTTOKE = pl_yylval;
5080 if (PL_lex_starts++) {
5081 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5082 if (!PL_lex_casemods && PL_lex_inpat)
5085 AopNOASSIGN(OP_CONCAT);
5095 if (PL_parser->sub_error_count != PL_error_count) {
5096 /* There was an error parsing a formline, which tends to
5098 Unlike interpolated sub-parsing, we can't treat any of
5099 these as recoverable, so no need to check sub_no_recover.
5103 assert(PL_lex_formbrack);
5104 s = scan_formline(PL_bufptr);
5105 if (!PL_lex_formbrack)
5114 /* We really do *not* want PL_linestr ever becoming a COW. */
5115 assert (!SvIsCOW(PL_linestr));
5117 PL_oldoldbufptr = PL_oldbufptr;
5119 PL_parser->saw_infix_sigil = 0;
5121 if (PL_in_my == KEY_sigvar) {
5122 /* we expect the sigil and optional var name part of a
5123 * signature element here. Since a '$' is not necessarily
5124 * followed by a var name, handle it specially here; the general
5125 * yylex code would otherwise try to interpret whatever follows
5126 * as a var; e.g. ($, ...) would be seen as the var '$,'
5133 PL_bufptr = s; /* for error reporting */
5138 /* spot stuff that looks like an prototype */
5139 if (strchr("$:@%&*;\\[]", *s)) {
5140 yyerror("Illegal character following sigil in a subroutine signature");
5143 /* '$#' is banned, while '$ # comment' isn't */
5145 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
5149 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5150 char *dest = PL_tokenbuf + 1;
5151 /* read var name, including sigil, into PL_tokenbuf */
5152 PL_tokenbuf[0] = sigil;
5153 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
5154 0, cBOOL(UTF), FALSE, FALSE);
5156 assert(PL_tokenbuf[1]); /* we have a variable name */
5164 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
5165 * as the ASSIGNOP, and exclude other tokens that start with =
5167 if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
5168 /* save now to report with the same context as we did when
5169 * all ASSIGNOPS were accepted */
5173 NEXTVAL_NEXTTOKE.ival = 0;
5174 force_next(ASSIGNOP);
5177 else if (*s == ',' || *s == ')') {
5178 PL_expect = XOPERATOR;
5181 /* make sure the context shows the unexpected character and
5182 * hopefully a bit more */
5184 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5186 PL_bufptr = s; /* for error reporting */
5187 yyerror("Illegal operator following parameter in a subroutine signature");
5191 NEXTVAL_NEXTTOKE.ival = sigil;
5192 force_next('p'); /* force a signature pending identifier */
5199 case ',': /* handle ($a,,$b) */
5204 yyerror("A signature parameter must start with '$', '@' or '%'");
5205 /* very crude error recovery: skip to likely next signature
5207 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
5218 if (isIDFIRST_utf8_safe(s, PL_bufend)) {
5222 else if (isALNUMC(*s)) {
5226 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5229 STRLEN skiplen = UTF8SKIP(s);
5230 STRLEN stravail = PL_bufend - s;
5231 c = sv_uni_display(dsv, newSVpvn_flags(s,
5232 skiplen > stravail ? stravail : skiplen,
5233 SVs_TEMP | SVf_UTF8),
5234 10, UNI_DISPLAY_ISPRINT);
5237 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
5240 if (s >= PL_linestart) {
5244 /* somehow (probably due to a parse failure), PL_linestart has advanced
5245 * pass PL_bufptr, get a reasonable beginning of line
5248 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
5251 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
5252 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5253 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
5256 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
5257 UTF8fARG(UTF, (s - d), d),
5262 goto fake_eof; /* emulate EOF on ^D or ^Z */
5264 if ((!PL_rsfp || PL_lex_inwhat)
5265 && (!PL_parser->filtered || s+1 < PL_bufend)) {
5269 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
5271 yyerror((const char *)
5273 ? "Format not terminated"
5274 : "Missing right curly or square bracket"));
5276 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5277 "### Tokener got EOF\n");
5281 if (s++ < PL_bufend)
5282 goto retry; /* ignore stray nulls */
5285 if (!PL_in_eval && !PL_preambled) {
5286 PL_preambled = TRUE;
5288 /* Generate a string of Perl code to load the debugger.
5289 * If PERL5DB is set, it will return the contents of that,
5290 * otherwise a compile-time require of perl5db.pl. */
5292 const char * const pdb = PerlEnv_getenv("PERL5DB");
5295 sv_setpv(PL_linestr, pdb);
5296 sv_catpvs(PL_linestr,";");
5298 SETERRNO(0,SS_NORMAL);
5299 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5301 PL_parser->preambling = CopLINE(PL_curcop);
5303 SvPVCLEAR(PL_linestr);
5304 if (PL_preambleav) {
5305 SV **svp = AvARRAY(PL_preambleav);
5306 SV **const end = svp + AvFILLp(PL_preambleav);
5308 sv_catsv(PL_linestr, *svp);
5310 sv_catpvs(PL_linestr, ";");
5312 sv_free(MUTABLE_SV(PL_preambleav));
5313 PL_preambleav = NULL;
5316 sv_catpvs(PL_linestr,
5317 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
5318 if (PL_minus_n || PL_minus_p) {
5319 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
5321 sv_catpvs(PL_linestr,"chomp;");
5324 if ( ( *PL_splitstr == '/'
5325 || *PL_splitstr == '\''
5326 || *PL_splitstr == '"')
5327 && strchr(PL_splitstr + 1, *PL_splitstr))
5329 /* strchr is ok, because -F pattern can't contain
5331 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
5334 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5335 bytes can be used as quoting characters. :-) */
5336 const char *splits = PL_splitstr;
5337 sv_catpvs(PL_linestr, "our @F=split(q\0");
5340 if (*splits == '\\')
5341 sv_catpvn(PL_linestr, splits, 1);
5342 sv_catpvn(PL_linestr, splits, 1);
5343 } while (*splits++);
5344 /* This loop will embed the trailing NUL of
5345 PL_linestr as the last thing it does before
5347 sv_catpvs(PL_linestr, ");");
5351 sv_catpvs(PL_linestr,"our @F=split(' ');");
5354 sv_catpvs(PL_linestr, "\n");
5355 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5356 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5357 PL_last_lop = PL_last_uni = NULL;
5358 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
5359 update_debugger_info(PL_linestr, NULL, 0);
5364 bof = cBOOL(PL_rsfp);
5367 fake_eof = LEX_FAKE_EOF;
5369 PL_bufptr = PL_bufend;
5370 COPLINE_INC_WITH_HERELINES;
5371 if (!lex_next_chunk(fake_eof)) {
5372 CopLINE_dec(PL_curcop);
5374 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5376 CopLINE_dec(PL_curcop);
5378 /* If it looks like the start of a BOM or raw UTF-16,
5379 * check if it in fact is. */
5382 || *(U8*)s == BOM_UTF8_FIRST_BYTE
5386 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5387 bof = (offset == (Off_t)SvCUR(PL_linestr));
5388 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5389 /* offset may include swallowed CR */
5391 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
5394 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5395 s = swallow_bom((U8*)s);
5398 if (PL_parser->in_pod) {
5399 /* Incest with pod. */
5400 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
5403 SvPVCLEAR(PL_linestr);
5404 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5405 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5406 PL_last_lop = PL_last_uni = NULL;
5407 PL_parser->in_pod = 0;
5410 if (PL_rsfp || PL_parser->filtered)
5411 incline(s, PL_bufend);
5412 } while (PL_parser->in_pod);
5413 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
5414 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5415 PL_last_lop = PL_last_uni = NULL;
5416 if (CopLINE(PL_curcop) == 1) {
5417 while (s < PL_bufend && isSPACE(*s))
5419 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
5423 if (*s == '#' && *(s+1) == '!')
5425 #ifdef ALTERNATE_SHEBANG
5427 static char const as[] = ALTERNATE_SHEBANG;
5428 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5429 d = s + (sizeof(as) - 1);
5431 #endif /* ALTERNATE_SHEBANG */
5440 while (*d && !isSPACE(*d))
5444 #ifdef ARG_ZERO_IS_SCRIPT
5445 if (ipathend > ipath) {
5447 * HP-UX (at least) sets argv[0] to the script name,
5448 * which makes $^X incorrect. And Digital UNIX and Linux,
5449 * at least, set argv[0] to the basename of the Perl
5450 * interpreter. So, having found "#!", we'll set it right.
5452 SV* copfilesv = CopFILESV(PL_curcop);
5455 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5457 assert(SvPOK(x) || SvGMAGICAL(x));
5458 if (sv_eq(x, copfilesv)) {
5459 sv_setpvn(x, ipath, ipathend - ipath);
5465 const char *bstart = SvPV_const(copfilesv, blen);
5466 const char * const lstart = SvPV_const(x, llen);
5468 bstart += blen - llen;
5469 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5470 sv_setpvn(x, ipath, ipathend - ipath);
5477 /* Anything to do if no copfilesv? */
5479 TAINT_NOT; /* $^X is always tainted, but that's OK */
5481 #endif /* ARG_ZERO_IS_SCRIPT */
5486 d = instr(s,"perl -");
5488 d = instr(s,"perl");
5490 /* avoid getting into infinite loops when shebang
5491 * line contains "Perl" rather than "perl" */
5493 for (d = ipathend-4; d >= ipath; --d) {
5494 if (isALPHA_FOLD_EQ(*d, 'p')
5495 && !ibcmp(d, "perl", 4))
5505 #ifdef ALTERNATE_SHEBANG
5507 * If the ALTERNATE_SHEBANG on this system starts with a
5508 * character that can be part of a Perl expression, then if
5509 * we see it but not "perl", we're probably looking at the
5510 * start of Perl code, not a request to hand off to some
5511 * other interpreter. Similarly, if "perl" is there, but
5512 * not in the first 'word' of the line, we assume the line
5513 * contains the start of the Perl program.
5515 if (d && *s != '#') {
5516 const char *c = ipath;
5517 while (*c && !strchr("; \t\r\n\f\v#", *c))
5520 d = NULL; /* "perl" not in first word; ignore */
5522 *s = '#'; /* Don't try to parse shebang line */
5524 #endif /* ALTERNATE_SHEBANG */
5529 && !instr(s,"indir")
5530 && instr(PL_origargv[0],"perl"))
5537 while (s < PL_bufend && isSPACE(*s))
5539 if (s < PL_bufend) {
5540 Newx(newargv,PL_origargc+3,char*);
5542 while (s < PL_bufend && !isSPACE(*s))
5545 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
5548 newargv = PL_origargv;
5551 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
5553 Perl_croak(aTHX_ "Can't exec %s", ipath);
5556 while (*d && !isSPACE(*d))
5558 while (SPACE_OR_TAB(*d))
5562 const bool switches_done = PL_doswitches;
5563 const U32 oldpdb = PL_perldb;
5564 const bool oldn = PL_minus_n;
5565 const bool oldp = PL_minus_p;
5569 bool baduni = FALSE;
5571 const char *d2 = d1 + 1;
5572 if (parse_unicode_opts((const char **)&d2)
5576 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
5577 const char * const m = d1;
5578 while (*d1 && !isSPACE(*d1))
5580 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
5583 d1 = moreswitches(d1);
5585 if (PL_doswitches && !switches_done) {
5586 int argc = PL_origargc;
5587 char **argv = PL_origargv;
5590 } while (argc && argv[0][0] == '-' && argv[0][1]);
5591 init_argv_symbols(argc,argv);
5593 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
5594 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
5595 /* if we have already added "LINE: while (<>) {",
5596 we must not do it again */
5598 SvPVCLEAR(PL_linestr);
5599 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5600 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
5601 PL_last_lop = PL_last_uni = NULL;
5602 PL_preambled = FALSE;
5603 if (PERLDB_LINE_OR_SAVESRC)
5604 (void)gv_fetchfile(PL_origfilename);
5611 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5612 PL_lex_state = LEX_FORMLINE;
5613 force_next(FORMRBRACK);
5618 #ifdef PERL_STRICT_CR
5619 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
5621 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
5623 case ' ': case '\t': case '\f': case '\v':
5628 if (PL_lex_state != LEX_NORMAL
5629 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
5631 const bool in_comment = *s == '#';
5632 if (*s == '#' && s == PL_linestart && PL_in_eval
5633 && !PL_rsfp && !PL_parser->filtered) {
5634 /* handle eval qq[#line 1 "foo"\n ...] */
5635 CopLINE_dec(PL_curcop);
5636 incline(s, PL_bufend);
5639 while (d < PL_bufend && *d != '\n')
5644 if (in_comment && d == PL_bufend
5645 && PL_lex_state == LEX_INTERPNORMAL
5646 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
5647 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5649 incline(s, PL_bufend);
5650 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5651 PL_lex_state = LEX_FORMLINE;
5652 force_next(FORMRBRACK);
5657 while (s < PL_bufend && *s != '\n')
5663 incline(s, PL_bufend);
5668 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5676 while (s < PL_bufend && SPACE_OR_TAB(*s))
5679 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5680 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5681 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5682 OPERATOR('-'); /* unary minus */
5685 case 'r': ftst = OP_FTEREAD; break;
5686 case 'w': ftst = OP_FTEWRITE; break;
5687 case 'x': ftst = OP_FTEEXEC; break;
5688 case 'o': ftst = OP_FTEOWNED; break;
5689 case 'R': ftst = OP_FTRREAD; break;
5690 case 'W': ftst = OP_FTRWRITE; break;
5691 case 'X': ftst = OP_FTREXEC; break;
5692 case 'O': ftst = OP_FTROWNED; break;
5693 case 'e': ftst = OP_FTIS; break;
5694 case 'z': ftst = OP_FTZERO; break;
5695 case 's': ftst = OP_FTSIZE; break;
5696 case 'f': ftst = OP_FTFILE; break;
5697 case 'd': ftst = OP_FTDIR; break;
5698 case 'l': ftst = OP_FTLINK; break;
5699 case 'p': ftst = OP_FTPIPE; break;
5700 case 'S': ftst = OP_FTSOCK; break;
5701 case 'u': ftst = OP_FTSUID; break;
5702 case 'g': ftst = OP_FTSGID; break;
5703 case 'k': ftst = OP_FTSVTX; break;
5704 case 'b': ftst = OP_FTBLK; break;
5705 case 'c': ftst = OP_FTCHR; break;
5706 case 't': ftst = OP_FTTTY; break;
5707 case 'T': ftst = OP_FTTEXT; break;
5708 case 'B': ftst = OP_FTBINARY; break;
5709 case 'M': case 'A': case 'C':
5710 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5712 case 'M': ftst = OP_FTMTIME; break;
5713 case 'A': ftst = OP_FTATIME; break;
5714 case 'C': ftst = OP_FTCTIME; break;
5722 PL_last_uni = PL_oldbufptr;
5723 PL_last_lop_op = (OPCODE)ftst;
5724 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5725 "### Saw file test %c\n", (int)tmp);
5730 /* Assume it was a minus followed by a one-letter named
5731 * subroutine call (or a -bareword), then. */
5732 DEBUG_T( { PerlIO_printf(Perl_debug_log,
5733 "### '-%c' looked like a file test but was not\n",
5740 const char tmp = *s++;
5743 if (PL_expect == XOPERATOR)
5748 else if (*s == '>') {
5751 if (((*s == '$' || *s == '&') && s[1] == '*')
5752 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5753 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5754 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5757 PL_expect = XPOSTDEREF;
5760 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5761 s = force_word(s,METHOD,FALSE,TRUE);
5769 if (PL_expect == XOPERATOR) {
5771 && !PL_lex_allbrackets
5772 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5780 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5782 OPERATOR('-'); /* unary minus */
5788 const char tmp = *s++;
5791 if (PL_expect == XOPERATOR)
5796 if (PL_expect == XOPERATOR) {
5798 && !PL_lex_allbrackets
5799 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5807 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5814 if (PL_expect == XPOSTDEREF) POSTDEREF('*');
5815 if (PL_expect != XOPERATOR) {
5816 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5817 PL_expect = XOPERATOR;
5818 force_ident(PL_tokenbuf, '*');
5826 if (*s == '=' && !PL_lex_allbrackets
5827 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5835 && !PL_lex_allbrackets
5836 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5841 PL_parser->saw_infix_sigil = 1;
5846 if (PL_expect == XOPERATOR) {
5848 && !PL_lex_allbrackets
5849 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5854 PL_parser->saw_infix_sigil = 1;
5857 else if (PL_expect == XPOSTDEREF) POSTDEREF('%');
5858 PL_tokenbuf[0] = '%';
5859 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5861 if (!PL_tokenbuf[1]) {
5864 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5865 && intuit_more(s, PL_bufend)) {
5867 PL_tokenbuf[0] = '@';
5869 PL_expect = XOPERATOR;
5870 force_ident_maybe_lex('%');
5875 bof = FEATURE_BITWISE_IS_ENABLED;
5876 if (bof && s[1] == '.')
5878 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5879 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5885 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5887 if (PL_lex_brackets > 100)
5888 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5889 PL_lex_brackstack[PL_lex_brackets++] = 0;
5890 PL_lex_allbrackets++;
5892 const char tmp = *s++;
5897 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
5899 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5902 Perl_ck_warner_d(aTHX_
5903 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5904 "Smartmatch is experimental");
5908 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
5910 BCop(OP_SCOMPLEMENT);
5912 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
5914 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5921 goto just_a_word_zero_gv;
5927 switch (PL_expect) {
5929 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5931 PL_bufptr = s; /* update in case we back off */
5934 "Use of := for an empty attribute list is not allowed");
5941 PL_expect = XTERMBLOCK;
5943 /* NB: as well as parsing normal attributes, we also end up
5944 * here if there is something looking like attributes
5945 * following a signature (which is illegal, but used to be
5946 * legal in 5.20..5.26). If the latter, we still parse the
5947 * attributes so that error messages(s) are less confusing,
5948 * but ignore them (parser->sig_seen).
5952 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5953 bool sig = PL_parser->sig_seen;
5956 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5957 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5958 if (tmp < 0) tmp = -tmp;
5973 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5975 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5980 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5982 COPLINE_SET_FROM_MULTI_END;
5985 sv_catsv(sv, PL_lex_stuff);
5986 attrs = op_append_elem(OP_LIST, attrs,
5987 newSVOP(OP_CONST, 0, sv));
5988 SvREFCNT_dec_NN(PL_lex_stuff);
5989 PL_lex_stuff = NULL;
5992 /* NOTE: any CV attrs applied here need to be part of
5993 the CVf_BUILTIN_ATTRS define in cv.h! */
5994 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5997 CvLVALUE_on(PL_compcv);
5999 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
6002 CvMETHOD_on(PL_compcv);
6004 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const"))
6008 Perl_ck_warner_d(aTHX_
6009 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
6010 ":const is experimental"
6012 CvANONCONST_on(PL_compcv);
6013 if (!CvANON(PL_compcv))
6014 yyerror(":const is not permitted on named "
6018 /* After we've set the flags, it could be argued that
6019 we don't need to do the attributes.pm-based setting
6020 process, and shouldn't bother appending recognized
6021 flags. To experiment with that, uncomment the
6022 following "else". (Note that's already been
6023 uncommented. That keeps the above-applied built-in
6024 attributes from being intercepted (and possibly
6025 rejected) by a package's attribute routines, but is
6026 justified by the performance win for the common case
6027 of applying only built-in attributes.) */
6029 attrs = op_append_elem(OP_LIST, attrs,
6030 newSVOP(OP_CONST, 0,
6034 if (*s == ':' && s[1] != ':')
6037 break; /* require real whitespace or :'s */
6038 /* XXX losing whitespace on sequential attributes here */
6043 && !(PL_expect == XOPERATOR
6044 ? (*s == '=' || *s == ')')
6045 : (*s == '{' || *s == '(')))
6047 const char q = ((*s == '\'') ? '"' : '\'');
6048 /* If here for an expression, and parsed no attrs, back
6050 if (PL_expect == XOPERATOR && !attrs) {
6054 /* MUST advance bufptr here to avoid bogus "at end of line"
6055 context messages from yyerror().
6058 yyerror( (const char *)
6060 ? Perl_form(aTHX_ "Invalid separator character "
6061 "%c%c%c in attribute list", q, *s, q)
6062 : "Unterminated attribute list" ) );
6069 if (PL_parser->sig_seen) {
6070 /* see comment about about sig_seen and parser error
6074 Perl_croak(aTHX_ "Subroutine attributes must come "
6075 "before the signature");
6078 NEXTVAL_NEXTTOKE.opval = attrs;
6084 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6088 PL_lex_allbrackets--;
6092 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6093 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6097 PL_lex_allbrackets++;
6100 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6107 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6110 PL_lex_allbrackets--;
6116 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6119 if (PL_lex_brackets <= 0)
6120 /* diag_listed_as: Unmatched right %s bracket */
6121 yyerror("Unmatched right square bracket");
6124 PL_lex_allbrackets--;
6125 if (PL_lex_state == LEX_INTERPNORMAL) {
6126 if (PL_lex_brackets == 0) {
6127 if (*s == '-' && s[1] == '>')
6128 PL_lex_state = LEX_INTERPENDMAYBE;
6129 else if (*s != '[' && *s != '{')
6130 PL_lex_state = LEX_INTERPEND;
6137 if (PL_lex_brackets > 100) {
6138 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6140 switch (PL_expect) {
6143 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6144 PL_lex_allbrackets++;
6145 OPERATOR(HASHBRACK);
6147 while (s < PL_bufend && SPACE_OR_TAB(*s))
6150 PL_tokenbuf[0] = '\0';
6151 if (d < PL_bufend && *d == '-') {
6152 PL_tokenbuf[0] = '-';
6154 while (d < PL_bufend && SPACE_OR_TAB(*d))
6157 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6158 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6160 while (d < PL_bufend && SPACE_OR_TAB(*d))
6163 const char minus = (PL_tokenbuf[0] == '-');
6164 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6172 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6173 PL_lex_allbrackets++;
6178 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6179 PL_lex_allbrackets++;
6183 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6184 PL_lex_allbrackets++;
6189 if (PL_oldoldbufptr == PL_last_lop)
6190 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6192 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6193 PL_lex_allbrackets++;
6196 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6198 /* This hack is to get the ${} in the message. */
6200 yyerror("syntax error");
6203 OPERATOR(HASHBRACK);
6205 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6206 /* ${...} or @{...} etc., but not print {...}
6207 * Skip the disambiguation and treat this as a block.
6209 goto block_expectation;
6211 /* This hack serves to disambiguate a pair of curlies
6212 * as being a block or an anon hash. Normally, expectation
6213 * determines that, but in cases where we're not in a
6214 * position to expect anything in particular (like inside
6215 * eval"") we have to resolve the ambiguity. This code
6216 * covers the case where the first term in the curlies is a
6217 * quoted string. Most other cases need to be explicitly
6218 * disambiguated by prepending a "+" before the opening
6219 * curly in order to force resolution as an anon hash.
6221 * XXX should probably propagate the outer expectation
6222 * into eval"" to rely less on this hack, but that could
6223 * potentially break current behavior of eval"".
6227 if (*s == '\'' || *s == '"' || *s == '`') {
6228 /* common case: get past first string, handling escapes */
6229 for (t++; t < PL_bufend && *t != *s;)
6234 else if (*s == 'q') {
6237 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6238 && !isWORDCHAR(*t))))
6240 /* skip q//-like construct */
6242 char open, close, term;
6245 while (t < PL_bufend && isSPACE(*t))
6247 /* check for q => */
6248 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6249 OPERATOR(HASHBRACK);
6253 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6257 for (t++; t < PL_bufend; t++) {
6258 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6260 else if (*t == open)
6264 for (t++; t < PL_bufend; t++) {
6265 if (*t == '\\' && t+1 < PL_bufend)
6267 else if (*t == close && --brackets <= 0)
6269 else if (*t == open)
6276 /* skip plain q word */
6277 while ( t < PL_bufend
6278 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6280 t += UTF ? UTF8SKIP(t) : 1;
6283 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6284 t += UTF ? UTF8SKIP(t) : 1;
6285 while ( t < PL_bufend
6286 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6288 t += UTF ? UTF8SKIP(t) : 1;
6291 while (t < PL_bufend && isSPACE(*t))
6293 /* if comma follows first term, call it an anon hash */
6294 /* XXX it could be a comma expression with loop modifiers */
6295 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6296 || (*t == '=' && t[1] == '>')))
6297 OPERATOR(HASHBRACK);
6298 if (PL_expect == XREF)
6301 /* If there is an opening brace or 'sub:', treat it
6302 as a term to make ${{...}}{k} and &{sub:attr...}
6303 dwim. Otherwise, treat it as a statement, so
6304 map {no strict; ...} works.
6311 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6324 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6330 pl_yylval.ival = CopLINE(PL_curcop);
6331 PL_copline = NOLINE; /* invalidate current command line number */
6332 TOKEN(formbrack ? '=' : '{');
6334 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6337 assert(s != PL_bufend);
6339 if (PL_lex_brackets <= 0)
6340 /* diag_listed_as: Unmatched right %s bracket */
6341 yyerror("Unmatched right curly bracket");
6343 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6344 PL_lex_allbrackets--;
6345 if (PL_lex_state == LEX_INTERPNORMAL) {
6346 if (PL_lex_brackets == 0) {
6347 if (PL_expect & XFAKEBRACK) {
6348 PL_expect &= XENUMMASK;
6349 PL_lex_state = LEX_INTERPEND;
6351 return yylex(); /* ignore fake brackets */
6353 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6354 && SvEVALED(PL_lex_repl))
6355 PL_lex_state = LEX_INTERPEND;
6356 else if (*s == '-' && s[1] == '>')
6357 PL_lex_state = LEX_INTERPENDMAYBE;
6358 else if (*s != '[' && *s != '{')
6359 PL_lex_state = LEX_INTERPEND;
6362 if (PL_expect & XFAKEBRACK) {
6363 PL_expect &= XENUMMASK;
6365 return yylex(); /* ignore fake brackets */
6367 force_next(formbrack ? '.' : '}');
6368 if (formbrack) LEAVE_with_name("lex_format");
6369 if (formbrack == 2) { /* means . where arguments were expected */
6375 if (PL_expect == XPOSTDEREF) POSTDEREF('&');
6378 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6379 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6386 if (PL_expect == XOPERATOR) {
6387 if ( PL_bufptr == PL_linestart
6388 && ckWARN(WARN_SEMICOLON)
6389 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6391 CopLINE_dec(PL_curcop);
6392 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6393 CopLINE_inc(PL_curcop);
6396 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6398 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6399 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6405 PL_parser->saw_infix_sigil = 1;
6406 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6412 PL_tokenbuf[0] = '&';
6413 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6414 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6415 if (PL_tokenbuf[1]) {
6416 force_ident_maybe_lex('&');
6425 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6426 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6434 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6436 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6437 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6441 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6445 const char tmp = *s++;
6447 if ( (s == PL_linestart+2 || s[-3] == '\n')
6448 && memBEGINs(s, (STRLEN) (PL_bufend - s), "====="))
6450 s = vcs_conflict_marker(s + 5);
6453 if (!PL_lex_allbrackets
6454 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6462 if (!PL_lex_allbrackets
6463 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
6472 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6473 && strchr("+-*/%.^&|<",tmp))
6474 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6475 "Reversed %c= operator",(int)tmp);
6477 if (PL_expect == XSTATE
6479 && (s == PL_linestart+1 || s[-2] == '\n') )
6481 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6482 || PL_lex_state != LEX_NORMAL)
6487 incline(s, PL_bufend);
6488 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
6490 s = (char *) memchr(s,'\n', d - s);
6495 incline(s, PL_bufend);
6503 PL_parser->in_pod = 1;
6507 if (PL_expect == XBLOCK) {
6509 #ifdef PERL_STRICT_CR
6510 while (SPACE_OR_TAB(*t))
6512 while (SPACE_OR_TAB(*t) || *t == '\r')
6515 if (*t == '\n' || *t == '#') {
6517 ENTER_with_name("lex_format");
6518 SAVEI8(PL_parser->form_lex_state);
6519 SAVEI32(PL_lex_formbrack);
6520 PL_parser->form_lex_state = PL_lex_state;
6521 PL_lex_formbrack = PL_lex_brackets + 1;
6522 PL_parser->sub_error_count = PL_error_count;
6526 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6535 const char tmp = *s++;
6537 /* was this !=~ where !~ was meant?
6538 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6540 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6541 const char *t = s+1;
6543 while (t < PL_bufend && isSPACE(*t))
6546 if (*t == '/' || *t == '?'
6547 || ((*t == 'm' || *t == 's' || *t == 'y')
6548 && !isWORDCHAR(t[1]))
6549 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6550 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6551 "!=~ should be !~");
6553 if (!PL_lex_allbrackets
6554 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6567 if (PL_expect != XOPERATOR) {
6568 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6570 if (s[1] == '<' && s[2] != '>') {
6571 if ( (s == PL_linestart || s[-1] == '\n')
6572 && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
6574 s = vcs_conflict_marker(s + 7);
6577 s = scan_heredoc(s);
6580 s = scan_inputsymbol(s);
6581 PL_expect = XOPERATOR;
6582 TOKEN(sublex_start());
6588 if ( (s == PL_linestart+2 || s[-3] == '\n')
6589 && memBEGINs(s, (STRLEN) (PL_bufend - s), "<<<<<"))
6591 s = vcs_conflict_marker(s + 5);
6594 if (*s == '=' && !PL_lex_allbrackets
6595 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6600 SHop(OP_LEFT_SHIFT);
6605 if (!PL_lex_allbrackets
6606 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6614 if (!PL_lex_allbrackets
6615 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6624 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6632 const char tmp = *s++;
6634 if ( (s == PL_linestart+2 || s[-3] == '\n')
6635 && memBEGINs(s, (STRLEN) (PL_bufend - s), ">>>>>"))
6637 s = vcs_conflict_marker(s + 5);
6640 if (*s == '=' && !PL_lex_allbrackets
6641 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6646 SHop(OP_RIGHT_SHIFT);
6648 else if (tmp == '=') {
6649 if (!PL_lex_allbrackets
6650 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6659 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6668 if (PL_expect == XPOSTDEREF) {
6671 POSTDEREF(DOLSHARP);
6677 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
6678 || strchr("{$:+-@", s[2])))
6680 PL_tokenbuf[0] = '@';
6681 s = scan_ident(s + 1, PL_tokenbuf + 1,
6682 sizeof PL_tokenbuf - 1, FALSE);
6683 if (PL_expect == XOPERATOR) {
6685 if (PL_bufptr > s) {
6687 PL_bufptr = PL_oldbufptr;
6689 no_op("Array length", d);
6691 if (!PL_tokenbuf[1])
6693 PL_expect = XOPERATOR;
6694 force_ident_maybe_lex('#');
6698 PL_tokenbuf[0] = '$';
6699 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6700 if (PL_expect == XOPERATOR) {
6702 if (PL_bufptr > s) {
6704 PL_bufptr = PL_oldbufptr;
6708 if (!PL_tokenbuf[1]) {
6710 yyerror("Final $ should be \\$ or $name");
6716 const char tmp = *s;
6717 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6720 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6721 && intuit_more(s, PL_bufend)) {
6723 PL_tokenbuf[0] = '@';
6724 if (ckWARN(WARN_SYNTAX)) {
6728 || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
6731 t += UTF ? UTF8SKIP(t) : 1;
6734 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
6735 while (t < PL_bufend && *t != ']')
6737 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6738 "Multidimensional syntax %" UTF8f " not supported",
6739 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
6743 else if (*s == '{') {
6745 PL_tokenbuf[0] = '%';
6746 if ( strEQ(PL_tokenbuf+1, "SIG")
6747 && ckWARN(WARN_SYNTAX)
6748 && (t = (char *) memchr(s, '}', PL_bufend - s))
6749 && (t = (char *) memchr(t, '=', PL_bufend - t)))
6751 char tmpbuf[sizeof PL_tokenbuf];
6754 } while (isSPACE(*t));
6755 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
6757 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
6762 && get_cvn_flags(tmpbuf, len, UTF
6766 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6767 "You need to quote \"%" UTF8f "\"",
6768 UTF8fARG(UTF, len, tmpbuf));
6775 PL_expect = XOPERATOR;
6776 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6777 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6778 if (!islop || PL_last_lop_op == OP_GREPSTART)
6779 PL_expect = XOPERATOR;
6780 else if (strchr("$@\"'`q", *s))
6781 PL_expect = XTERM; /* e.g. print $fh "foo" */
6782 else if ( strchr("&*<%", *s)
6783 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
6785 PL_expect = XTERM; /* e.g. print $fh &sub */
6787 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6788 char tmpbuf[sizeof PL_tokenbuf];
6790 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
6791 if ((t2 = keyword(tmpbuf, len, 0))) {
6792 /* binary operators exclude handle interpretations */
6804 PL_expect = XTERM; /* e.g. print $fh length() */
6809 PL_expect = XTERM; /* e.g. print $fh subr() */
6812 else if (isDIGIT(*s))
6813 PL_expect = XTERM; /* e.g. print $fh 3 */
6814 else if (*s == '.' && isDIGIT(s[1]))
6815 PL_expect = XTERM; /* e.g. print $fh .3 */
6816 else if ((*s == '?' || *s == '-' || *s == '+')
6817 && !isSPACE(s[1]) && s[1] != '=')
6818 PL_expect = XTERM; /* e.g. print $fh -1 */
6819 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6821 PL_expect = XTERM; /* e.g. print $fh /.../
6822 XXX except DORDOR operator
6824 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6826 PL_expect = XTERM; /* print $fh <<"EOF" */
6829 force_ident_maybe_lex('$');
6833 if (PL_expect == XPOSTDEREF)
6835 PL_tokenbuf[0] = '@';
6836 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6837 if (PL_expect == XOPERATOR) {
6839 if (PL_bufptr > s) {
6841 PL_bufptr = PL_oldbufptr;
6846 if (!PL_tokenbuf[1]) {
6849 if (PL_lex_state == LEX_NORMAL)
6851 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6852 && intuit_more(s, PL_bufend))
6855 PL_tokenbuf[0] = '%';
6857 /* Warn about @ where they meant $. */
6858 if (*s == '[' || *s == '{') {
6859 if (ckWARN(WARN_SYNTAX)) {
6860 S_check_scalar_slice(aTHX_ s);
6864 PL_expect = XOPERATOR;
6865 force_ident_maybe_lex('@');
6868 case '/': /* may be division, defined-or, or pattern */
6869 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6870 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6871 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6876 else if (PL_expect == XOPERATOR) {
6878 if (*s == '=' && !PL_lex_allbrackets
6879 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6887 /* Disable warning on "study /blah/" */
6888 if ( PL_oldoldbufptr == PL_last_uni
6889 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6890 || memNE(PL_last_uni, "study", 5)
6891 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6894 s = scan_pat(s,OP_MATCH);
6895 TERM(sublex_start());
6898 case '?': /* conditional */
6900 if (!PL_lex_allbrackets
6901 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
6906 PL_lex_allbrackets++;
6910 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6911 #ifdef PERL_STRICT_CR
6914 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6916 && (s == PL_linestart || s[-1] == '\n') )
6919 formbrack = 2; /* dot seen where arguments expected */
6922 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6926 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
6929 if (!PL_lex_allbrackets
6930 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
6938 pl_yylval.ival = OPf_SPECIAL;
6944 if (*s == '=' && !PL_lex_allbrackets
6945 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6953 case '0': case '1': case '2': case '3': case '4':
6954 case '5': case '6': case '7': case '8': case '9':
6955 s = scan_num(s, &pl_yylval);
6956 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
6957 if (PL_expect == XOPERATOR)
6962 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6964 missingterm(NULL, 0);
6965 COPLINE_SET_FROM_MULTI_END;
6966 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6967 if (PL_expect == XOPERATOR) {
6970 pl_yylval.ival = OP_CONST;
6971 TERM(sublex_start());
6974 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6977 printbuf("### Saw string before %s\n", s);
6979 PerlIO_printf(Perl_debug_log,
6980 "### Saw unterminated string\n");
6982 if (PL_expect == XOPERATOR) {
6986 missingterm(NULL, 0);
6987 pl_yylval.ival = OP_CONST;
6988 /* FIXME. I think that this can be const if char *d is replaced by
6989 more localised variables. */
6990 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6991 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6992 pl_yylval.ival = OP_STRINGIFY;
6996 if (pl_yylval.ival == OP_CONST)
6997 COPLINE_SET_FROM_MULTI_END;
6998 TERM(sublex_start());
7001 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7004 printbuf("### Saw backtick string before %s\n", s);
7006 PerlIO_printf(Perl_debug_log,
7007 "### Saw unterminated backtick string\n");
7009 if (PL_expect == XOPERATOR)
7010 no_op("Backticks",s);
7012 missingterm(NULL, 0);
7013 pl_yylval.ival = OP_BACKTICK;
7014 TERM(sublex_start());
7018 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
7020 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
7022 if (PL_expect == XOPERATOR)
7023 no_op("Backslash",s);
7027 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
7028 char *start = s + 2;
7029 while (isDIGIT(*start) || *start == '_')
7031 if (*start == '.' && isDIGIT(start[1])) {
7032 s = scan_num(s, &pl_yylval);
7035 else if ((*start == ':' && start[1] == ':')
7036 || (PL_expect == XSTATE && *start == ':'))
7038 else if (PL_expect == XSTATE) {
7040 while (d < PL_bufend && isSPACE(*d)) d++;
7041 if (*d == ':') goto keylookup;
7043 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
7044 if (!isALPHA(*start) && (PL_expect == XTERM
7045 || PL_expect == XREF || PL_expect == XSTATE
7046 || PL_expect == XTERMORDORDOR)) {
7047 GV *const gv = gv_fetchpvn_flags(s, start - s,
7048 UTF ? SVf_UTF8 : 0, SVt_PVCV);
7050 s = scan_num(s, &pl_yylval);
7057 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
7110 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7112 /* Some keywords can be followed by any delimiter, including ':' */
7113 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
7115 /* x::* is just a word, unless x is "CORE" */
7116 if (!anydelim && *s == ':' && s[1] == ':') {
7117 if (memEQs(PL_tokenbuf, len, "CORE")) goto case_KEY_CORE;
7122 while (d < PL_bufend && isSPACE(*d))
7123 d++; /* no comments skipped here, or s### is misparsed */
7125 /* Is this a word before a => operator? */
7126 if (*d == '=' && d[1] == '>') {
7130 = newSVOP(OP_CONST, 0,
7131 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7132 pl_yylval.opval->op_private = OPpCONST_BARE;
7136 /* Check for plugged-in keyword */
7140 char *saved_bufptr = PL_bufptr;
7142 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
7144 if (result == KEYWORD_PLUGIN_DECLINE) {
7145 /* not a plugged-in keyword */
7146 PL_bufptr = saved_bufptr;
7147 } else if (result == KEYWORD_PLUGIN_STMT) {
7148 pl_yylval.opval = o;
7150 if (!PL_nexttoke) PL_expect = XSTATE;
7151 return REPORT(PLUGSTMT);
7152 } else if (result == KEYWORD_PLUGIN_EXPR) {
7153 pl_yylval.opval = o;
7155 if (!PL_nexttoke) PL_expect = XOPERATOR;
7156 return REPORT(PLUGEXPR);
7158 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
7163 /* Check for built-in keyword */
7164 tmp = keyword(PL_tokenbuf, len, 0);
7166 /* Is this a label? */
7167 if (!anydelim && PL_expect == XSTATE
7168 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
7170 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
7171 pl_yylval.pval[len] = '\0';
7172 pl_yylval.pval[len+1] = UTF ? 1 : 0;
7177 /* Check for lexical sub */
7178 if (PL_expect != XOPERATOR) {
7179 char tmpbuf[sizeof PL_tokenbuf + 1];
7181 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7182 off = pad_findmy_pvn(tmpbuf, len+1, 0);
7183 if (off != NOT_IN_PAD) {
7184 assert(off); /* we assume this is boolean-true below */
7185 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7186 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7187 HEK * const stashname = HvNAME_HEK(stash);
7188 sv = newSVhek(stashname);
7189 sv_catpvs(sv, "::");
7190 sv_catpvn_flags(sv, PL_tokenbuf, len,
7191 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7192 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7202 rv2cv_op = newOP(OP_PADANY, 0);
7203 rv2cv_op->op_targ = off;
7204 cv = find_lexical_cv(off);
7212 if (tmp < 0) { /* second-class keyword? */
7213 GV *ogv = NULL; /* override (winner) */
7214 GV *hgv = NULL; /* hidden (loser) */
7215 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
7217 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7218 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
7220 && (cv = GvCVu(gv)))
7222 if (GvIMPORTED_CV(gv))
7224 else if (! CvMETHOD(cv))
7228 && (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
7231 && (isGV_with_GP(gv)
7232 ? GvCVu(gv) && GvIMPORTED_CV(gv)
7233 : SvPCS_IMPORTED(gv)
7234 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
7242 tmp = 0; /* overridden by import or by GLOBAL */
7245 && -tmp==KEY_lock /* XXX generalizable kludge */
7248 tmp = 0; /* any sub overrides "weak" keyword */
7250 else { /* no override */
7252 if (tmp == KEY_dump) {
7253 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
7257 if (hgv && tmp != KEY_x) /* never ambiguous */
7258 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7259 "Ambiguous call resolved as CORE::%s(), "
7260 "qualify as such or use &",
7265 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7266 && (!anydelim || *s != '#')) {
7267 /* no override, and not s### either; skipspace is safe here
7268 * check for => on following line */
7270 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7271 STRLEN soff = s - SvPVX(PL_linestr);
7273 arrow = *s == '=' && s[1] == '>';
7274 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7275 s = SvPVX(PL_linestr) + soff;
7283 /* Trade off - by using this evil construction we can pull the
7284 variable gv into the block labelled keylookup. If not, then
7285 we have to give it function scope so that the goto from the
7286 earlier ':' case doesn't bypass the initialisation. */
7287 just_a_word_zero_gv:
7297 default: /* not a keyword */
7300 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7302 bool no_op_error = FALSE;
7304 if (PL_expect == XOPERATOR) {
7305 if (PL_bufptr == PL_linestart) {
7306 CopLINE_dec(PL_curcop);
7307 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7308 CopLINE_inc(PL_curcop);
7311 /* We want to call no_op with s pointing after the
7312 bareword, so defer it. But we want it to come
7313 before the Bad name croak. */
7317 /* Get the rest if it looks like a package qualifier */
7319 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7321 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7324 no_op("Bareword",s);
7325 no_op_error = FALSE;
7328 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7329 UTF8fARG(UTF, len, PL_tokenbuf),
7330 *s == '\'' ? "'" : "::");
7336 no_op("Bareword",s);
7338 /* See if the name is "Foo::",
7339 in which case Foo is a bareword
7340 (and a package name). */
7343 && PL_tokenbuf[len - 2] == ':'
7344 && PL_tokenbuf[len - 1] == ':')
7346 if (ckWARN(WARN_BAREWORD)
7347 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7348 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7349 "Bareword \"%" UTF8f
7350 "\" refers to nonexistent package",
7351 UTF8fARG(UTF, len, PL_tokenbuf));
7353 PL_tokenbuf[len] = '\0';
7362 /* if we saw a global override before, get the right name */
7365 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
7368 SV * const tmp_sv = sv;
7369 sv = newSVpvs("CORE::GLOBAL::");
7370 sv_catsv(sv, tmp_sv);
7371 SvREFCNT_dec(tmp_sv);
7375 /* Presume this is going to be a bareword of some sort. */
7377 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
7378 pl_yylval.opval->op_private = OPpCONST_BARE;
7380 /* And if "Foo::", then that's what it certainly is. */
7386 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
7387 const_op->op_private = OPpCONST_BARE;
7389 newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7393 : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV
7396 : rv2cv_op_cv(rv2cv_op, RV2CVOPCV_RETURN_STUB);
7399 /* Use this var to track whether intuit_method has been
7400 called. intuit_method returns 0 or > 255. */
7403 /* See if it's the indirect object for a list operator. */
7406 && PL_oldoldbufptr < PL_bufptr
7407 && (PL_oldoldbufptr == PL_last_lop
7408 || PL_oldoldbufptr == PL_last_uni)
7409 && /* NO SKIPSPACE BEFORE HERE! */
7411 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7414 bool immediate_paren = *s == '(';
7417 /* (Now we can afford to cross potential line boundary.) */
7420 /* intuit_method() can indirectly call lex_next_chunk(),
7423 s_off = s - SvPVX(PL_linestr);
7424 /* Two barewords in a row may indicate method call. */
7425 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7427 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7429 /* the code at method: doesn't use s */
7432 s = SvPVX(PL_linestr) + s_off;
7434 /* If not a declared subroutine, it's an indirect object. */
7435 /* (But it's an indir obj regardless for sort.) */
7436 /* Also, if "_" follows a filetest operator, it's a bareword */
7439 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7441 && (PL_last_lop_op != OP_MAPSTART
7442 && PL_last_lop_op != OP_GREPSTART))))
7443 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7444 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7448 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7453 PL_expect = XOPERATOR;
7456 /* Is this a word before a => operator? */
7457 if (*s == '=' && s[1] == '>' && !pkgname) {
7460 if (gvp || (lex && !off)) {
7461 assert (cSVOPx(pl_yylval.opval)->op_sv == sv);
7462 /* This is our own scalar, created a few lines
7463 above, so this is safe. */
7465 sv_setpv(sv, PL_tokenbuf);
7466 if (UTF && !IN_BYTES
7467 && is_utf8_string((U8*)PL_tokenbuf, len))
7474 /* If followed by a paren, it's certainly a subroutine. */
7479 while (SPACE_OR_TAB(*d))
7481 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
7486 NEXTVAL_NEXTTOKE.opval =
7487 off ? rv2cv_op : pl_yylval.opval;
7489 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7490 else op_free(rv2cv_op), force_next(BAREWORD);
7495 /* If followed by var or block, call it a method (unless sub) */
7497 if ((*s == '$' || *s == '{') && !cv) {
7499 PL_last_lop = PL_oldbufptr;
7500 PL_last_lop_op = OP_METHOD;
7501 if (!PL_lex_allbrackets
7502 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7504 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7506 PL_expect = XBLOCKTERM;
7508 return REPORT(METHOD);
7511 /* If followed by a bareword, see if it looks like indir obj. */
7515 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7516 && (tmp = intuit_method(s, lex ? NULL : sv, cv)))
7520 assert(cSVOPx(pl_yylval.opval)->op_sv == sv);
7522 sv_setpvn(sv, PL_tokenbuf, len);
7523 if (UTF && !IN_BYTES
7524 && is_utf8_string((U8*)PL_tokenbuf, len))
7526 else SvUTF8_off(sv);
7529 if (tmp == METHOD && !PL_lex_allbrackets
7530 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7532 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7537 /* Not a method, so call it a subroutine (if defined) */
7540 /* Check for a constant sub */
7541 if ((sv = cv_const_sv_or_av(cv))) {
7544 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7545 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7546 if (SvTYPE(sv) == SVt_PVAV)
7547 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7550 pl_yylval.opval->op_private = 0;
7551 pl_yylval.opval->op_folded = 1;
7552 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7557 op_free(pl_yylval.opval);
7559 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7560 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7561 PL_last_lop = PL_oldbufptr;
7562 PL_last_lop_op = OP_ENTERSUB;
7563 /* Is there a prototype? */
7567 STRLEN protolen = CvPROTOLEN(cv);
7568 const char *proto = CvPROTO(cv);
7570 proto = S_strip_spaces(aTHX_ proto, &protolen);
7573 if ((optional = *proto == ';'))
7576 while (*proto == ';');
7580 *proto == '$' || *proto == '_'
7581 || *proto == '*' || *proto == '+'
7586 *proto == '\\' && proto[1] && proto[2] == '\0'
7589 UNIPROTO(UNIOPSUB,optional);
7590 if (*proto == '\\' && proto[1] == '[') {
7591 const char *p = proto + 2;
7592 while(*p && *p != ']')
7594 if(*p == ']' && !p[1])
7595 UNIPROTO(UNIOPSUB,optional);
7597 if (*proto == '&' && *s == '{') {
7599 sv_setpvs(PL_subname, "__ANON__");
7601 sv_setpvs(PL_subname, "__ANON__::__ANON__");
7602 if (!PL_lex_allbrackets
7603 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7605 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7610 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7612 force_next(off ? PRIVATEREF : BAREWORD);
7613 if (!PL_lex_allbrackets
7614 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7616 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7621 /* Call it a bare word */
7623 if (PL_hints & HINT_STRICT_SUBS)
7624 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7627 /* after "print" and similar functions (corresponding to
7628 * "F? L" in opcode.pl), whatever wasn't already parsed as
7629 * a filehandle should be subject to "strict subs".
7630 * Likewise for the optional indirect-object argument to system
7631 * or exec, which can't be a bareword */
7632 if ((PL_last_lop_op == OP_PRINT
7633 || PL_last_lop_op == OP_PRTF
7634 || PL_last_lop_op == OP_SAY
7635 || PL_last_lop_op == OP_SYSTEM
7636 || PL_last_lop_op == OP_EXEC)
7637 && (PL_hints & HINT_STRICT_SUBS))
7638 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7639 if (lastchar != '-') {
7640 if (ckWARN(WARN_RESERVED)) {
7644 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
7646 /* PL_warn_reserved is constant */
7647 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7648 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7650 GCC_DIAG_RESTORE_STMT;
7658 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7659 && saw_infix_sigil) {
7660 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7661 "Operator or semicolon missing before %c%" UTF8f,
7663 UTF8fARG(UTF, strlen(PL_tokenbuf),
7665 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7666 "Ambiguous use of %c resolved as operator %c",
7667 lastchar, lastchar);
7674 newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7679 newSVOP(OP_CONST, 0,
7680 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7683 case KEY___PACKAGE__:
7685 newSVOP(OP_CONST, 0,
7687 ? newSVhek(HvNAME_HEK(PL_curstash))
7694 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
7695 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
7698 gv = (GV *)*hv_fetchs(stash, "DATA", 1);
7700 gv_init(gv,stash,"DATA",4,0);
7703 GvIOp(gv) = newIO();
7704 IoIFP(GvIOp(gv)) = PL_rsfp;
7705 /* Mark this internal pseudo-handle as clean */
7706 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
7707 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
7708 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
7710 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
7711 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7712 /* if the script was opened in binmode, we need to revert
7713 * it to text mode for compatibility; but only iff it has CRs
7714 * XXX this is a questionable hack at best. */
7715 if (PL_bufend-PL_bufptr > 2
7716 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
7719 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
7720 loc = PerlIO_tell(PL_rsfp);
7721 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7724 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7726 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
7727 #endif /* NETWARE */
7729 PerlIO_seek(PL_rsfp, loc, 0);
7733 #ifdef PERLIO_LAYERS
7736 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7745 FUN0OP(CvCLONE(PL_compcv)
7746 ? newOP(OP_RUNCV, 0)
7747 : newPVOP(OP_RUNCV,0,NULL));
7756 if (PL_expect == XSTATE) {
7767 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7768 if ((*s == ':' && s[1] == ':')
7769 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7773 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7777 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
7778 UTF8fARG(UTF, len, PL_tokenbuf));
7781 else if (tmp == KEY_require || tmp == KEY_do
7783 /* that's a way to remember we saw "CORE::" */
7795 LOP(OP_ACCEPT,XTERM);
7798 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7803 LOP(OP_ATAN2,XTERM);
7809 LOP(OP_BINMODE,XTERM);
7812 LOP(OP_BLESS,XTERM);
7821 /* We have to disambiguate the two senses of
7822 "continue". If the next token is a '{' then
7823 treat it as the start of a continue block;
7824 otherwise treat it as a control operator.
7834 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7844 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7853 if (!PL_cryptseen) {
7854 PL_cryptseen = TRUE;
7858 LOP(OP_CRYPT,XTERM);
7861 LOP(OP_CHMOD,XTERM);
7864 LOP(OP_CHOWN,XTERM);
7867 LOP(OP_CONNECT,XTERM);
7887 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7889 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
7890 && !keyword(PL_tokenbuf + 1, len, 0)) {
7891 SSize_t off = s-SvPVX(PL_linestr);
7893 s = SvPVX(PL_linestr)+off;
7895 force_ident_maybe_lex('&');
7900 if (orig_keyword == KEY_do) {
7909 PL_hints |= HINT_BLOCK_SCOPE;
7919 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7920 STR_WITH_LEN("NDBM_File::"),
7921 STR_WITH_LEN("DB_File::"),
7922 STR_WITH_LEN("GDBM_File::"),
7923 STR_WITH_LEN("SDBM_File::"),
7924 STR_WITH_LEN("ODBM_File::"),
7926 LOP(OP_DBMOPEN,XTERM);
7938 pl_yylval.ival = CopLINE(PL_curcop);
7942 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7954 if (*s == '{') { /* block eval */
7955 PL_expect = XTERMBLOCK;
7956 UNIBRACK(OP_ENTERTRY);
7958 else { /* string eval */
7960 UNIBRACK(OP_ENTEREVAL);
7965 UNIBRACK(-OP_ENTEREVAL);
7979 case KEY_endhostent:
7985 case KEY_endservent:
7988 case KEY_endprotoent:
7999 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8001 pl_yylval.ival = CopLINE(PL_curcop);
8003 if ( PL_expect == XSTATE
8004 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
8007 SSize_t s_off = s - SvPVX(PL_linestr);
8009 if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "my")
8010 && isSPACE(*(p + 2)))
8014 else if ( memBEGINPs(p, (STRLEN) (PL_bufend - p), "our")
8015 && isSPACE(*(p + 3)))
8021 /* skip optional package name, as in "for my abc $x (..)" */
8022 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
8023 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8026 if (*p != '$' && *p != '\\')
8027 Perl_croak(aTHX_ "Missing $ on loop variable");
8029 /* The buffer may have been reallocated, update s */
8030 s = SvPVX(PL_linestr) + s_off;
8035 LOP(OP_FORMLINE,XTERM);
8044 LOP(OP_FCNTL,XTERM);
8050 LOP(OP_FLOCK,XTERM);
8053 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8058 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8063 LOP(OP_GREPSTART, XREF);
8080 case KEY_getpriority:
8081 LOP(OP_GETPRIORITY,XTERM);
8083 case KEY_getprotobyname:
8086 case KEY_getprotobynumber:
8087 LOP(OP_GPBYNUMBER,XTERM);
8089 case KEY_getprotoent:
8101 case KEY_getpeername:
8102 UNI(OP_GETPEERNAME);
8104 case KEY_gethostbyname:
8107 case KEY_gethostbyaddr:
8108 LOP(OP_GHBYADDR,XTERM);
8110 case KEY_gethostent:
8113 case KEY_getnetbyname:
8116 case KEY_getnetbyaddr:
8117 LOP(OP_GNBYADDR,XTERM);
8122 case KEY_getservbyname:
8123 LOP(OP_GSBYNAME,XTERM);
8125 case KEY_getservbyport:
8126 LOP(OP_GSBYPORT,XTERM);
8128 case KEY_getservent:
8131 case KEY_getsockname:
8132 UNI(OP_GETSOCKNAME);
8134 case KEY_getsockopt:
8135 LOP(OP_GSOCKOPT,XTERM);
8150 pl_yylval.ival = CopLINE(PL_curcop);
8151 Perl_ck_warner_d(aTHX_
8152 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8153 "given is experimental");
8158 orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB,
8166 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8168 pl_yylval.ival = CopLINE(PL_curcop);
8172 LOP(OP_INDEX,XTERM);
8178 LOP(OP_IOCTL,XTERM);
8205 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8210 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8224 LOP(OP_LISTEN,XTERM);
8233 s = scan_pat(s,OP_MATCH);
8234 TERM(sublex_start());
8237 LOP(OP_MAPSTART, XREF);
8240 LOP(OP_MKDIR,XTERM);
8243 LOP(OP_MSGCTL,XTERM);
8246 LOP(OP_MSGGET,XTERM);
8249 LOP(OP_MSGRCV,XTERM);
8252 LOP(OP_MSGSND,XTERM);
8259 yyerror(Perl_form(aTHX_
8260 "Can't redeclare \"%s\" in \"%s\"",
8261 tmp == KEY_my ? "my" :
8262 tmp == KEY_state ? "state" : "our",
8263 PL_in_my == KEY_my ? "my" :
8264 PL_in_my == KEY_state ? "state" : "our"));
8266 PL_in_my = (U16)tmp;
8268 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8269 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
8270 if (memEQs(PL_tokenbuf, len, "sub"))
8272 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
8273 if (!PL_in_my_stash) {
8277 len = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
8278 PERL_MY_SNPRINTF_POST_GUARD(len, sizeof(tmpbuf));
8279 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
8282 else if (*s == '\\') {
8283 if (!FEATURE_MYREF_IS_ENABLED)
8284 Perl_croak(aTHX_ "The experimental declared_refs "
8285 "feature is not enabled");
8286 Perl_ck_warner_d(aTHX_
8287 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
8288 "Declaring references is experimental");
8296 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8301 s = tokenize_use(0, s);
8305 if (*s == '(' || (s = skipspace(s), *s == '('))
8308 if (!PL_lex_allbrackets
8309 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8311 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8318 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8320 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8322 for (t=d; isSPACE(*t);)
8324 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8326 && !(t[0] == '=' && t[1] == '>')
8327 && !(t[0] == ':' && t[1] == ':')
8328 && !keyword(s, d-s, 0)
8330 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8331 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8332 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8338 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8340 pl_yylval.ival = OP_OR;
8350 LOP(OP_OPEN_DIR,XTERM);
8353 checkcomma(s,PL_tokenbuf,"filehandle");
8357 checkcomma(s,PL_tokenbuf,"filehandle");
8376 s = force_word(s,BAREWORD,FALSE,TRUE);
8378 s = force_strict_version(s);
8382 LOP(OP_PIPE_OP,XTERM);
8385 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8387 missingterm(NULL, 0);
8388 COPLINE_SET_FROM_MULTI_END;
8389 pl_yylval.ival = OP_CONST;
8390 TERM(sublex_start());
8397 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8399 missingterm(NULL, 0);
8400 COPLINE_SET_FROM_MULTI_END;
8401 PL_expect = XOPERATOR;
8402 if (SvCUR(PL_lex_stuff)) {
8403 int warned_comma = !ckWARN(WARN_QW);
8404 int warned_comment = warned_comma;
8405 d = SvPV_force(PL_lex_stuff, len);
8407 for (; isSPACE(*d) && len; --len, ++d)
8412 if (!warned_comma || !warned_comment) {
8413 for (; !isSPACE(*d) && len; --len, ++d) {
8414 if (!warned_comma && *d == ',') {
8415 Perl_warner(aTHX_ packWARN(WARN_QW),
8416 "Possible attempt to separate words with commas");
8419 else if (!warned_comment && *d == '#') {
8420 Perl_warner(aTHX_ packWARN(WARN_QW),
8421 "Possible attempt to put comments in qw() list");
8427 for (; !isSPACE(*d) && len; --len, ++d)
8430 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8431 words = op_append_elem(OP_LIST, words,
8432 newSVOP(OP_CONST, 0, tokeq(sv)));
8437 words = newNULLLIST();
8438 SvREFCNT_dec_NN(PL_lex_stuff);
8439 PL_lex_stuff = NULL;
8440 PL_expect = XOPERATOR;
8441 pl_yylval.opval = sawparens(words);
8446 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8448 missingterm(NULL, 0);
8449 pl_yylval.ival = OP_STRINGIFY;
8450 if (SvIVX(PL_lex_stuff) == '\'')
8451 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8452 TERM(sublex_start());
8455 s = scan_pat(s,OP_QR);
8456 TERM(sublex_start());
8459 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8461 missingterm(NULL, 0);
8462 pl_yylval.ival = OP_BACKTICK;
8463 TERM(sublex_start());
8471 s = force_version(s, FALSE);
8473 else if (*s != 'v' || !isDIGIT(s[1])
8474 || (s = force_version(s, TRUE), *s == 'v'))
8476 *PL_tokenbuf = '\0';
8477 s = force_word(s,BAREWORD,TRUE,TRUE);
8478 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
8479 PL_tokenbuf + sizeof(PL_tokenbuf),
8482 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8483 GV_ADD | (UTF ? SVf_UTF8 : 0));
8486 yyerror("<> at require-statement should be quotes");
8488 if (orig_keyword == KEY_require) {
8494 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
8496 PL_last_uni = PL_oldbufptr;
8497 PL_last_lop_op = OP_REQUIRE;
8499 return REPORT( (int)REQUIRE );
8508 LOP(OP_RENAME,XTERM);
8517 LOP(OP_RINDEX,XTERM);
8526 UNIDOR(OP_READLINE);
8529 UNIDOR(OP_BACKTICK);
8538 LOP(OP_REVERSE,XTERM);
8541 UNIDOR(OP_READLINK);
8548 if (pl_yylval.opval)
8549 TERM(sublex_start());
8551 TOKEN(1); /* force error */
8554 checkcomma(s,PL_tokenbuf,"filehandle");
8564 LOP(OP_SELECT,XTERM);
8570 LOP(OP_SEMCTL,XTERM);
8573 LOP(OP_SEMGET,XTERM);
8576 LOP(OP_SEMOP,XTERM);
8582 LOP(OP_SETPGRP,XTERM);
8584 case KEY_setpriority:
8585 LOP(OP_SETPRIORITY,XTERM);
8587 case KEY_sethostent:
8593 case KEY_setservent:
8596 case KEY_setprotoent:
8606 LOP(OP_SEEKDIR,XTERM);
8608 case KEY_setsockopt:
8609 LOP(OP_SSOCKOPT,XTERM);
8615 LOP(OP_SHMCTL,XTERM);
8618 LOP(OP_SHMGET,XTERM);
8621 LOP(OP_SHMREAD,XTERM);
8624 LOP(OP_SHMWRITE,XTERM);
8627 LOP(OP_SHUTDOWN,XTERM);
8636 LOP(OP_SOCKET,XTERM);
8638 case KEY_socketpair:
8639 LOP(OP_SOCKPAIR,XTERM);
8642 checkcomma(s,PL_tokenbuf,"subroutine name");
8645 s = force_word(s,BAREWORD,TRUE,TRUE);
8649 LOP(OP_SPLIT,XTERM);
8652 LOP(OP_SPRINTF,XTERM);
8655 LOP(OP_SPLICE,XTERM);
8670 LOP(OP_SUBSTR,XTERM);
8676 char * const tmpbuf = PL_tokenbuf + 1;
8677 bool have_name, have_proto;
8678 const int key = tmp;
8679 SV *format_name = NULL;
8680 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
8682 SSize_t off = s-SvPVX(PL_linestr);
8684 d = SvPVX(PL_linestr)+off;
8686 SAVEBOOL(PL_parser->sig_seen);
8687 PL_parser->sig_seen = FALSE;
8689 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
8691 || (*s == ':' && s[1] == ':'))
8694 PL_expect = XATTRBLOCK;
8695 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8697 if (key == KEY_format)
8698 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
8700 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8702 PL_tokenbuf, len + 1, 0
8704 sv_setpvn(PL_subname, tmpbuf, len);
8706 sv_setsv(PL_subname,PL_curstname);
8707 sv_catpvs(PL_subname,"::");
8708 sv_catpvn(PL_subname,tmpbuf,len);
8710 if (SvUTF8(PL_linestr))
8711 SvUTF8_on(PL_subname);
8718 if (key == KEY_my || key == KEY_our || key==KEY_state)
8721 /* diag_listed_as: Missing name in "%s sub" */
8723 "Missing name in \"%s\"", PL_bufptr);
8725 PL_expect = XATTRTERM;
8726 sv_setpvs(PL_subname,"?");
8730 if (key == KEY_format) {
8732 NEXTVAL_NEXTTOKE.opval
8733 = newSVOP(OP_CONST,0, format_name);
8734 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8735 force_next(BAREWORD);
8740 /* Look for a prototype */
8741 if (*s == '(' && !is_sigsub) {
8742 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8744 Perl_croak(aTHX_ "Prototype not terminated");
8745 COPLINE_SET_FROM_MULTI_END;
8746 (void)validate_proto(PL_subname, PL_lex_stuff,
8747 ckWARN(WARN_ILLEGALPROTO), 0);
8755 if ( !(*s == ':' && s[1] != ':')
8756 && (*s != '{' && *s != '(') && key != KEY_format)
8758 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
8759 key == KEY_DESTROY || key == KEY_BEGIN ||
8760 key == KEY_UNITCHECK || key == KEY_CHECK ||
8761 key == KEY_INIT || key == KEY_END ||
8762 key == KEY_my || key == KEY_state ||
8765 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
8766 else if (*s != ';' && *s != '}')
8767 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
8771 NEXTVAL_NEXTTOKE.opval =
8772 newSVOP(OP_CONST, 0, PL_lex_stuff);
8773 PL_lex_stuff = NULL;
8778 sv_setpvs(PL_subname, "__ANON__");
8780 sv_setpvs(PL_subname, "__ANON__::__ANON__");
8786 force_ident_maybe_lex('&');
8794 LOP(OP_SYSTEM,XREF);
8797 LOP(OP_SYMLINK,XTERM);
8800 LOP(OP_SYSCALL,XTERM);
8803 LOP(OP_SYSOPEN,XTERM);
8806 LOP(OP_SYSSEEK,XTERM);
8809 LOP(OP_SYSREAD,XTERM);
8812 LOP(OP_SYSWRITE,XTERM);
8817 TERM(sublex_start());
8838 LOP(OP_TRUNCATE,XTERM);
8850 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8852 pl_yylval.ival = CopLINE(PL_curcop);
8856 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8858 pl_yylval.ival = CopLINE(PL_curcop);
8862 LOP(OP_UNLINK,XTERM);
8868 LOP(OP_UNPACK,XTERM);
8871 LOP(OP_UTIME,XTERM);
8877 LOP(OP_UNSHIFT,XTERM);
8880 s = tokenize_use(1, s);
8890 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8892 pl_yylval.ival = CopLINE(PL_curcop);
8893 Perl_ck_warner_d(aTHX_
8894 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8895 "when is experimental");
8899 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8901 pl_yylval.ival = CopLINE(PL_curcop);
8905 PL_hints |= HINT_BLOCK_SCOPE;
8912 LOP(OP_WAITPID,XTERM);
8918 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8919 * we use the same number on EBCDIC */
8920 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8924 if (PL_expect == XOPERATOR) {
8925 if (*s == '=' && !PL_lex_allbrackets
8926 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8936 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8938 pl_yylval.ival = OP_XOR;
8947 Looks up an identifier in the pad or in a package
8949 is_sig indicates that this is a subroutine signature variable
8950 rather than a plain pad var.
8953 PRIVATEREF if this is a lexical name.
8954 BAREWORD if this belongs to a package.
8957 if we're in a my declaration
8958 croak if they tried to say my($foo::bar)
8959 build the ops for a my() declaration
8960 if it's an access to a my() variable
8961 build ops for access to a my() variable
8962 if in a dq string, and they've said @foo and we can't find @foo
8964 build ops for a bareword
8968 S_pending_ident(pTHX)
8971 const char pit = (char)pl_yylval.ival;
8972 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8973 /* All routes through this function want to know if there is a colon. */
8974 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8976 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8977 "### Pending identifier '%s'\n", PL_tokenbuf); });
8978 assert(tokenbuf_len >= 2);
8980 /* if we're in a my(), we can't allow dynamics here.
8981 $foo'bar has already been turned into $foo::bar, so
8982 just check for colons.
8984 if it's a legal name, the OP is a PADANY.
8987 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
8989 /* diag_listed_as: No package name allowed for variable %s
8991 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8992 "%se %s in \"our\"",
8993 *PL_tokenbuf=='&' ?"subroutin":"variabl",
8994 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
8995 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9000 /* "my" variable %s can't be in a package */
9001 /* PL_no_myglob is constant */
9002 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9003 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9004 PL_in_my == KEY_my ? "my" : "state",
9005 *PL_tokenbuf == '&' ? "subroutin" : "variabl",
9007 UTF ? SVf_UTF8 : 0);
9008 GCC_DIAG_RESTORE_STMT;
9011 if (PL_in_my == KEY_sigvar) {
9012 /* A signature 'padop' needs in addition, an op_first to
9013 * point to a child sigdefelem, and an extra field to hold
9014 * the signature index. We can achieve both by using an
9015 * UNOP_AUX and (ab)using the op_aux field to hold the
9016 * index. If we ever need more fields, use a real malloced
9017 * aux strut instead.
9019 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9020 INT2PTR(UNOP_AUX_item *,
9021 (PL_parser->sig_elems)));
9022 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9023 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9027 o = newOP(OP_PADANY, 0);
9028 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9029 UTF ? SVf_UTF8 : 0);
9030 if (PL_in_my == KEY_sigvar)
9033 pl_yylval.opval = o;
9039 build the ops for accesses to a my() variable.
9044 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9046 if (tmp != NOT_IN_PAD) {
9047 /* might be an "our" variable" */
9048 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9049 /* build ops for a bareword */
9050 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9051 HEK * const stashname = HvNAME_HEK(stash);
9052 SV * const sym = newSVhek(stashname);
9053 sv_catpvs(sym, "::");
9054 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9055 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9056 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9060 ((PL_tokenbuf[0] == '$') ? SVt_PV
9061 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9066 pl_yylval.opval = newOP(OP_PADANY, 0);
9067 pl_yylval.opval->op_targ = tmp;
9073 Whine if they've said @foo or @foo{key} in a doublequoted string,
9074 and @foo (or %foo) isn't a variable we can find in the symbol
9077 if (ckWARN(WARN_AMBIGUOUS)
9079 && PL_lex_state != LEX_NORMAL
9080 && !PL_lex_brackets)
9082 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9083 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9085 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9088 /* Downgraded from fatal to warning 20000522 mjd */
9089 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9090 "Possible unintended interpolation of %" UTF8f
9092 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9096 /* build ops for a bareword */
9097 pl_yylval.opval = newSVOP(OP_CONST, 0,
9098 newSVpvn_flags(PL_tokenbuf + 1,
9099 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9100 UTF ? SVf_UTF8 : 0 ));
9101 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9103 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9104 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9105 | ( UTF ? SVf_UTF8 : 0 ),
9106 ((PL_tokenbuf[0] == '$') ? SVt_PV
9107 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9113 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9115 PERL_ARGS_ASSERT_CHECKCOMMA;
9117 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9118 if (ckWARN(WARN_SYNTAX)) {
9121 for (w = s+2; *w && level; w++) {
9129 /* the list of chars below is for end of statements or
9130 * block / parens, boolean operators (&&, ||, //) and branch
9131 * constructs (or, and, if, until, unless, while, err, for).
9132 * Not a very solid hack... */
9133 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9134 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9135 "%s (...) interpreted as function",name);
9138 while (s < PL_bufend && isSPACE(*s))
9142 while (s < PL_bufend && isSPACE(*s))
9144 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9145 const char * const w = s;
9146 s += UTF ? UTF8SKIP(s) : 1;
9147 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9148 s += UTF ? UTF8SKIP(s) : 1;
9149 while (s < PL_bufend && isSPACE(*s))
9153 if (keyword(w, s - w, 0))
9156 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9157 if (gv && GvCVu(gv))
9162 Copy(w, tmpbuf+1, s - w, char);
9164 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9165 if (off != NOT_IN_PAD) return;
9167 Perl_croak(aTHX_ "No comma allowed after %s", what);
9172 /* S_new_constant(): do any overload::constant lookup.
9174 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9175 Best used as sv=new_constant(..., sv, ...).
9176 If s, pv are NULL, calls subroutine with one argument,
9177 and <type> is used with error messages only.
9178 <type> is assumed to be well formed UTF-8 */
9181 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9182 SV *sv, SV *pv, const char *type, STRLEN typelen)
9185 HV * table = GvHV(PL_hintgv); /* ^H */
9190 const char *why1 = "", *why2 = "", *why3 = "";
9192 PERL_ARGS_ASSERT_NEW_CONSTANT;
9193 /* We assume that this is true: */
9194 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9197 /* charnames doesn't work well if there have been errors found */
9198 if (PL_error_count > 0 && *key == 'c')
9200 SvREFCNT_dec_NN(sv);
9201 return &PL_sv_undef;
9204 sv_2mortal(sv); /* Parent created it permanently */
9206 || ! (PL_hints & HINT_LOCALIZE_HH)
9207 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9212 /* Here haven't found what we're looking for. If it is charnames,
9213 * perhaps it needs to be loaded. Try doing that before giving up */
9215 Perl_load_module(aTHX_
9217 newSVpvs("_charnames"),
9218 /* version parameter; no need to specify it, as if
9219 * we get too early a version, will fail anyway,
9220 * not being able to find '_charnames' */
9225 assert(sp == PL_stack_sp);
9226 table = GvHV(PL_hintgv);
9228 && (PL_hints & HINT_LOCALIZE_HH)
9229 && (cvp = hv_fetch(table, key, keylen, FALSE))
9235 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9236 msg = Perl_form(aTHX_
9237 "Constant(%.*s) unknown",
9238 (int)(type ? typelen : len),
9244 why3 = "} is not defined";
9247 msg = Perl_form(aTHX_
9248 /* The +3 is for '\N{'; -4 for that, plus '}' */
9249 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9253 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9254 (int)(type ? typelen : len),
9255 (type ? type: s), why1, why2, why3);
9258 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9259 return SvREFCNT_inc_simple_NN(sv);
9264 pv = newSVpvn_flags(s, len, SVs_TEMP);
9266 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9268 typesv = &PL_sv_undef;
9270 PUSHSTACKi(PERLSI_OVERLOAD);
9282 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9286 /* Check the eval first */
9287 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9289 const char * errstr;
9290 sv_catpvs(errsv, "Propagated");
9291 errstr = SvPV_const(errsv, errlen);
9292 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9294 res = SvREFCNT_inc_simple_NN(sv);
9298 SvREFCNT_inc_simple_void_NN(res);
9307 why1 = "Call to &{$^H{";
9309 why3 = "}} did not return a defined value";
9311 (void)sv_2mortal(sv);
9318 PERL_STATIC_INLINE void
9319 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9320 bool is_utf8, bool check_dollar, bool tick_warn)
9323 const char *olds = *s;
9324 PERL_ARGS_ASSERT_PARSE_IDENT;
9326 while (*s < PL_bufend) {
9328 Perl_croak(aTHX_ "%s", ident_too_long);
9329 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9330 /* The UTF-8 case must come first, otherwise things
9331 * like c\N{COMBINING TILDE} would start failing, as the
9332 * isWORDCHAR_A case below would gobble the 'c' up.
9335 char *t = *s + UTF8SKIP(*s);
9336 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9339 if (*d + (t - *s) > e)
9340 Perl_croak(aTHX_ "%s", ident_too_long);
9341 Copy(*s, *d, t - *s, char);
9345 else if ( isWORDCHAR_A(**s) ) {
9348 } while (isWORDCHAR_A(**s) && *d < e);
9350 else if ( allow_package
9352 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9359 else if (allow_package && **s == ':' && (*s)[1] == ':'
9360 /* Disallow things like Foo::$bar. For the curious, this is
9361 * the code path that triggers the "Bad name after" warning
9362 * when looking for barewords.
9364 && !(check_dollar && (*s)[2] == '$')) {
9371 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9372 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9375 Newx(d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9378 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9379 "Old package separator used in string");
9380 if (olds[-1] == '#')
9384 if (*olds == '\'') {
9391 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9392 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9393 UTF8fARG(is_utf8, d2-d, d));
9398 /* Returns a NUL terminated string, with the length of the string written to
9402 S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9405 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9406 bool is_utf8 = cBOOL(UTF);
9408 PERL_ARGS_ASSERT_SCAN_WORD;
9410 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9416 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9417 * iff Unicode semantics are to be used. The legal ones are any of:
9418 * a) all ASCII characters except:
9419 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9421 * The final case currently doesn't get this far in the program, so we
9422 * don't test for it. If that were to change, it would be ok to allow it.
9423 * b) When not under Unicode rules, any upper Latin1 character
9424 * c) Otherwise, when unicode rules are used, all XIDS characters.
9426 * Because all ASCII characters have the same representation whether
9427 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9428 * '{' without knowing if is UTF-8 or not. */
9429 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
9430 (isGRAPH_A(*(s)) || ((is_utf8) \
9431 ? isIDFIRST_utf8_safe(s, e) \
9433 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9436 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9438 I32 herelines = PL_parser->herelines;
9439 SSize_t bracket = -1;
9442 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9443 bool is_utf8 = cBOOL(UTF);
9444 I32 orig_copline = 0, tmp_copline = 0;
9446 PERL_ARGS_ASSERT_SCAN_IDENT;
9448 if (isSPACE(*s) || !*s)
9451 while (isDIGIT(*s)) {
9453 Perl_croak(aTHX_ "%s", ident_too_long);
9457 else { /* See if it is a "normal" identifier */
9458 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9463 /* Either a digit variable, or parse_ident() found an identifier
9464 (anything valid as a bareword), so job done and return. */
9465 if (PL_lex_state != LEX_NORMAL)
9466 PL_lex_state = LEX_INTERPENDMAYBE;
9470 /* Here, it is not a run-of-the-mill identifier name */
9472 if (*s == '$' && s[1]
9473 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9474 || isDIGIT_A((U8)s[1])
9477 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9479 /* Dereferencing a value in a scalar variable.
9480 The alternatives are different syntaxes for a scalar variable.
9481 Using ' as a leading package separator isn't allowed. :: is. */
9484 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9486 bracket = s - SvPVX(PL_linestr);
9488 orig_copline = CopLINE(PL_curcop);
9489 if (s < PL_bufend && isSPACE(*s)) {
9493 if ((s <= PL_bufend - (is_utf8)
9496 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9499 const STRLEN skip = UTF8SKIP(s);
9502 for ( i = 0; i < skip; i++ )
9510 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9511 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9515 /* Warn about ambiguous code after unary operators if {...} notation isn't
9516 used. There's no difference in ambiguity; it's merely a heuristic
9517 about when not to warn. */
9518 else if (ck_uni && bracket == -1)
9520 if (bracket != -1) {
9523 /* If we were processing {...} notation then... */
9524 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9525 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9528 /* note we have to check for a normal identifier first,
9529 * as it handles utf8 symbols, and only after that has
9530 * been ruled out can we look at the caret words */
9531 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9532 /* if it starts as a valid identifier, assume that it is one.
9533 (the later check for } being at the expected point will trap
9534 cases where this doesn't pan out.) */
9535 d += is_utf8 ? UTF8SKIP(d) : 1;
9536 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
9539 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9541 while (isWORDCHAR(*s) && d < e) {
9545 Perl_croak(aTHX_ "%s", ident_too_long);
9548 tmp_copline = CopLINE(PL_curcop);
9549 if (s < PL_bufend && isSPACE(*s)) {
9552 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9553 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
9554 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9555 const char * const brack =
9557 ((*s == '[') ? "[...]" : "{...}");
9558 orig_copline = CopLINE(PL_curcop);
9559 CopLINE_set(PL_curcop, tmp_copline);
9560 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9561 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9562 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9563 funny, dest, brack, funny, dest, brack);
9564 CopLINE_set(PL_curcop, orig_copline);
9567 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9568 PL_lex_allbrackets++;
9574 tmp_copline = CopLINE(PL_curcop);
9575 if ((skip = s < PL_bufend && isSPACE(*s))) {
9576 /* Avoid incrementing line numbers or resetting PL_linestart,
9577 in case we have to back up. */
9578 STRLEN s_off = s - SvPVX(PL_linestr);
9580 s = SvPVX(PL_linestr) + s_off;
9585 /* Expect to find a closing } after consuming any trailing whitespace.
9588 /* Now increment line numbers if applicable. */
9592 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9593 PL_lex_state = LEX_INTERPEND;
9596 if (PL_lex_state == LEX_NORMAL) {
9597 if (ckWARN(WARN_AMBIGUOUS)
9598 && (keyword(dest, d - dest, 0)
9599 || get_cvn_flags(dest, d - dest, is_utf8
9603 SV *tmp = newSVpvn_flags( dest, d - dest,
9604 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9607 orig_copline = CopLINE(PL_curcop);
9608 CopLINE_set(PL_curcop, tmp_copline);
9609 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9610 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9611 funny, SVfARG(tmp), funny, SVfARG(tmp));
9612 CopLINE_set(PL_curcop, orig_copline);
9617 /* Didn't find the closing } at the point we expected, so restore
9618 state such that the next thing to process is the opening { and */
9619 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9620 CopLINE_set(PL_curcop, orig_copline);
9621 PL_parser->herelines = herelines;
9623 PL_parser->sub_no_recover = TRUE;
9626 else if ( PL_lex_state == LEX_INTERPNORMAL
9628 && !intuit_more(s, PL_bufend))
9629 PL_lex_state = LEX_INTERPEND;
9634 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9636 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9637 * found in the parse starting at 's', based on the subset that are valid
9638 * in this context input to this routine in 'valid_flags'. Advances s.
9639 * Returns TRUE if the input should be treated as a valid flag, so the next
9640 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9641 * upon first call on the current regex. This routine will set it to any
9642 * charset modifier found. The caller shouldn't change it. This way,
9643 * another charset modifier encountered in the parse can be detected as an
9644 * error, as we have decided to allow only one */
9647 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9649 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9650 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9651 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9652 UTF ? SVf_UTF8 : 0);
9654 /* Pretend that it worked, so will continue processing before
9663 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
9664 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9665 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9666 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9667 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9668 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
9669 case LOCALE_PAT_MOD:
9671 goto multiple_charsets;
9673 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
9676 case UNICODE_PAT_MOD:
9678 goto multiple_charsets;
9680 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
9683 case ASCII_RESTRICT_PAT_MOD:
9685 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9689 /* Error if previous modifier wasn't an 'a', but if it was, see
9690 * if, and accept, a second occurrence (only) */
9692 || get_regex_charset(*pmfl)
9693 != REGEX_ASCII_RESTRICTED_CHARSET)
9695 goto multiple_charsets;
9697 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
9701 case DEPENDS_PAT_MOD:
9703 goto multiple_charsets;
9705 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
9714 if (*charset != c) {
9715 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9717 else if (c == 'a') {
9718 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
9719 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9722 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9725 /* Pretend that it worked, so will continue processing before dieing */
9731 S_scan_pat(pTHX_ char *start, I32 type)
9735 const char * const valid_flags =
9736 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
9737 char charset = '\0'; /* character set modifier */
9738 unsigned int x_mod_count = 0;
9740 PERL_ARGS_ASSERT_SCAN_PAT;
9742 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
9744 Perl_croak(aTHX_ "Search pattern not terminated");
9746 pm = (PMOP*)newPMOP(type, 0);
9747 if (PL_multi_open == '?') {
9748 /* This is the only point in the code that sets PMf_ONCE: */
9749 pm->op_pmflags |= PMf_ONCE;
9751 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9752 allows us to restrict the list needed by reset to just the ??
9754 assert(type != OP_TRANS);
9756 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
9759 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
9762 elements = mg->mg_len / sizeof(PMOP**);
9763 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9764 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9765 mg->mg_len = elements * sizeof(PMOP**);
9766 PmopSTASH_set(pm,PL_curstash);
9770 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9771 * anon CV. False positives like qr/[(?{]/ are harmless */
9773 if (type == OP_QR) {
9775 char *e, *p = SvPV(PL_lex_stuff, len);
9777 for (; p < e; p++) {
9778 if (p[0] == '(' && p[1] == '?'
9779 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9781 pm->op_pmflags |= PMf_HAS_CV;
9785 pm->op_pmflags |= PMf_IS_QR;
9788 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
9789 &s, &charset, &x_mod_count))
9791 /* issue a warning if /c is specified,but /g is not */
9792 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
9794 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9795 "Use of /c modifier is meaningless without /g" );
9798 PL_lex_op = (OP*)pm;
9799 pl_yylval.ival = OP_MATCH;
9804 S_scan_subst(pTHX_ char *start)
9810 line_t linediff = 0;
9812 char charset = '\0'; /* character set modifier */
9813 unsigned int x_mod_count = 0;
9816 PERL_ARGS_ASSERT_SCAN_SUBST;
9818 pl_yylval.ival = OP_NULL;
9820 s = scan_str(start, TRUE, FALSE, FALSE, &t);
9823 Perl_croak(aTHX_ "Substitution pattern not terminated");
9827 first_start = PL_multi_start;
9828 first_line = CopLINE(PL_curcop);
9829 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9831 SvREFCNT_dec_NN(PL_lex_stuff);
9832 PL_lex_stuff = NULL;
9833 Perl_croak(aTHX_ "Substitution replacement not terminated");
9835 PL_multi_start = first_start; /* so whole substitution is taken together */
9837 pm = (PMOP*)newPMOP(OP_SUBST, 0);
9841 if (*s == EXEC_PAT_MOD) {
9845 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
9846 &s, &charset, &x_mod_count))
9852 if ((pm->op_pmflags & PMf_CONTINUE)) {
9853 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
9857 SV * const repl = newSVpvs("");
9860 pm->op_pmflags |= PMf_EVAL;
9861 for (; es > 1; es--) {
9862 sv_catpvs(repl, "eval ");
9864 sv_catpvs(repl, "do {");
9865 sv_catsv(repl, PL_parser->lex_sub_repl);
9866 sv_catpvs(repl, "}");
9867 SvREFCNT_dec(PL_parser->lex_sub_repl);
9868 PL_parser->lex_sub_repl = repl;
9872 linediff = CopLINE(PL_curcop) - first_line;
9874 CopLINE_set(PL_curcop, first_line);
9876 if (linediff || es) {
9877 /* the IVX field indicates that the replacement string is a s///e;
9878 * the NVX field indicates how many src code lines the replacement
9880 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
9881 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
9882 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
9886 PL_lex_op = (OP*)pm;
9887 pl_yylval.ival = OP_SUBST;
9892 S_scan_trans(pTHX_ char *start)
9899 bool nondestruct = 0;
9902 PERL_ARGS_ASSERT_SCAN_TRANS;
9904 pl_yylval.ival = OP_NULL;
9906 s = scan_str(start,FALSE,FALSE,FALSE,&t);
9908 Perl_croak(aTHX_ "Transliteration pattern not terminated");
9912 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
9914 SvREFCNT_dec_NN(PL_lex_stuff);
9915 PL_lex_stuff = NULL;
9916 Perl_croak(aTHX_ "Transliteration replacement not terminated");
9919 complement = del = squash = 0;
9923 complement = OPpTRANS_COMPLEMENT;
9926 del = OPpTRANS_DELETE;
9929 squash = OPpTRANS_SQUASH;
9941 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
9942 o->op_private &= ~OPpTRANS_ALL;
9943 o->op_private |= del|squash|complement|
9944 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9945 (DO_UTF8(PL_parser->lex_sub_repl) ? OPpTRANS_TO_UTF : 0);
9948 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
9955 Takes a pointer to the first < in <<FOO.
9956 Returns a pointer to the byte following <<FOO.
9958 This function scans a heredoc, which involves different methods
9959 depending on whether we are in a string eval, quoted construct, etc.
9960 This is because PL_linestr could containing a single line of input, or
9961 a whole string being evalled, or the contents of the current quote-
9964 The two basic methods are:
9965 - Steal lines from the input stream
9966 - Scan the heredoc in PL_linestr and remove it therefrom
9968 In a file scope or filtered eval, the first method is used; in a
9969 string eval, the second.
9971 In a quote-like operator, we have to choose between the two,
9972 depending on where we can find a newline. We peek into outer lex-
9973 ing scopes until we find one with a newline in it. If we reach the
9974 outermost lexing scope and it is a file, we use the stream method.
9975 Otherwise it is treated as an eval.
9979 S_scan_heredoc(pTHX_ char *s)
9981 I32 op_type = OP_SCALAR;
9990 bool indented = FALSE;
9991 const bool infile = PL_rsfp || PL_parser->filtered;
9992 const line_t origline = CopLINE(PL_curcop);
9993 LEXSHARED *shared = PL_parser->lex_shared;
9995 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9998 d = PL_tokenbuf + 1;
9999 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10000 *PL_tokenbuf = '\n';
10002 if (*peek == '~') {
10006 while (SPACE_OR_TAB(*peek))
10008 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10011 s = delimcpy(d, e, s, PL_bufend, term, &len);
10012 if (s == PL_bufend)
10013 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10019 /* <<\FOO is equivalent to <<'FOO' */
10023 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10024 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10026 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10027 peek += UTF ? UTF8SKIP(peek) : 1;
10029 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10030 Copy(s, d, len, char);
10034 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10035 Perl_croak(aTHX_ "Delimiter for here document is too long");
10038 len = d - PL_tokenbuf;
10040 #ifndef PERL_STRICT_CR
10041 d = (char *) memchr(s, '\r', PL_bufend - s);
10043 char * const olds = s;
10045 while (s < PL_bufend) {
10051 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10060 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10065 tmpstr = newSV_type(SVt_PVIV);
10066 SvGROW(tmpstr, 80);
10067 if (term == '\'') {
10068 op_type = OP_CONST;
10069 SvIV_set(tmpstr, -1);
10071 else if (term == '`') {
10072 op_type = OP_BACKTICK;
10073 SvIV_set(tmpstr, '\\');
10076 PL_multi_start = origline + 1 + PL_parser->herelines;
10077 PL_multi_open = PL_multi_close = '<';
10078 /* inside a string eval or quote-like operator */
10079 if (!infile || PL_lex_inwhat) {
10082 char * const olds = s;
10083 PERL_CONTEXT * const cx = CX_CUR();
10084 /* These two fields are not set until an inner lexing scope is
10085 entered. But we need them set here. */
10086 shared->ls_bufptr = s;
10087 shared->ls_linestr = PL_linestr;
10089 /* Look for a newline. If the current buffer does not have one,
10090 peek into the line buffer of the parent lexing scope, going
10091 up as many levels as necessary to find one with a newline
10094 while (!(s = (char *)memchr(
10095 (void *)shared->ls_bufptr, '\n',
10096 SvEND(shared->ls_linestr)-shared->ls_bufptr
10098 shared = shared->ls_prev;
10099 /* shared is only null if we have gone beyond the outermost
10100 lexing scope. In a file, we will have broken out of the
10101 loop in the previous iteration. In an eval, the string buf-
10102 fer ends with "\n;", so the while condition above will have
10103 evaluated to false. So shared can never be null. Or so you
10104 might think. Odd syntax errors like s;@{<<; can gobble up
10105 the implicit semicolon at the end of a flie, causing the
10106 file handle to be closed even when we are not in a string
10107 eval. So shared may be null in that case.
10108 (Closing '}' here to balance the earlier open brace for
10109 editors that look for matched pairs.) */
10110 if (UNLIKELY(!shared))
10112 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10113 most lexing scope. In a file, shared->ls_linestr at that
10114 level is just one line, so there is no body to steal. */
10115 if (infile && !shared->ls_prev) {
10120 else { /* eval or we've already hit EOF */
10121 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10125 linestr = shared->ls_linestr;
10126 bufend = SvEND(linestr);
10131 while (s < bufend - len + 1) {
10133 ++PL_parser->herelines;
10135 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10139 /* Only valid if it's preceded by whitespace only */
10140 while (backup != myolds && --backup >= myolds) {
10141 if (! SPACE_OR_TAB(*backup)) {
10148 /* No whitespace or all! */
10149 if (backup == s || *backup == '\n') {
10150 Newx(indent, indent_len + 1, char);
10151 memcpy(indent, backup + 1, indent_len);
10152 indent[indent_len] = 0;
10153 s--; /* before our delimiter */
10154 PL_parser->herelines--; /* this line doesn't count */
10160 while (s < bufend - len + 1
10161 && memNE(s,PL_tokenbuf,len) )
10164 ++PL_parser->herelines;
10168 if (s >= bufend - len + 1) {
10171 sv_setpvn(tmpstr,d+1,s-d);
10173 /* the preceding stmt passes a newline */
10174 PL_parser->herelines++;
10176 /* s now points to the newline after the heredoc terminator.
10177 d points to the newline before the body of the heredoc.
10180 /* We are going to modify linestr in place here, so set
10181 aside copies of the string if necessary for re-evals or
10183 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10184 check shared->re_eval_str. */
10185 if (shared->re_eval_start || shared->re_eval_str) {
10186 /* Set aside the rest of the regexp */
10187 if (!shared->re_eval_str)
10188 shared->re_eval_str =
10189 newSVpvn(shared->re_eval_start,
10190 bufend - shared->re_eval_start);
10191 shared->re_eval_start -= s-d;
10193 if (cxstack_ix >= 0
10194 && CxTYPE(cx) == CXt_EVAL
10195 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10196 && cx->blk_eval.cur_text == linestr)
10198 cx->blk_eval.cur_text = newSVsv(linestr);
10199 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10201 /* Copy everything from s onwards back to d. */
10202 Move(s,d,bufend-s + 1,char);
10203 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10204 /* Setting PL_bufend only applies when we have not dug deeper
10205 into other scopes, because sublex_done sets PL_bufend to
10206 SvEND(PL_linestr). */
10207 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
10213 char *oldbufptr_save;
10214 char *oldoldbufptr_save;
10216 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10217 term = PL_tokenbuf[1];
10219 linestr_save = PL_linestr; /* must restore this afterwards */
10220 d = s; /* and this */
10221 oldbufptr_save = PL_oldbufptr;
10222 oldoldbufptr_save = PL_oldoldbufptr;
10223 PL_linestr = newSVpvs("");
10224 PL_bufend = SvPVX(PL_linestr);
10226 PL_bufptr = PL_bufend;
10227 CopLINE_set(PL_curcop,
10228 origline + 1 + PL_parser->herelines);
10229 if (!lex_next_chunk(LEX_NO_TERM)
10230 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
10231 /* Simply freeing linestr_save might seem simpler here, as it
10232 does not matter what PL_linestr points to, since we are
10233 about to croak; but in a quote-like op, linestr_save
10234 will have been prospectively freed already, via
10235 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10236 restore PL_linestr. */
10237 SvREFCNT_dec_NN(PL_linestr);
10238 PL_linestr = linestr_save;
10239 PL_oldbufptr = oldbufptr_save;
10240 PL_oldoldbufptr = oldoldbufptr_save;
10243 CopLINE_set(PL_curcop, origline);
10244 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10245 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10246 /* ^That should be enough to avoid this needing to grow: */
10247 sv_catpvs(PL_linestr, "\n\0");
10248 assert(s == SvPVX(PL_linestr));
10249 PL_bufend = SvEND(PL_linestr);
10252 PL_parser->herelines++;
10253 PL_last_lop = PL_last_uni = NULL;
10254 #ifndef PERL_STRICT_CR
10255 if (PL_bufend - PL_linestart >= 2) {
10256 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10257 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10259 PL_bufend[-2] = '\n';
10261 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10263 else if (PL_bufend[-1] == '\r')
10264 PL_bufend[-1] = '\n';
10266 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10267 PL_bufend[-1] = '\n';
10269 if (indented && (PL_bufend-s) >= len) {
10270 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10273 char *backup = found;
10276 /* Only valid if it's preceded by whitespace only */
10277 while (backup != s && --backup >= s) {
10278 if (! SPACE_OR_TAB(*backup)) {
10284 /* All whitespace or none! */
10285 if (backup == found || SPACE_OR_TAB(*backup)) {
10286 Newx(indent, indent_len + 1, char);
10287 memcpy(indent, backup, indent_len);
10288 indent[indent_len] = 0;
10289 SvREFCNT_dec(PL_linestr);
10290 PL_linestr = linestr_save;
10291 PL_linestart = SvPVX(linestr_save);
10292 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10293 PL_oldbufptr = oldbufptr_save;
10294 PL_oldoldbufptr = oldoldbufptr_save;
10300 /* Didn't find it */
10301 sv_catsv(tmpstr,PL_linestr);
10303 if (*s == term && PL_bufend-s >= len
10304 && memEQ(s,PL_tokenbuf + 1,len))
10306 SvREFCNT_dec(PL_linestr);
10307 PL_linestr = linestr_save;
10308 PL_linestart = SvPVX(linestr_save);
10309 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10310 PL_oldbufptr = oldbufptr_save;
10311 PL_oldoldbufptr = oldoldbufptr_save;
10315 sv_catsv(tmpstr,PL_linestr);
10320 PL_multi_end = origline + PL_parser->herelines;
10321 if (indented && indent) {
10322 STRLEN linecount = 1;
10323 STRLEN herelen = SvCUR(tmpstr);
10324 char *ss = SvPVX(tmpstr);
10325 char *se = ss + herelen;
10326 SV *newstr = newSV(herelen+1);
10329 /* Trim leading whitespace */
10331 /* newline only? Copy and move on */
10333 sv_catpvs(newstr,"\n");
10337 /* Found our indentation? Strip it */
10338 } else if (se - ss >= indent_len
10339 && memEQ(ss, indent, indent_len))
10345 while ((ss + le) < se && *(ss + le) != '\n')
10348 sv_catpvn(newstr, ss, le);
10352 /* Line doesn't begin with our indentation? Croak */
10355 "Indentation on line %d of here-doc doesn't match delimiter",
10360 /* avoid sv_setsv() as we dont wan't to COW here */
10361 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10363 SvREFCNT_dec_NN(newstr);
10365 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10366 SvPV_shrink_to_cur(tmpstr);
10369 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10372 PL_lex_stuff = tmpstr;
10373 pl_yylval.ival = op_type;
10377 SvREFCNT_dec(tmpstr);
10378 CopLINE_set(PL_curcop, origline);
10379 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10382 /* scan_inputsymbol
10383 takes: position of first '<' in input buffer
10384 returns: position of first char following the matching '>' in
10386 side-effects: pl_yylval and lex_op are set.
10391 <<>> read from ARGV without magic open
10392 <FH> read from filehandle
10393 <pkg::FH> read from package qualified filehandle
10394 <pkg'FH> read from package qualified filehandle
10395 <$fh> read from filehandle in $fh
10396 <*.h> filename glob
10401 S_scan_inputsymbol(pTHX_ char *start)
10403 char *s = start; /* current position in buffer */
10406 bool nomagicopen = FALSE;
10407 char *d = PL_tokenbuf; /* start of temp holding space */
10408 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10410 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10412 end = (char *) memchr(s, '\n', PL_bufend - s);
10415 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10416 nomagicopen = TRUE;
10422 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10424 /* die if we didn't have space for the contents of the <>,
10425 or if it didn't end, or if we see a newline
10428 if (len >= (I32)sizeof PL_tokenbuf)
10429 Perl_croak(aTHX_ "Excessively long <> operator");
10431 Perl_croak(aTHX_ "Unterminated <> operator");
10436 Remember, only scalar variables are interpreted as filehandles by
10437 this code. Anything more complex (e.g., <$fh{$num}>) will be
10438 treated as a glob() call.
10439 This code makes use of the fact that except for the $ at the front,
10440 a scalar variable and a filehandle look the same.
10442 if (*d == '$' && d[1]) d++;
10444 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10445 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10446 d += UTF ? UTF8SKIP(d) : 1;
10449 /* If we've tried to read what we allow filehandles to look like, and
10450 there's still text left, then it must be a glob() and not a getline.
10451 Use scan_str to pull out the stuff between the <> and treat it
10452 as nothing more than a string.
10455 if (d - PL_tokenbuf != len) {
10456 pl_yylval.ival = OP_GLOB;
10457 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10459 Perl_croak(aTHX_ "Glob not terminated");
10463 bool readline_overriden = FALSE;
10465 /* we're in a filehandle read situation */
10468 /* turn <> into <ARGV> */
10470 Copy("ARGV",d,5,char);
10472 /* Check whether readline() is overriden */
10473 if ((gv_readline = gv_override("readline",8)))
10474 readline_overriden = TRUE;
10476 /* if <$fh>, create the ops to turn the variable into a
10480 /* try to find it in the pad for this block, otherwise find
10481 add symbol table ops
10483 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10484 if (tmp != NOT_IN_PAD) {
10485 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10486 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10487 HEK * const stashname = HvNAME_HEK(stash);
10488 SV * const sym = sv_2mortal(newSVhek(stashname));
10489 sv_catpvs(sym, "::");
10490 sv_catpv(sym, d+1);
10495 OP * const o = newOP(OP_PADSV, 0);
10497 PL_lex_op = readline_overriden
10498 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10499 op_append_elem(OP_LIST, o,
10500 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10501 : newUNOP(OP_READLINE, 0, o);
10509 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10511 PL_lex_op = readline_overriden
10512 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10513 op_append_elem(OP_LIST,
10514 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10515 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10516 : newUNOP(OP_READLINE, 0,
10517 newUNOP(OP_RV2SV, 0,
10518 newGVOP(OP_GV, 0, gv)));
10520 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10521 pl_yylval.ival = OP_NULL;
10524 /* If it's none of the above, it must be a literal filehandle
10525 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10527 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10528 PL_lex_op = readline_overriden
10529 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10530 op_append_elem(OP_LIST,
10531 newGVOP(OP_GV, 0, gv),
10532 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10533 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10534 pl_yylval.ival = OP_NULL;
10544 start position in buffer
10545 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
10546 only if they are of the open/close form
10547 keep_delims preserve the delimiters around the string
10548 re_reparse compiling a run-time /(?{})/:
10549 collapse // to /, and skip encoding src
10550 delimp if non-null, this is set to the position of
10551 the closing delimiter, or just after it if
10552 the closing and opening delimiters differ
10553 (i.e., the opening delimiter of a substitu-
10555 returns: position to continue reading from buffer
10556 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10557 updates the read buffer.
10559 This subroutine pulls a string out of the input. It is called for:
10560 q single quotes q(literal text)
10561 ' single quotes 'literal text'
10562 qq double quotes qq(interpolate $here please)
10563 " double quotes "interpolate $here please"
10564 qx backticks qx(/bin/ls -l)
10565 ` backticks `/bin/ls -l`
10566 qw quote words @EXPORT_OK = qw( func() $spam )
10567 m// regexp match m/this/
10568 s/// regexp substitute s/this/that/
10569 tr/// string transliterate tr/this/that/
10570 y/// string transliterate y/this/that/
10571 ($*@) sub prototypes sub foo ($)
10572 (stuff) sub attr parameters sub foo : attr(stuff)
10573 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10575 In most of these cases (all but <>, patterns and transliterate)
10576 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10577 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10578 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10581 It skips whitespace before the string starts, and treats the first
10582 character as the delimiter. If the delimiter is one of ([{< then
10583 the corresponding "close" character )]}> is used as the closing
10584 delimiter. It allows quoting of delimiters, and if the string has
10585 balanced delimiters ([{<>}]) it allows nesting.
10587 On success, the SV with the resulting string is put into lex_stuff or,
10588 if that is already non-NULL, into lex_repl. The second case occurs only
10589 when parsing the RHS of the special constructs s/// and tr/// (y///).
10590 For convenience, the terminating delimiter character is stuffed into
10595 S_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10599 SV *sv; /* scalar value: string */
10600 const char *tmps; /* temp string, used for delimiter matching */
10601 char *s = start; /* current position in the buffer */
10602 char term; /* terminating character */
10603 char *to; /* current position in the sv's data */
10604 I32 brackets = 1; /* bracket nesting level */
10605 bool has_utf8 = FALSE; /* is there any utf8 content? */
10606 IV termcode; /* terminating char. code */
10607 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
10608 STRLEN termlen; /* length of terminating string */
10611 /* The delimiters that have a mirror-image closing one */
10612 const char * opening_delims = "([{<";
10613 const char * closing_delims = ")]}>";
10615 /* The only non-UTF character that isn't a stand alone grapheme is
10616 * white-space, hence can't be a delimiter. */
10617 const char * non_grapheme_msg = "Use of unassigned code point or"
10618 " non-standalone grapheme for a delimiter"
10620 PERL_ARGS_ASSERT_SCAN_STR;
10622 /* skip space before the delimiter */
10627 /* mark where we are, in case we need to report errors */
10630 /* after skipping whitespace, the next character is the terminator */
10632 if (!UTF || UTF8_IS_INVARIANT(term)) {
10633 termcode = termstr[0] = term;
10637 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
10638 if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
10643 yyerror(non_grapheme_msg);
10646 Copy(s, termstr, termlen, U8);
10649 /* mark where we are */
10650 PL_multi_start = CopLINE(PL_curcop);
10651 PL_multi_open = termcode;
10652 herelines = PL_parser->herelines;
10654 /* If the delimiter has a mirror-image closing one, get it */
10655 if (term && (tmps = strchr(opening_delims, term))) {
10656 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
10659 PL_multi_close = termcode;
10661 if (PL_multi_open == PL_multi_close) {
10662 keep_bracketed_quoted = FALSE;
10665 /* create a new SV to hold the contents. 79 is the SV's initial length.
10666 What a random number. */
10667 sv = newSV_type(SVt_PVIV);
10669 SvIV_set(sv, termcode);
10670 (void)SvPOK_only(sv); /* validate pointer */
10672 /* move past delimiter and try to read a complete string */
10674 sv_catpvn(sv, s, termlen);
10677 /* extend sv if need be */
10678 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
10679 /* set 'to' to the next character in the sv's string */
10680 to = SvPVX(sv)+SvCUR(sv);
10682 /* if open delimiter is the close delimiter read unbridle */
10683 if (PL_multi_open == PL_multi_close) {
10684 for (; s < PL_bufend; s++,to++) {
10685 /* embedded newlines increment the current line number */
10686 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10687 COPLINE_INC_WITH_HERELINES;
10688 /* handle quoted delimiters */
10689 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
10690 if (!keep_bracketed_quoted
10692 || (re_reparse && s[1] == '\\'))
10695 else /* any other quotes are simply copied straight through */
10698 /* terminate when run out of buffer (the for() condition), or
10699 have found the terminator */
10700 else if (*s == term) { /* First byte of terminator matches */
10701 if (termlen == 1) /* If is the only byte, are done */
10704 /* If the remainder of the terminator matches, also are
10705 * done, after checking that is a separate grapheme */
10706 if ( s + termlen <= PL_bufend
10707 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
10710 && UNLIKELY(! _is_grapheme((U8 *) start,
10715 yyerror(non_grapheme_msg);
10720 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
10728 /* if the terminator isn't the same as the start character (e.g.,
10729 matched brackets), we have to allow more in the quoting, and
10730 be prepared for nested brackets.
10733 /* read until we run out of string, or we find the terminator */
10734 for (; s < PL_bufend; s++,to++) {
10735 /* embedded newlines increment the line count */
10736 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
10737 COPLINE_INC_WITH_HERELINES;
10738 /* backslashes can escape the open or closing characters */
10739 if (*s == '\\' && s+1 < PL_bufend) {
10740 if (!keep_bracketed_quoted
10741 && ( ((UV)s[1] == PL_multi_open)
10742 || ((UV)s[1] == PL_multi_close) ))
10749 /* allow nested opens and closes */
10750 else if ((UV)*s == PL_multi_close && --brackets <= 0)
10752 else if ((UV)*s == PL_multi_open)
10754 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
10759 /* terminate the copied string and update the sv's end-of-string */
10761 SvCUR_set(sv, to - SvPVX_const(sv));
10764 * this next chunk reads more into the buffer if we're not done yet
10768 break; /* handle case where we are done yet :-) */
10770 #ifndef PERL_STRICT_CR
10771 if (to - SvPVX_const(sv) >= 2) {
10772 if ( (to[-2] == '\r' && to[-1] == '\n')
10773 || (to[-2] == '\n' && to[-1] == '\r'))
10777 SvCUR_set(sv, to - SvPVX_const(sv));
10779 else if (to[-1] == '\r')
10782 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
10786 /* if we're out of file, or a read fails, bail and reset the current
10787 line marker so we can report where the unterminated string began
10789 COPLINE_INC_WITH_HERELINES;
10790 PL_bufptr = PL_bufend;
10791 if (!lex_next_chunk(0)) {
10793 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
10796 s = start = PL_bufptr;
10799 /* at this point, we have successfully read the delimited string */
10802 sv_catpvn(sv, s, termlen);
10808 PL_multi_end = CopLINE(PL_curcop);
10809 CopLINE_set(PL_curcop, PL_multi_start);
10810 PL_parser->herelines = herelines;
10812 /* if we allocated too much space, give some back */
10813 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10814 SvLEN_set(sv, SvCUR(sv) + 1);
10815 SvPV_renew(sv, SvLEN(sv));
10818 /* decide whether this is the first or second quoted string we've read
10823 PL_parser->lex_sub_repl = sv;
10826 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
10832 takes: pointer to position in buffer
10833 returns: pointer to new position in buffer
10834 side-effects: builds ops for the constant in pl_yylval.op
10836 Read a number in any of the formats that Perl accepts:
10838 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10839 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
10840 0b[01](_?[01])* binary integers
10841 0[0-7](_?[0-7])* octal integers
10842 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
10843 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
10845 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
10848 If it reads a number without a decimal point or an exponent, it will
10849 try converting the number to an integer and see if it can do so
10850 without loss of precision.
10854 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
10856 const char *s = start; /* current position in buffer */
10857 char *d; /* destination in temp buffer */
10858 char *e; /* end of temp buffer */
10859 NV nv; /* number read, as a double */
10860 SV *sv = NULL; /* place to put the converted number */
10861 bool floatit; /* boolean: int or float? */
10862 const char *lastub = NULL; /* position of last underbar */
10863 static const char* const number_too_long = "Number too long";
10864 bool warned_about_underscore = 0;
10865 #define WARN_ABOUT_UNDERSCORE() \
10867 if (!warned_about_underscore) { \
10868 warned_about_underscore = 1; \
10869 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
10870 "Misplaced _ in number"); \
10873 /* Hexadecimal floating point.
10875 * In many places (where we have quads and NV is IEEE 754 double)
10876 * we can fit the mantissa bits of a NV into an unsigned quad.
10877 * (Note that UVs might not be quads even when we have quads.)
10878 * This will not work everywhere, though (either no quads, or
10879 * using long doubles), in which case we have to resort to NV,
10880 * which will probably mean horrible loss of precision due to
10881 * multiple fp operations. */
10882 bool hexfp = FALSE;
10883 int total_bits = 0;
10884 int significant_bits = 0;
10885 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
10886 # define HEXFP_UQUAD
10887 Uquad_t hexfp_uquad = 0;
10888 int hexfp_frac_bits = 0;
10893 NV hexfp_mult = 1.0;
10894 UV high_non_zero = 0; /* highest digit */
10895 int non_zero_integer_digits = 0;
10897 PERL_ARGS_ASSERT_SCAN_NUM;
10899 /* We use the first character to decide what type of number this is */
10903 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
10905 /* if it starts with a 0, it could be an octal number, a decimal in
10906 0.13 disguise, or a hexadecimal number, or a binary number. */
10910 u holds the "number so far"
10911 shift the power of 2 of the base
10912 (hex == 4, octal == 3, binary == 1)
10913 overflowed was the number more than we can hold?
10915 Shift is used when we add a digit. It also serves as an "are
10916 we in octal/hex/binary?" indicator to disallow hex characters
10917 when in octal mode.
10922 bool overflowed = FALSE;
10923 bool just_zero = TRUE; /* just plain 0 or binary number? */
10924 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10925 static const char* const bases[5] =
10926 { "", "binary", "", "octal", "hexadecimal" };
10927 static const char* const Bases[5] =
10928 { "", "Binary", "", "Octal", "Hexadecimal" };
10929 static const char* const maxima[5] =
10931 "0b11111111111111111111111111111111",
10935 const char *base, *Base, *max;
10937 /* check for hex */
10938 if (isALPHA_FOLD_EQ(s[1], 'x')) {
10942 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
10947 /* check for a decimal in disguise */
10948 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
10950 /* so it must be octal */
10957 WARN_ABOUT_UNDERSCORE();
10961 base = bases[shift];
10962 Base = Bases[shift];
10963 max = maxima[shift];
10965 /* read the rest of the number */
10967 /* x is used in the overflow test,
10968 b is the digit we're adding on. */
10973 /* if we don't mention it, we're done */
10977 /* _ are ignored -- but warned about if consecutive */
10979 if (lastub && s == lastub + 1)
10980 WARN_ABOUT_UNDERSCORE();
10984 /* 8 and 9 are not octal */
10985 case '8': case '9':
10987 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
10991 case '2': case '3': case '4':
10992 case '5': case '6': case '7':
10994 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
10997 case '0': case '1':
10998 b = *s++ & 15; /* ASCII digit -> value of digit */
11002 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11003 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11004 /* make sure they said 0x */
11007 b = (*s++ & 7) + 9;
11009 /* Prepare to put the digit we have onto the end
11010 of the number so far. We check for overflows.
11016 assert(shift >= 0);
11017 x = u << shift; /* make room for the digit */
11019 total_bits += shift;
11021 if ((x >> shift) != u
11022 && !(PL_hints & HINT_NEW_BINARY)) {
11025 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11026 "Integer overflow in %s number",
11029 u = x | b; /* add the digit to the end */
11032 n *= nvshift[shift];
11033 /* If an NV has not enough bits in its
11034 * mantissa to represent an UV this summing of
11035 * small low-order numbers is a waste of time
11036 * (because the NV cannot preserve the
11037 * low-order bits anyway): we could just
11038 * remember when did we overflow and in the
11039 * end just multiply n by the right
11044 if (high_non_zero == 0 && b > 0)
11048 non_zero_integer_digits++;
11050 /* this could be hexfp, but peek ahead
11051 * to avoid matching ".." */
11052 if (UNLIKELY(HEXFP_PEEK(s))) {
11060 /* if we get here, we had success: make a scalar value from
11065 /* final misplaced underbar check */
11067 WARN_ABOUT_UNDERSCORE();
11069 if (UNLIKELY(HEXFP_PEEK(s))) {
11070 /* Do sloppy (on the underbars) but quick detection
11071 * (and value construction) for hexfp, the decimal
11072 * detection will shortly be more thorough with the
11073 * underbar checks. */
11075 significant_bits = non_zero_integer_digits * shift;
11078 #else /* HEXFP_NV */
11081 /* Ignore the leading zero bits of
11082 * the high (first) non-zero digit. */
11083 if (high_non_zero) {
11084 if (high_non_zero < 0x8)
11085 significant_bits--;
11086 if (high_non_zero < 0x4)
11087 significant_bits--;
11088 if (high_non_zero < 0x2)
11089 significant_bits--;
11096 bool accumulate = TRUE;
11098 int lim = 1 << shift;
11099 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11101 if (isXDIGIT(*h)) {
11102 significant_bits += shift;
11105 if (significant_bits < NV_MANT_DIG) {
11106 /* We are in the long "run" of xdigits,
11107 * accumulate the full four bits. */
11108 assert(shift >= 0);
11109 hexfp_uquad <<= shift;
11111 hexfp_frac_bits += shift;
11112 } else if (significant_bits - shift < NV_MANT_DIG) {
11113 /* We are at a hexdigit either at,
11114 * or straddling, the edge of mantissa.
11115 * We will try grabbing as many as
11116 * possible bits. */
11118 significant_bits - NV_MANT_DIG;
11122 hexfp_uquad <<= tail;
11123 assert((shift - tail) >= 0);
11124 hexfp_uquad |= b >> (shift - tail);
11125 hexfp_frac_bits += tail;
11127 /* Ignore the trailing zero bits
11128 * of the last non-zero xdigit.
11130 * The assumption here is that if
11131 * one has input of e.g. the xdigit
11132 * eight (0x8), there is only one
11133 * bit being input, not the full
11134 * four bits. Conversely, if one
11135 * specifies a zero xdigit, the
11136 * assumption is that one really
11137 * wants all those bits to be zero. */
11139 if ((b & 0x1) == 0x0) {
11140 significant_bits--;
11141 if ((b & 0x2) == 0x0) {
11142 significant_bits--;
11143 if ((b & 0x4) == 0x0) {
11144 significant_bits--;
11150 accumulate = FALSE;
11153 /* Keep skipping the xdigits, and
11154 * accumulating the significant bits,
11155 * but do not shift the uquad
11156 * (which would catastrophically drop
11157 * high-order bits) or accumulate the
11158 * xdigits anymore. */
11160 #else /* HEXFP_NV */
11162 nv_mult /= nvshift[shift];
11164 hexfp_nv += b * nv_mult;
11166 accumulate = FALSE;
11170 if (significant_bits >= NV_MANT_DIG)
11171 accumulate = FALSE;
11175 if ((total_bits > 0 || significant_bits > 0) &&
11176 isALPHA_FOLD_EQ(*h, 'p')) {
11177 bool negexp = FALSE;
11181 else if (*h == '-') {
11187 while (isDIGIT(*h) || *h == '_') {
11190 hexfp_exp += *h - '0';
11193 && -hexfp_exp < NV_MIN_EXP - 1) {
11194 /* NOTE: this means that the exponent
11195 * underflow warning happens for
11196 * the IEEE 754 subnormals (denormals),
11197 * because DBL_MIN_EXP etc are the lowest
11198 * possible binary (or, rather, DBL_RADIX-base)
11199 * exponent for normals, not subnormals.
11201 * This may or may not be a good thing. */
11202 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11203 "Hexadecimal float: exponent underflow");
11209 && hexfp_exp > NV_MAX_EXP - 1) {
11210 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11211 "Hexadecimal float: exponent overflow");
11219 hexfp_exp = -hexfp_exp;
11221 hexfp_exp -= hexfp_frac_bits;
11223 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11231 if (n > 4294967295.0)
11232 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11233 "%s number > %s non-portable",
11239 if (u > 0xffffffff)
11240 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11241 "%s number > %s non-portable",
11246 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11247 sv = new_constant(start, s - start, "integer",
11248 sv, NULL, NULL, 0);
11249 else if (PL_hints & HINT_NEW_BINARY)
11250 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
11255 handle decimal numbers.
11256 we're also sent here when we read a 0 as the first digit
11258 case '1': case '2': case '3': case '4': case '5':
11259 case '6': case '7': case '8': case '9': case '.':
11262 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11271 /* read next group of digits and _ and copy into d */
11274 || UNLIKELY(hexfp && isXDIGIT(*s)))
11276 /* skip underscores, checking for misplaced ones
11280 if (lastub && s == lastub + 1)
11281 WARN_ABOUT_UNDERSCORE();
11285 /* check for end of fixed-length buffer */
11287 Perl_croak(aTHX_ "%s", number_too_long);
11288 /* if we're ok, copy the character */
11293 /* final misplaced underbar check */
11294 if (lastub && s == lastub + 1)
11295 WARN_ABOUT_UNDERSCORE();
11297 /* read a decimal portion if there is one. avoid
11298 3..5 being interpreted as the number 3. followed
11301 if (*s == '.' && s[1] != '.') {
11306 WARN_ABOUT_UNDERSCORE();
11310 /* copy, ignoring underbars, until we run out of digits.
11314 || UNLIKELY(hexfp && isXDIGIT(*s));
11317 /* fixed length buffer check */
11319 Perl_croak(aTHX_ "%s", number_too_long);
11321 if (lastub && s == lastub + 1)
11322 WARN_ABOUT_UNDERSCORE();
11328 /* fractional part ending in underbar? */
11330 WARN_ABOUT_UNDERSCORE();
11331 if (*s == '.' && isDIGIT(s[1])) {
11332 /* oops, it's really a v-string, but without the "v" */
11338 /* read exponent part, if present */
11339 if ((isALPHA_FOLD_EQ(*s, 'e')
11340 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11341 && strchr("+-0123456789_", s[1]))
11343 int exp_digits = 0;
11344 const char *save_s = s;
11347 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11348 ditto for p (hexfloats) */
11349 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11350 /* At least some Mach atof()s don't grok 'E' */
11353 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11360 /* stray preinitial _ */
11362 WARN_ABOUT_UNDERSCORE();
11366 /* allow positive or negative exponent */
11367 if (*s == '+' || *s == '-')
11370 /* stray initial _ */
11372 WARN_ABOUT_UNDERSCORE();
11376 /* read digits of exponent */
11377 while (isDIGIT(*s) || *s == '_') {
11381 Perl_croak(aTHX_ "%s", number_too_long);
11385 if (((lastub && s == lastub + 1)
11386 || (!isDIGIT(s[1]) && s[1] != '_')))
11387 WARN_ABOUT_UNDERSCORE();
11393 /* no exponent digits, the [eEpP] could be for something else,
11394 * though in practice we don't get here for p since that's preparsed
11395 * earlier, and results in only the 0xX being consumed, so behave similarly
11396 * for decimal floats and consume only the D.DD, leaving the [eE] to the
11409 We try to do an integer conversion first if no characters
11410 indicating "float" have been found.
11415 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11417 if (flags == IS_NUMBER_IN_UV) {
11419 sv = newSViv(uv); /* Prefer IVs over UVs. */
11422 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11423 if (uv <= (UV) IV_MIN)
11424 sv = newSViv(-(IV)uv);
11431 /* terminate the string */
11433 if (UNLIKELY(hexfp)) {
11434 # ifdef NV_MANT_DIG
11435 if (significant_bits > NV_MANT_DIG)
11436 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11437 "Hexadecimal float: mantissa overflow");
11440 nv = hexfp_uquad * hexfp_mult;
11441 #else /* HEXFP_NV */
11442 nv = hexfp_nv * hexfp_mult;
11445 nv = Atof(PL_tokenbuf);
11451 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11452 const char *const key = floatit ? "float" : "integer";
11453 const STRLEN keylen = floatit ? 5 : 7;
11454 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11455 key, keylen, sv, NULL, NULL, 0);
11459 /* if it starts with a v, it could be a v-string */
11462 sv = newSV(5); /* preallocate storage space */
11463 ENTER_with_name("scan_vstring");
11465 s = scan_vstring(s, PL_bufend, sv);
11466 SvREFCNT_inc_simple_void_NN(sv);
11467 LEAVE_with_name("scan_vstring");
11471 /* make the op for the constant and return */
11474 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11476 lvalp->opval = NULL;
11482 S_scan_formline(pTHX_ char *s)
11484 SV * const stuff = newSVpvs("");
11485 bool needargs = FALSE;
11486 bool eofmt = FALSE;
11488 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11490 while (!needargs) {
11494 #ifdef PERL_STRICT_CR
11495 while (SPACE_OR_TAB(*t))
11498 while (SPACE_OR_TAB(*t) || *t == '\r')
11501 if (*t == '\n' || t == PL_bufend) {
11506 eol = (char *) memchr(s,'\n',PL_bufend-s);
11511 for (t = s; t < eol; t++) {
11512 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11514 goto enough; /* ~~ must be first line in formline */
11516 if (*t == '@' || *t == '^')
11520 sv_catpvn(stuff, s, eol-s);
11521 #ifndef PERL_STRICT_CR
11522 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11523 char *end = SvPVX(stuff) + SvCUR(stuff);
11526 SvCUR_set(stuff, SvCUR(stuff) - 1);
11534 if ((PL_rsfp || PL_parser->filtered)
11535 && PL_parser->form_lex_state == LEX_NORMAL) {
11537 PL_bufptr = PL_bufend;
11538 COPLINE_INC_WITH_HERELINES;
11539 got_some = lex_next_chunk(0);
11540 CopLINE_dec(PL_curcop);
11545 incline(s, PL_bufend);
11548 if (!SvCUR(stuff) || needargs)
11549 PL_lex_state = PL_parser->form_lex_state;
11550 if (SvCUR(stuff)) {
11551 PL_expect = XSTATE;
11553 const char *s2 = s;
11554 while (isSPACE(*s2) && *s2 != '\n')
11557 PL_expect = XTERMBLOCK;
11558 NEXTVAL_NEXTTOKE.ival = 0;
11561 NEXTVAL_NEXTTOKE.ival = 0;
11562 force_next(FORMLBRACK);
11565 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11568 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11572 SvREFCNT_dec(stuff);
11574 PL_lex_formbrack = 0;
11580 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11582 const I32 oldsavestack_ix = PL_savestack_ix;
11583 CV* const outsidecv = PL_compcv;
11585 SAVEI32(PL_subline);
11586 save_item(PL_subname);
11587 SAVESPTR(PL_compcv);
11589 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11590 CvFLAGS(PL_compcv) |= flags;
11592 PL_subline = CopLINE(PL_curcop);
11593 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
11594 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
11595 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
11596 if (outsidecv && CvPADLIST(outsidecv))
11597 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
11599 return oldsavestack_ix;
11603 /* Do extra initialisation of a CV (typically one just created by
11604 * start_subparse()) if that CV is for a named sub
11608 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
11610 PERL_ARGS_ASSERT_INIT_NAMED_CV;
11612 if (nameop->op_type == OP_CONST) {
11613 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
11614 if ( strEQ(name, "BEGIN")
11615 || strEQ(name, "END")
11616 || strEQ(name, "INIT")
11617 || strEQ(name, "CHECK")
11618 || strEQ(name, "UNITCHECK")
11623 /* State subs inside anonymous subs need to be
11624 clonable themselves. */
11625 if ( CvANON(CvOUTSIDE(cv))
11626 || CvCLONE(CvOUTSIDE(cv))
11627 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
11629 ))[nameop->op_targ])
11636 S_yywarn(pTHX_ const char *const s, U32 flags)
11638 PERL_ARGS_ASSERT_YYWARN;
11640 PL_in_eval |= EVAL_WARNONLY;
11641 yyerror_pv(s, flags);
11646 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
11648 PERL_ARGS_ASSERT_ABORT_EXECUTION;
11651 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
11654 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
11656 NOT_REACHED; /* NOTREACHED */
11662 /* Called, after at least one error has been found, to abort the parse now,
11663 * instead of trying to forge ahead */
11665 yyerror_pvn(NULL, 0, 0);
11669 Perl_yyerror(pTHX_ const char *const s)
11671 PERL_ARGS_ASSERT_YYERROR;
11672 return yyerror_pvn(s, strlen(s), 0);
11676 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11678 PERL_ARGS_ASSERT_YYERROR_PV;
11679 return yyerror_pvn(s, strlen(s), flags);
11683 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11685 const char *context = NULL;
11688 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
11689 int yychar = PL_parser->yychar;
11691 /* Output error message 's' with length 'len'. 'flags' are SV flags that
11692 * apply. If the number of errors found is large enough, it abandons
11693 * parsing. If 's' is NULL, there is no message, and it abandons
11694 * processing unconditionally */
11697 if (!yychar || (yychar == ';' && !PL_rsfp))
11698 sv_catpvs(where_sv, "at EOF");
11699 else if ( PL_oldoldbufptr
11700 && PL_bufptr > PL_oldoldbufptr
11701 && PL_bufptr - PL_oldoldbufptr < 200
11702 && PL_oldoldbufptr != PL_oldbufptr
11703 && PL_oldbufptr != PL_bufptr)
11707 The code below is removed for NetWare because it
11708 abends/crashes on NetWare when the script has error such as
11709 not having the closing quotes like:
11710 if ($var eq "value)
11711 Checking of white spaces is anyway done in NetWare code.
11714 while (isSPACE(*PL_oldoldbufptr))
11717 context = PL_oldoldbufptr;
11718 contlen = PL_bufptr - PL_oldoldbufptr;
11720 else if ( PL_oldbufptr
11721 && PL_bufptr > PL_oldbufptr
11722 && PL_bufptr - PL_oldbufptr < 200
11723 && PL_oldbufptr != PL_bufptr) {
11726 The code below is removed for NetWare because it
11727 abends/crashes on NetWare when the script has error such as
11728 not having the closing quotes like:
11729 if ($var eq "value)
11730 Checking of white spaces is anyway done in NetWare code.
11733 while (isSPACE(*PL_oldbufptr))
11736 context = PL_oldbufptr;
11737 contlen = PL_bufptr - PL_oldbufptr;
11739 else if (yychar > 255)
11740 sv_catpvs(where_sv, "next token ???");
11741 else if (yychar == YYEMPTY) {
11742 if (PL_lex_state == LEX_NORMAL)
11743 sv_catpvs(where_sv, "at end of line");
11744 else if (PL_lex_inpat)
11745 sv_catpvs(where_sv, "within pattern");
11747 sv_catpvs(where_sv, "within string");
11750 sv_catpvs(where_sv, "next char ");
11752 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
11753 else if (isPRINT_LC(yychar)) {
11754 const char string = yychar;
11755 sv_catpvn(where_sv, &string, 1);
11758 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
11760 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
11761 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
11762 OutCopFILE(PL_curcop),
11763 (IV)(PL_parser->preambling == NOLINE
11764 ? CopLINE(PL_curcop)
11765 : PL_parser->preambling));
11767 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
11768 UTF8fARG(UTF, contlen, context));
11770 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
11771 if ( PL_multi_start < PL_multi_end
11772 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
11774 Perl_sv_catpvf(aTHX_ msg,
11775 " (Might be a runaway multi-line %c%c string starting on"
11776 " line %" IVdf ")\n",
11777 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
11780 if (PL_in_eval & EVAL_WARNONLY) {
11781 PL_in_eval &= ~EVAL_WARNONLY;
11782 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
11788 if (s == NULL || PL_error_count >= 10) {
11789 const char * msg = "";
11790 const char * const name = OutCopFILE(PL_curcop);
11793 SV * errsv = ERRSV;
11794 if (SvCUR(errsv)) {
11795 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
11800 abort_execution(msg, name);
11803 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
11807 PL_in_my_stash = NULL;
11812 S_swallow_bom(pTHX_ U8 *s)
11814 const STRLEN slen = SvCUR(PL_linestr);
11816 PERL_ARGS_ASSERT_SWALLOW_BOM;
11820 if (s[1] == 0xFE) {
11821 /* UTF-16 little-endian? (or UTF-32LE?) */
11822 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
11823 /* diag_listed_as: Unsupported script encoding %s */
11824 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
11825 #ifndef PERL_NO_UTF16_FILTER
11827 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
11830 if (PL_bufend > (char*)s) {
11831 s = add_utf16_textfilter(s, TRUE);
11834 /* diag_listed_as: Unsupported script encoding %s */
11835 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11840 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
11841 #ifndef PERL_NO_UTF16_FILTER
11843 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
11846 if (PL_bufend > (char *)s) {
11847 s = add_utf16_textfilter(s, FALSE);
11850 /* diag_listed_as: Unsupported script encoding %s */
11851 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11855 case BOM_UTF8_FIRST_BYTE: {
11856 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
11858 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11860 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
11867 if (s[2] == 0xFE && s[3] == 0xFF) {
11868 /* UTF-32 big-endian */
11869 /* diag_listed_as: Unsupported script encoding %s */
11870 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
11873 else if (s[2] == 0 && s[3] != 0) {
11876 * are a good indicator of UTF-16BE. */
11877 #ifndef PERL_NO_UTF16_FILTER
11879 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
11881 s = add_utf16_textfilter(s, FALSE);
11883 /* diag_listed_as: Unsupported script encoding %s */
11884 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11891 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11894 * are a good indicator of UTF-16LE. */
11895 #ifndef PERL_NO_UTF16_FILTER
11897 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
11899 s = add_utf16_textfilter(s, TRUE);
11901 /* diag_listed_as: Unsupported script encoding %s */
11902 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11910 #ifndef PERL_NO_UTF16_FILTER
11912 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
11914 SV *const filter = FILTER_DATA(idx);
11915 /* We re-use this each time round, throwing the contents away before we
11917 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
11918 SV *const utf8_buffer = filter;
11919 IV status = IoPAGE(filter);
11920 const bool reverse = cBOOL(IoLINES(filter));
11923 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11925 /* As we're automatically added, at the lowest level, and hence only called
11926 from this file, we can be sure that we're not called in block mode. Hence
11927 don't bother writing code to deal with block mode. */
11929 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11932 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
11934 DEBUG_P(PerlIO_printf(Perl_debug_log,
11935 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
11936 FPTR2DPTR(void *, S_utf16_textfilter),
11937 reverse ? 'l' : 'b', idx, maxlen, status,
11938 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11945 /* First, look in our buffer of existing UTF-8 data: */
11946 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11950 } else if (status == 0) {
11952 IoPAGE(filter) = 0;
11953 nl = SvEND(utf8_buffer);
11956 STRLEN got = nl - SvPVX(utf8_buffer);
11957 /* Did we have anything to append? */
11959 sv_catpvn(sv, SvPVX(utf8_buffer), got);
11960 /* Everything else in this code works just fine if SVp_POK isn't
11961 set. This, however, needs it, and we need it to work, else
11962 we loop infinitely because the buffer is never consumed. */
11963 sv_chop(utf8_buffer, nl);
11967 /* OK, not a complete line there, so need to read some more UTF-16.
11968 Read an extra octect if the buffer currently has an odd number. */
11972 if (SvCUR(utf16_buffer) >= 2) {
11973 /* Location of the high octet of the last complete code point.
11974 Gosh, UTF-16 is a pain. All the benefits of variable length,
11975 *coupled* with all the benefits of partial reads and
11977 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11978 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11980 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11984 /* We have the first half of a surrogate. Read more. */
11985 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11988 status = FILTER_READ(idx + 1, utf16_buffer,
11989 160 + (SvCUR(utf16_buffer) & 1));
11990 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
11991 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
11994 IoPAGE(filter) = status;
11999 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12000 * require 4 bytes per char */
12001 chars = SvCUR(utf16_buffer) >> 1;
12002 have = SvCUR(utf8_buffer);
12004 /* Assume the worst case size as noted by the functions: twice the
12005 * number of input bytes */
12006 SvGROW(utf8_buffer, have + chars * 4 + 1);
12009 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12010 (U8*)SvPVX_const(utf8_buffer) + have,
12011 chars * 2, &newlen);
12013 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12014 (U8*)SvPVX_const(utf8_buffer) + have,
12015 chars * 2, &newlen);
12017 SvCUR_set(utf8_buffer, have + newlen);
12020 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12021 it's private to us, and utf16_to_utf8{,reversed} take a
12022 (pointer,length) pair, rather than a NUL-terminated string. */
12023 if(SvCUR(utf16_buffer) & 1) {
12024 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12025 SvCUR_set(utf16_buffer, 1);
12027 SvCUR_set(utf16_buffer, 0);
12030 DEBUG_P(PerlIO_printf(Perl_debug_log,
12031 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12033 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12034 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12039 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12041 SV *filter = filter_add(S_utf16_textfilter, NULL);
12043 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12045 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12047 IoLINES(filter) = reversed;
12048 IoPAGE(filter) = 1; /* Not EOF */
12050 /* Sadly, we have to return a valid pointer, come what may, so we have to
12051 ignore any error return from this. */
12052 SvCUR_set(PL_linestr, 0);
12053 if (FILTER_READ(0, PL_linestr, 0)) {
12054 SvUTF8_on(PL_linestr);
12056 SvUTF8_on(PL_linestr);
12058 PL_bufend = SvEND(PL_linestr);
12059 return (U8*)SvPVX(PL_linestr);
12064 Returns a pointer to the next character after the parsed
12065 vstring, as well as updating the passed in sv.
12067 Function must be called like
12069 sv = sv_2mortal(newSV(5));
12070 s = scan_vstring(s,e,sv);
12072 where s and e are the start and end of the string.
12073 The sv should already be large enough to store the vstring
12074 passed in, for performance reasons.
12076 This function may croak if fatal warnings are enabled in the
12077 calling scope, hence the sv_2mortal in the example (to prevent
12078 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12084 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12086 const char *pos = s;
12087 const char *start = s;
12089 PERL_ARGS_ASSERT_SCAN_VSTRING;
12091 if (*pos == 'v') pos++; /* get past 'v' */
12092 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12094 if ( *pos != '.') {
12095 /* this may not be a v-string if followed by => */
12096 const char *next = pos;
12097 while (next < e && isSPACE(*next))
12099 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12100 /* return string not v-string */
12101 sv_setpvn(sv,(char *)s,pos-s);
12102 return (char *)pos;
12106 if (!isALPHA(*pos)) {
12107 U8 tmpbuf[UTF8_MAXBYTES+1];
12110 s++; /* get past 'v' */
12115 /* this is atoi() that tolerates underscores */
12118 const char *end = pos;
12120 while (--end >= s) {
12122 const UV orev = rev;
12123 rev += (*end - '0') * mult;
12126 /* diag_listed_as: Integer overflow in %s number */
12127 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12128 "Integer overflow in decimal number");
12132 /* Append native character for the rev point */
12133 tmpend = uvchr_to_utf8(tmpbuf, rev);
12134 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12135 if (!UVCHR_IS_INVARIANT(rev))
12137 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12143 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12147 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12154 Perl_keyword_plugin_standard(pTHX_
12155 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12157 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12158 PERL_UNUSED_CONTEXT;
12159 PERL_UNUSED_ARG(keyword_ptr);
12160 PERL_UNUSED_ARG(keyword_len);
12161 PERL_UNUSED_ARG(op_ptr);
12162 return KEYWORD_PLUGIN_DECLINE;
12166 =for apidoc Amx|void|wrap_keyword_plugin|Perl_keyword_plugin_t new_plugin|Perl_keyword_plugin_t *old_plugin_p
12168 Puts a C function into the chain of keyword plugins. This is the
12169 preferred way to manipulate the L</PL_keyword_plugin> variable.
12170 C<new_plugin> is a pointer to the C function that is to be added to the
12171 keyword plugin chain, and C<old_plugin_p> points to the storage location
12172 where a pointer to the next function in the chain will be stored. The
12173 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12174 while the value previously stored there is written to C<*old_plugin_p>.
12176 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12177 to hook keyword parsing may find itself invoked more than once per
12178 process, typically in different threads. To handle that situation, this
12179 function is idempotent. The location C<*old_plugin_p> must initially
12180 (once per process) contain a null pointer. A C variable of static
12181 duration (declared at file scope, typically also marked C<static> to give
12182 it internal linkage) will be implicitly initialised appropriately, if it
12183 does not have an explicit initialiser. This function will only actually
12184 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12185 function is also thread safe on the small scale. It uses appropriate
12186 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12188 When this function is called, the function referenced by C<new_plugin>
12189 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12190 In a threading situation, C<new_plugin> may be called immediately, even
12191 before this function has returned. C<*old_plugin_p> will always be
12192 appropriately set before C<new_plugin> is called. If C<new_plugin>
12193 decides not to do anything special with the identifier that it is given
12194 (which is the usual case for most calls to a keyword plugin), it must
12195 chain the plugin function referenced by C<*old_plugin_p>.
12197 Taken all together, XS code to install a keyword plugin should typically
12198 look something like this:
12200 static Perl_keyword_plugin_t next_keyword_plugin;
12201 static OP *my_keyword_plugin(pTHX_
12202 char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
12204 if (memEQs(keyword_ptr, keyword_len,
12205 "my_new_keyword")) {
12208 return next_keyword_plugin(aTHX_
12209 keyword_ptr, keyword_len, op_ptr);
12213 wrap_keyword_plugin(my_keyword_plugin,
12214 &next_keyword_plugin);
12216 Direct access to L</PL_keyword_plugin> should be avoided.
12222 Perl_wrap_keyword_plugin(pTHX_
12223 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12227 PERL_UNUSED_CONTEXT;
12228 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12229 if (*old_plugin_p) return;
12230 KEYWORD_PLUGIN_MUTEX_LOCK;
12231 if (!*old_plugin_p) {
12232 *old_plugin_p = PL_keyword_plugin;
12233 PL_keyword_plugin = new_plugin;
12235 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12238 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12240 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12242 SAVEI32(PL_lex_brackets);
12243 if (PL_lex_brackets > 100)
12244 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12245 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12246 SAVEI32(PL_lex_allbrackets);
12247 PL_lex_allbrackets = 0;
12248 SAVEI8(PL_lex_fakeeof);
12249 PL_lex_fakeeof = (U8)fakeeof;
12250 if(yyparse(gramtype) && !PL_parser->error_count)
12251 qerror(Perl_mess(aTHX_ "Parse error"));
12254 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12256 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12260 SAVEVPTR(PL_eval_root);
12261 PL_eval_root = NULL;
12262 parse_recdescent(gramtype, fakeeof);
12268 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12270 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12273 if (flags & ~PARSE_OPTIONAL)
12274 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12275 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12276 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12277 if (!PL_parser->error_count)
12278 qerror(Perl_mess(aTHX_ "Parse error"));
12279 exprop = newOP(OP_NULL, 0);
12285 =for apidoc Amx|OP *|parse_arithexpr|U32 flags
12287 Parse a Perl arithmetic expression. This may contain operators of precedence
12288 down to the bit shift operators. The expression must be followed (and thus
12289 terminated) either by a comparison or lower-precedence operator or by
12290 something that would normally terminate an expression such as semicolon.
12291 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12292 otherwise it is mandatory. It is up to the caller to ensure that the
12293 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12294 the source of the code to be parsed and the lexical context for the
12297 The op tree representing the expression is returned. If an optional
12298 expression is absent, a null pointer is returned, otherwise the pointer
12301 If an error occurs in parsing or compilation, in most cases a valid op
12302 tree is returned anyway. The error is reflected in the parser state,
12303 normally resulting in a single exception at the top level of parsing
12304 which covers all the compilation errors that occurred. Some compilation
12305 errors, however, will throw an exception immediately.
12311 Perl_parse_arithexpr(pTHX_ U32 flags)
12313 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12317 =for apidoc Amx|OP *|parse_termexpr|U32 flags
12319 Parse a Perl term expression. This may contain operators of precedence
12320 down to the assignment operators. The expression must be followed (and thus
12321 terminated) either by a comma or lower-precedence operator or by
12322 something that would normally terminate an expression such as semicolon.
12323 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12324 otherwise it is mandatory. It is up to the caller to ensure that the
12325 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12326 the source of the code to be parsed and the lexical context for the
12329 The op tree representing the expression is returned. If an optional
12330 expression is absent, a null pointer is returned, otherwise the pointer
12333 If an error occurs in parsing or compilation, in most cases a valid op
12334 tree is returned anyway. The error is reflected in the parser state,
12335 normally resulting in a single exception at the top level of parsing
12336 which covers all the compilation errors that occurred. Some compilation
12337 errors, however, will throw an exception immediately.
12343 Perl_parse_termexpr(pTHX_ U32 flags)
12345 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12349 =for apidoc Amx|OP *|parse_listexpr|U32 flags
12351 Parse a Perl list expression. This may contain operators of precedence
12352 down to the comma operator. The expression must be followed (and thus
12353 terminated) either by a low-precedence logic operator such as C<or> or by
12354 something that would normally terminate an expression such as semicolon.
12355 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12356 otherwise it is mandatory. It is up to the caller to ensure that the
12357 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12358 the source of the code to be parsed and the lexical context for the
12361 The op tree representing the expression is returned. If an optional
12362 expression is absent, a null pointer is returned, otherwise the pointer
12365 If an error occurs in parsing or compilation, in most cases a valid op
12366 tree is returned anyway. The error is reflected in the parser state,
12367 normally resulting in a single exception at the top level of parsing
12368 which covers all the compilation errors that occurred. Some compilation
12369 errors, however, will throw an exception immediately.
12375 Perl_parse_listexpr(pTHX_ U32 flags)
12377 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12381 =for apidoc Amx|OP *|parse_fullexpr|U32 flags
12383 Parse a single complete Perl expression. This allows the full
12384 expression grammar, including the lowest-precedence operators such
12385 as C<or>. The expression must be followed (and thus terminated) by a
12386 token that an expression would normally be terminated by: end-of-file,
12387 closing bracketing punctuation, semicolon, or one of the keywords that
12388 signals a postfix expression-statement modifier. If C<flags> has the
12389 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12390 mandatory. It is up to the caller to ensure that the dynamic parser
12391 state (L</PL_parser> et al) is correctly set to reflect the source of
12392 the code to be parsed and the lexical context for the expression.
12394 The op tree representing the expression is returned. If an optional
12395 expression is absent, a null pointer is returned, otherwise the pointer
12398 If an error occurs in parsing or compilation, in most cases a valid op
12399 tree is returned anyway. The error is reflected in the parser state,
12400 normally resulting in a single exception at the top level of parsing
12401 which covers all the compilation errors that occurred. Some compilation
12402 errors, however, will throw an exception immediately.
12408 Perl_parse_fullexpr(pTHX_ U32 flags)
12410 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12414 =for apidoc Amx|OP *|parse_block|U32 flags
12416 Parse a single complete Perl code block. This consists of an opening
12417 brace, a sequence of statements, and a closing brace. The block
12418 constitutes a lexical scope, so C<my> variables and various compile-time
12419 effects can be contained within it. It is up to the caller to ensure
12420 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12421 reflect the source of the code to be parsed and the lexical context for
12424 The op tree representing the code block is returned. This is always a
12425 real op, never a null pointer. It will normally be a C<lineseq> list,
12426 including C<nextstate> or equivalent ops. No ops to construct any kind
12427 of runtime scope are included by virtue of it being a block.
12429 If an error occurs in parsing or compilation, in most cases a valid op
12430 tree (most likely null) is returned anyway. The error is reflected in
12431 the parser state, normally resulting in a single exception at the top
12432 level of parsing which covers all the compilation errors that occurred.
12433 Some compilation errors, however, will throw an exception immediately.
12435 The C<flags> parameter is reserved for future use, and must always
12442 Perl_parse_block(pTHX_ U32 flags)
12445 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12446 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12450 =for apidoc Amx|OP *|parse_barestmt|U32 flags
12452 Parse a single unadorned Perl statement. This may be a normal imperative
12453 statement or a declaration that has compile-time effect. It does not
12454 include any label or other affixture. It is up to the caller to ensure
12455 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12456 reflect the source of the code to be parsed and the lexical context for
12459 The op tree representing the statement is returned. This may be a
12460 null pointer if the statement is null, for example if it was actually
12461 a subroutine definition (which has compile-time side effects). If not
12462 null, it will be ops directly implementing the statement, suitable to
12463 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12464 equivalent op (except for those embedded in a scope contained entirely
12465 within the statement).
12467 If an error occurs in parsing or compilation, in most cases a valid op
12468 tree (most likely null) is returned anyway. The error is reflected in
12469 the parser state, normally resulting in a single exception at the top
12470 level of parsing which covers all the compilation errors that occurred.
12471 Some compilation errors, however, will throw an exception immediately.
12473 The C<flags> parameter is reserved for future use, and must always
12480 Perl_parse_barestmt(pTHX_ U32 flags)
12483 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12484 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12488 =for apidoc Amx|SV *|parse_label|U32 flags
12490 Parse a single label, possibly optional, of the type that may prefix a
12491 Perl statement. It is up to the caller to ensure that the dynamic parser
12492 state (L</PL_parser> et al) is correctly set to reflect the source of
12493 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12494 label is optional, otherwise it is mandatory.
12496 The name of the label is returned in the form of a fresh scalar. If an
12497 optional label is absent, a null pointer is returned.
12499 If an error occurs in parsing, which can only occur if the label is
12500 mandatory, a valid label is returned anyway. The error is reflected in
12501 the parser state, normally resulting in a single exception at the top
12502 level of parsing which covers all the compilation errors that occurred.
12508 Perl_parse_label(pTHX_ U32 flags)
12510 if (flags & ~PARSE_OPTIONAL)
12511 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12513 PL_parser->yychar = yylex();
12514 if (PL_parser->yychar == LABEL) {
12515 char * const lpv = pl_yylval.pval;
12516 STRLEN llen = strlen(lpv);
12517 PL_parser->yychar = YYEMPTY;
12518 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
12525 STRLEN wlen, bufptr_pos;
12528 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12530 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12531 if (word_takes_any_delimiter(s, wlen))
12533 bufptr_pos = s - SvPVX(PL_linestr);
12535 lex_read_space(LEX_KEEP_PREVIOUS);
12537 s = SvPVX(PL_linestr) + bufptr_pos;
12538 if (t[0] == ':' && t[1] != ':') {
12539 PL_oldoldbufptr = PL_oldbufptr;
12542 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12546 if (flags & PARSE_OPTIONAL) {
12549 qerror(Perl_mess(aTHX_ "Parse error"));
12550 return newSVpvs("x");
12557 =for apidoc Amx|OP *|parse_fullstmt|U32 flags
12559 Parse a single complete Perl statement. This may be a normal imperative
12560 statement or a declaration that has compile-time effect, and may include
12561 optional labels. It is up to the caller to ensure that the dynamic
12562 parser state (L</PL_parser> et al) is correctly set to reflect the source
12563 of the code to be parsed and the lexical context for the statement.
12565 The op tree representing the statement is returned. This may be a
12566 null pointer if the statement is null, for example if it was actually
12567 a subroutine definition (which has compile-time side effects). If not
12568 null, it will be the result of a L</newSTATEOP> call, normally including
12569 a C<nextstate> or equivalent op.
12571 If an error occurs in parsing or compilation, in most cases a valid op
12572 tree (most likely null) is returned anyway. The error is reflected in
12573 the parser state, normally resulting in a single exception at the top
12574 level of parsing which covers all the compilation errors that occurred.
12575 Some compilation errors, however, will throw an exception immediately.
12577 The C<flags> parameter is reserved for future use, and must always
12584 Perl_parse_fullstmt(pTHX_ U32 flags)
12587 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12588 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12592 =for apidoc Amx|OP *|parse_stmtseq|U32 flags
12594 Parse a sequence of zero or more Perl statements. These may be normal
12595 imperative statements, including optional labels, or declarations
12596 that have compile-time effect, or any mixture thereof. The statement
12597 sequence ends when a closing brace or end-of-file is encountered in a
12598 place where a new statement could have validly started. It is up to
12599 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12600 is correctly set to reflect the source of the code to be parsed and the
12601 lexical context for the statements.
12603 The op tree representing the statement sequence is returned. This may
12604 be a null pointer if the statements were all null, for example if there
12605 were no statements or if there were only subroutine definitions (which
12606 have compile-time side effects). If not null, it will be a C<lineseq>
12607 list, normally including C<nextstate> or equivalent ops.
12609 If an error occurs in parsing or compilation, in most cases a valid op
12610 tree is returned anyway. The error is reflected in the parser state,
12611 normally resulting in a single exception at the top level of parsing
12612 which covers all the compilation errors that occurred. Some compilation
12613 errors, however, will throw an exception immediately.
12615 The C<flags> parameter is reserved for future use, and must always
12622 Perl_parse_stmtseq(pTHX_ U32 flags)
12627 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12628 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
12629 c = lex_peek_unichar(0);
12630 if (c != -1 && c != /*{*/'}')
12631 qerror(Perl_mess(aTHX_ "Parse error"));
12636 * ex: set ts=8 sts=4 sw=4 et: