3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmnU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "dquote_inline.h"
42 #include "invlist_inline.h"
44 #define new_constant(a,b,c,d,e,f,g, h) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
47 #define pl_yylval (PL_parser->yylval)
49 /* XXX temporary backwards compatibility */
50 #define PL_lex_brackets (PL_parser->lex_brackets)
51 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
53 #define PL_lex_brackstack (PL_parser->lex_brackstack)
54 #define PL_lex_casemods (PL_parser->lex_casemods)
55 #define PL_lex_casestack (PL_parser->lex_casestack)
56 #define PL_lex_dojoin (PL_parser->lex_dojoin)
57 #define PL_lex_formbrack (PL_parser->lex_formbrack)
58 #define PL_lex_inpat (PL_parser->lex_inpat)
59 #define PL_lex_inwhat (PL_parser->lex_inwhat)
60 #define PL_lex_op (PL_parser->lex_op)
61 #define PL_lex_repl (PL_parser->lex_repl)
62 #define PL_lex_starts (PL_parser->lex_starts)
63 #define PL_lex_stuff (PL_parser->lex_stuff)
64 #define PL_multi_start (PL_parser->multi_start)
65 #define PL_multi_open (PL_parser->multi_open)
66 #define PL_multi_close (PL_parser->multi_close)
67 #define PL_preambled (PL_parser->preambled)
68 #define PL_linestr (PL_parser->linestr)
69 #define PL_expect (PL_parser->expect)
70 #define PL_copline (PL_parser->copline)
71 #define PL_bufptr (PL_parser->bufptr)
72 #define PL_oldbufptr (PL_parser->oldbufptr)
73 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
74 #define PL_linestart (PL_parser->linestart)
75 #define PL_bufend (PL_parser->bufend)
76 #define PL_last_uni (PL_parser->last_uni)
77 #define PL_last_lop (PL_parser->last_lop)
78 #define PL_last_lop_op (PL_parser->last_lop_op)
79 #define PL_lex_state (PL_parser->lex_state)
80 #define PL_rsfp (PL_parser->rsfp)
81 #define PL_rsfp_filters (PL_parser->rsfp_filters)
82 #define PL_in_my (PL_parser->in_my)
83 #define PL_in_my_stash (PL_parser->in_my_stash)
84 #define PL_tokenbuf (PL_parser->tokenbuf)
85 #define PL_multi_end (PL_parser->multi_end)
86 #define PL_error_count (PL_parser->error_count)
88 # define PL_nexttoke (PL_parser->nexttoke)
89 # define PL_nexttype (PL_parser->nexttype)
90 # define PL_nextval (PL_parser->nextval)
93 #define SvEVALED(sv) \
94 (SvTYPE(sv) >= SVt_PVNV \
95 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
97 static const char* const ident_too_long = "Identifier too long";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
152 static const char* const lex_state_names[] = {
167 #include "keywords.h"
169 /* CLINE is a macro that ensures PL_copline has a sane value */
171 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
174 * Convenience functions to return different tokens and prime the
175 * lexer for the next token. They all take an argument.
177 * TOKEN : generic token (used for '(', DOLSHARP, etc)
178 * OPERATOR : generic operator
179 * AOPERATOR : assignment operator
180 * PREBLOCK : beginning the block after an if, while, foreach, ...
181 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
182 * PREREF : *EXPR where EXPR is not a simple identifier
183 * TERM : expression term
184 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
185 * LOOPX : loop exiting command (goto, last, dump, etc)
186 * FTST : file test operator
187 * FUN0 : zero-argument function
188 * FUN0OP : zero-argument function, with its op created in this file
189 * FUN1 : not used, except for not, which isn't a UNIOP
190 * BOop : bitwise or or xor
192 * BCop : bitwise complement
193 * SHop : shift operator
194 * PWop : power operator
195 * PMop : pattern-matching operator
196 * Aop : addition-level operator
197 * AopNOASSIGN : addition-level operator that is never part of .=
198 * Mop : multiplication-level operator
199 * Eop : equality-testing operator
200 * Rop : relational operator <= != gt
202 * Also see LOP and lop() below.
205 #ifdef DEBUGGING /* Serve -DT. */
206 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
208 # define REPORT(retval) (retval)
211 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
212 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
213 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
214 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
215 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
216 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
217 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
218 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
219 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
221 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
223 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
224 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
225 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
226 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
227 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
228 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
229 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
231 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
232 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
233 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
234 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
235 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
236 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
237 #define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
238 #define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
240 /* This bit of chicanery makes a unary function followed by
241 * a parenthesis into a function with one argument, highest precedence.
242 * The UNIDOR macro is for unary functions that can be followed by the //
243 * operator (such as C<shift // 0>).
245 #define UNI3(f,x,have_x) { \
246 pl_yylval.ival = f; \
247 if (have_x) PL_expect = x; \
249 PL_last_uni = PL_oldbufptr; \
250 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
252 return REPORT( (int)FUNC1 ); \
254 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
256 #define UNI(f) UNI3(f,XTERM,1)
257 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
258 #define UNIPROTO(f,optional) { \
259 if (optional) PL_last_uni = PL_oldbufptr; \
263 #define UNIBRACK(f) UNI3(f,0,0)
265 /* grandfather return to old style */
268 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
269 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
270 pl_yylval.ival = (f); \
276 #define COPLINE_INC_WITH_HERELINES \
278 CopLINE_inc(PL_curcop); \
279 if (PL_parser->herelines) \
280 CopLINE(PL_curcop) += PL_parser->herelines, \
281 PL_parser->herelines = 0; \
283 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
284 * is no sublex_push to follow. */
285 #define COPLINE_SET_FROM_MULTI_END \
287 CopLINE_set(PL_curcop, PL_multi_end); \
288 if (PL_multi_end != PL_multi_start) \
289 PL_parser->herelines = 0; \
293 /* A file-local structure for passing around information about subroutines and
294 * related definable words */
304 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
309 /* how to interpret the pl_yylval associated with the token */
313 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
318 static struct debug_tokens {
320 enum token_type type;
322 } const debug_tokens[] =
324 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
325 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
326 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
327 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
328 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
329 { ARROW, TOKENTYPE_NONE, "ARROW" },
330 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
331 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
332 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
333 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
334 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
335 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
336 { DO, TOKENTYPE_NONE, "DO" },
337 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
338 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
339 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
340 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
341 { ELSE, TOKENTYPE_NONE, "ELSE" },
342 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
343 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
344 { FOR, TOKENTYPE_IVAL, "FOR" },
345 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
346 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
347 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
348 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
349 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
350 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
351 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
352 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
353 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
354 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
355 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
356 { IF, TOKENTYPE_IVAL, "IF" },
357 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
358 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
359 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
360 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
361 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
362 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
363 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
364 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
365 { MY, TOKENTYPE_IVAL, "MY" },
366 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
367 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
368 { OROP, TOKENTYPE_IVAL, "OROP" },
369 { OROR, TOKENTYPE_NONE, "OROR" },
370 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
371 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
372 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
373 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
374 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
375 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
376 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
377 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
378 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
379 { PREINC, TOKENTYPE_NONE, "PREINC" },
380 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
381 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
385 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
386 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
387 { SUB, TOKENTYPE_NONE, "SUB" },
388 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
389 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
390 { THING, TOKENTYPE_OPVAL, "THING" },
391 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
392 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
393 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
394 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
395 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
396 { USE, TOKENTYPE_IVAL, "USE" },
397 { WHEN, TOKENTYPE_IVAL, "WHEN" },
398 { WHILE, TOKENTYPE_IVAL, "WHILE" },
399 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
400 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
401 { 0, TOKENTYPE_NONE, NULL }
404 /* dump the returned token in rv, plus any optional arg in pl_yylval */
407 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
409 PERL_ARGS_ASSERT_TOKEREPORT;
412 const char *name = NULL;
413 enum token_type type = TOKENTYPE_NONE;
414 const struct debug_tokens *p;
415 SV* const report = newSVpvs("<== ");
417 for (p = debug_tokens; p->token; p++) {
418 if (p->token == (int)rv) {
425 Perl_sv_catpv(aTHX_ report, name);
426 else if (isGRAPH(rv))
428 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
430 sv_catpvs(report, " (pending identifier)");
433 sv_catpvs(report, "EOF");
435 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
440 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
442 case TOKENTYPE_OPNUM:
443 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
444 PL_op_name[lvalp->ival]);
447 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
449 case TOKENTYPE_OPVAL:
451 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
452 PL_op_name[lvalp->opval->op_type]);
453 if (lvalp->opval->op_type == OP_CONST) {
454 Perl_sv_catpvf(aTHX_ report, " %s",
455 SvPEEK(cSVOPx_sv(lvalp->opval)));
460 sv_catpvs(report, "(opval=null)");
463 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
469 /* print the buffer with suitable escapes */
472 S_printbuf(pTHX_ const char *const fmt, const char *const s)
474 SV* const tmp = newSVpvs("");
476 PERL_ARGS_ASSERT_PRINTBUF;
478 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
479 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
480 GCC_DIAG_RESTORE_STMT;
489 * This subroutine looks for an '=' next to the operator that has just been
490 * parsed and turns it into an ASSIGNOP if it finds one.
494 S_ao(pTHX_ int toketype)
496 if (*PL_bufptr == '=') {
498 if (toketype == ANDAND)
499 pl_yylval.ival = OP_ANDASSIGN;
500 else if (toketype == OROR)
501 pl_yylval.ival = OP_ORASSIGN;
502 else if (toketype == DORDOR)
503 pl_yylval.ival = OP_DORASSIGN;
506 return REPORT(toketype);
511 * When Perl expects an operator and finds something else, no_op
512 * prints the warning. It always prints "<something> found where
513 * operator expected. It prints "Missing semicolon on previous line?"
514 * if the surprise occurs at the start of the line. "do you need to
515 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
516 * where the compiler doesn't know if foo is a method call or a function.
517 * It prints "Missing operator before end of line" if there's nothing
518 * after the missing operator, or "... before <...>" if there is something
519 * after the missing operator.
521 * PL_bufptr is expected to point to the start of the thing that was found,
522 * and s after the next token or partial token.
526 S_no_op(pTHX_ const char *const what, char *s)
528 char * const oldbp = PL_bufptr;
529 const bool is_first = (PL_oldbufptr == PL_linestart);
531 PERL_ARGS_ASSERT_NO_OP;
537 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
538 if (ckWARN_d(WARN_SYNTAX)) {
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
541 "\t(Missing semicolon on previous line?)\n");
542 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
547 for (t = PL_oldoldbufptr;
548 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
549 t += UTF ? UTF8SKIP(t) : 1)
553 if (t < PL_bufptr && isSPACE(*t))
554 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
555 "\t(Do you need to predeclare %" UTF8f "?)\n",
556 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
560 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
561 "\t(Missing operator before %" UTF8f "?)\n",
562 UTF8fARG(UTF, s - oldbp, oldbp));
570 * Complain about missing quote/regexp/heredoc terminator.
571 * If it's called with NULL then it cauterizes the line buffer.
572 * If we're in a delimited string and the delimiter is a control
573 * character, it's reformatted into a two-char sequence like ^C.
578 S_missingterm(pTHX_ char *s, STRLEN len)
580 char tmpbuf[UTF8_MAXBYTES + 1];
585 char * const nl = (char *) my_memrchr(s, '\n', len);
592 else if (PL_multi_close < 32) {
594 tmpbuf[1] = (char)toCTRL(PL_multi_close);
600 if (LIKELY(PL_multi_close < 256)) {
601 *tmpbuf = (char)PL_multi_close;
606 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
613 q = memchr(s, '"', len) ? '\'' : '"';
614 sv = sv_2mortal(newSVpvn(s, len));
617 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
618 " anywhere before EOF", q, SVfARG(sv), q);
624 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
625 * utf16-to-utf8-reversed.
628 #ifdef PERL_CR_FILTER
632 const char *s = SvPVX_const(sv);
633 const char * const e = s + SvCUR(sv);
635 PERL_ARGS_ASSERT_STRIP_RETURN;
637 /* outer loop optimized to do nothing if there are no CR-LFs */
639 if (*s++ == '\r' && *s == '\n') {
640 /* hit a CR-LF, need to copy the rest */
644 if (*s == '\r' && s[1] == '\n')
655 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
657 const I32 count = FILTER_READ(idx+1, sv, maxlen);
658 if (count > 0 && !maxlen)
665 =for apidoc lex_start
667 Creates and initialises a new lexer/parser state object, supplying
668 a context in which to lex and parse from a new source of Perl code.
669 A pointer to the new state object is placed in L</PL_parser>. An entry
670 is made on the save stack so that upon unwinding, the new state object
671 will be destroyed and the former value of L</PL_parser> will be restored.
672 Nothing else need be done to clean up the parsing context.
674 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
675 non-null, provides a string (in SV form) containing code to be parsed.
676 A copy of the string is made, so subsequent modification of C<line>
677 does not affect parsing. C<rsfp>, if non-null, provides an input stream
678 from which code will be read to be parsed. If both are non-null, the
679 code in C<line> comes first and must consist of complete lines of input,
680 and C<rsfp> supplies the remainder of the source.
682 The C<flags> parameter is reserved for future use. Currently it is only
683 used by perl internally, so extensions should always pass zero.
688 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
689 can share filters with the current parser.
690 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
691 caller, hence isn't owned by the parser, so shouldn't be closed on parser
692 destruction. This is used to handle the case of defaulting to reading the
693 script from the standard input because no filename was given on the command
694 line (without getting confused by situation where STDIN has been closed, so
695 the script handle is opened on fd 0) */
698 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
700 const char *s = NULL;
701 yy_parser *parser, *oparser;
703 if (flags && flags & ~LEX_START_FLAGS)
704 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
706 /* create and initialise a parser */
708 Newxz(parser, 1, yy_parser);
709 parser->old_parser = oparser = PL_parser;
712 parser->stack = NULL;
713 parser->stack_max1 = NULL;
716 /* on scope exit, free this parser and restore any outer one */
718 parser->saved_curcop = PL_curcop;
720 /* initialise lexer state */
722 parser->nexttoke = 0;
723 parser->error_count = oparser ? oparser->error_count : 0;
724 parser->copline = parser->preambling = NOLINE;
725 parser->lex_state = LEX_NORMAL;
726 parser->expect = XSTATE;
728 parser->recheck_utf8_validity = TRUE;
729 parser->rsfp_filters =
730 !(flags & LEX_START_SAME_FILTER) || !oparser
732 : MUTABLE_AV(SvREFCNT_inc(
733 oparser->rsfp_filters
734 ? oparser->rsfp_filters
735 : (oparser->rsfp_filters = newAV())
738 Newx(parser->lex_brackstack, 120, char);
739 Newx(parser->lex_casestack, 12, char);
740 *parser->lex_casestack = '\0';
741 Newxz(parser->lex_shared, 1, LEXSHARED);
745 const U8* first_bad_char_loc;
747 s = SvPV_const(line, len);
750 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
752 &first_bad_char_loc)))
754 _force_out_malformed_utf8_message(first_bad_char_loc,
755 (U8 *) s + SvCUR(line),
757 1 /* 1 means die */ );
758 NOT_REACHED; /* NOTREACHED */
761 parser->linestr = flags & LEX_START_COPIED
762 ? SvREFCNT_inc_simple_NN(line)
763 : newSVpvn_flags(s, len, SvUTF8(line));
765 sv_catpvs(parser->linestr, "\n;");
767 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
770 parser->oldoldbufptr =
773 parser->linestart = SvPVX(parser->linestr);
774 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
775 parser->last_lop = parser->last_uni = NULL;
777 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
778 |LEX_DONT_CLOSE_RSFP));
779 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
780 |LEX_DONT_CLOSE_RSFP));
782 parser->in_pod = parser->filtered = 0;
786 /* delete a parser object */
789 Perl_parser_free(pTHX_ const yy_parser *parser)
791 PERL_ARGS_ASSERT_PARSER_FREE;
793 PL_curcop = parser->saved_curcop;
794 SvREFCNT_dec(parser->linestr);
796 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
797 PerlIO_clearerr(parser->rsfp);
798 else if (parser->rsfp && (!parser->old_parser
799 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
800 PerlIO_close(parser->rsfp);
801 SvREFCNT_dec(parser->rsfp_filters);
802 SvREFCNT_dec(parser->lex_stuff);
803 SvREFCNT_dec(parser->lex_sub_repl);
805 Safefree(parser->lex_brackstack);
806 Safefree(parser->lex_casestack);
807 Safefree(parser->lex_shared);
808 PL_parser = parser->old_parser;
813 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
815 I32 nexttoke = parser->nexttoke;
816 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
818 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
819 && parser->nextval[nexttoke].opval
820 && parser->nextval[nexttoke].opval->op_slabbed
821 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
822 op_free(parser->nextval[nexttoke].opval);
823 parser->nextval[nexttoke].opval = NULL;
830 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
832 Buffer scalar containing the chunk currently under consideration of the
833 text currently being lexed. This is always a plain string scalar (for
834 which C<SvPOK> is true). It is not intended to be used as a scalar by
835 normal scalar means; instead refer to the buffer directly by the pointer
836 variables described below.
838 The lexer maintains various C<char*> pointers to things in the
839 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
840 reallocated, all of these pointers must be updated. Don't attempt to
841 do this manually, but rather use L</lex_grow_linestr> if you need to
842 reallocate the buffer.
844 The content of the text chunk in the buffer is commonly exactly one
845 complete line of input, up to and including a newline terminator,
846 but there are situations where it is otherwise. The octets of the
847 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
848 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
849 flag on this scalar, which may disagree with it.
851 For direct examination of the buffer, the variable
852 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
853 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
854 of these pointers is usually preferable to examination of the scalar
855 through normal scalar means.
857 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
859 Direct pointer to the end of the chunk of text currently being lexed, the
860 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
861 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
862 always located at the end of the buffer, and does not count as part of
863 the buffer's contents.
865 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
867 Points to the current position of lexing inside the lexer buffer.
868 Characters around this point may be freely examined, within
869 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
870 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
871 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
873 Lexing code (whether in the Perl core or not) moves this pointer past
874 the characters that it consumes. It is also expected to perform some
875 bookkeeping whenever a newline character is consumed. This movement
876 can be more conveniently performed by the function L</lex_read_to>,
877 which handles newlines appropriately.
879 Interpretation of the buffer's octets can be abstracted out by
880 using the slightly higher-level functions L</lex_peek_unichar> and
881 L</lex_read_unichar>.
883 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
885 Points to the start of the current line inside the lexer buffer.
886 This is useful for indicating at which column an error occurred, and
887 not much else. This must be updated by any lexing code that consumes
888 a newline; the function L</lex_read_to> handles this detail.
894 =for apidoc lex_bufutf8
896 Indicates whether the octets in the lexer buffer
897 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
898 of Unicode characters. If not, they should be interpreted as Latin-1
899 characters. This is analogous to the C<SvUTF8> flag for scalars.
901 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
902 contains valid UTF-8. Lexing code must be robust in the face of invalid
905 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
906 is significant, but not the whole story regarding the input character
907 encoding. Normally, when a file is being read, the scalar contains octets
908 and its C<SvUTF8> flag is off, but the octets should be interpreted as
909 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
910 however, the scalar may have the C<SvUTF8> flag on, and in this case its
911 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
912 is in effect. This logic may change in the future; use this function
913 instead of implementing the logic yourself.
919 Perl_lex_bufutf8(pTHX)
925 =for apidoc lex_grow_linestr
927 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
928 at least C<len> octets (including terminating C<NUL>). Returns a
929 pointer to the reallocated buffer. This is necessary before making
930 any direct modification of the buffer that would increase its length.
931 L</lex_stuff_pvn> provides a more convenient way to insert text into
934 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
935 this function updates all of the lexer's variables that point directly
942 Perl_lex_grow_linestr(pTHX_ STRLEN len)
946 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
947 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
950 linestr = PL_parser->linestr;
951 buf = SvPVX(linestr);
952 if (len <= SvLEN(linestr))
955 /* Is the lex_shared linestr SV the same as the current linestr SV?
956 * Only in this case does re_eval_start need adjusting, since it
957 * points within lex_shared->ls_linestr's buffer */
958 current = ( !PL_parser->lex_shared->ls_linestr
959 || linestr == PL_parser->lex_shared->ls_linestr);
961 bufend_pos = PL_parser->bufend - buf;
962 bufptr_pos = PL_parser->bufptr - buf;
963 oldbufptr_pos = PL_parser->oldbufptr - buf;
964 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
965 linestart_pos = PL_parser->linestart - buf;
966 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
967 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
968 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
969 PL_parser->lex_shared->re_eval_start - buf : 0;
971 buf = sv_grow(linestr, len);
973 PL_parser->bufend = buf + bufend_pos;
974 PL_parser->bufptr = buf + bufptr_pos;
975 PL_parser->oldbufptr = buf + oldbufptr_pos;
976 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
977 PL_parser->linestart = buf + linestart_pos;
978 if (PL_parser->last_uni)
979 PL_parser->last_uni = buf + last_uni_pos;
980 if (PL_parser->last_lop)
981 PL_parser->last_lop = buf + last_lop_pos;
982 if (current && PL_parser->lex_shared->re_eval_start)
983 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
988 =for apidoc lex_stuff_pvn
990 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
991 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
992 reallocating the buffer if necessary. This means that lexing code that
993 runs later will see the characters as if they had appeared in the input.
994 It is not recommended to do this as part of normal parsing, and most
995 uses of this facility run the risk of the inserted characters being
996 interpreted in an unintended manner.
998 The string to be inserted is represented by C<len> octets starting
999 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1000 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1001 The characters are recoded for the lexer buffer, according to how the
1002 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1003 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1004 function is more convenient.
1010 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1014 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1015 if (flags & ~(LEX_STUFF_UTF8))
1016 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1018 if (flags & LEX_STUFF_UTF8) {
1021 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1023 const char *p, *e = pv+len;;
1026 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1027 bufptr = PL_parser->bufptr;
1028 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1029 SvCUR_set(PL_parser->linestr,
1030 SvCUR(PL_parser->linestr) + len+highhalf);
1031 PL_parser->bufend += len+highhalf;
1032 for (p = pv; p != e; p++) {
1033 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1037 if (flags & LEX_STUFF_UTF8) {
1038 STRLEN highhalf = 0;
1039 const char *p, *e = pv+len;
1040 for (p = pv; p != e; p++) {
1042 if (UTF8_IS_ABOVE_LATIN1(c)) {
1043 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1044 "non-Latin-1 character into Latin-1 input");
1045 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1048 } else assert(UTF8_IS_INVARIANT(c));
1052 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1053 bufptr = PL_parser->bufptr;
1054 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1055 SvCUR_set(PL_parser->linestr,
1056 SvCUR(PL_parser->linestr) + len-highhalf);
1057 PL_parser->bufend += len-highhalf;
1060 if (UTF8_IS_INVARIANT(*p)) {
1066 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1072 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1073 bufptr = PL_parser->bufptr;
1074 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1075 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1076 PL_parser->bufend += len;
1077 Copy(pv, bufptr, len, char);
1083 =for apidoc lex_stuff_pv
1085 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1086 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1087 reallocating the buffer if necessary. This means that lexing code that
1088 runs later will see the characters as if they had appeared in the input.
1089 It is not recommended to do this as part of normal parsing, and most
1090 uses of this facility run the risk of the inserted characters being
1091 interpreted in an unintended manner.
1093 The string to be inserted is represented by octets starting at C<pv>
1094 and continuing to the first nul. These octets are interpreted as either
1095 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1096 in C<flags>. The characters are recoded for the lexer buffer, according
1097 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1098 If it is not convenient to nul-terminate a string to be inserted, the
1099 L</lex_stuff_pvn> function is more appropriate.
1105 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1107 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1108 lex_stuff_pvn(pv, strlen(pv), flags);
1112 =for apidoc lex_stuff_sv
1114 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1115 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1116 reallocating the buffer if necessary. This means that lexing code that
1117 runs later will see the characters as if they had appeared in the input.
1118 It is not recommended to do this as part of normal parsing, and most
1119 uses of this facility run the risk of the inserted characters being
1120 interpreted in an unintended manner.
1122 The string to be inserted is the string value of C<sv>. The characters
1123 are recoded for the lexer buffer, according to how the buffer is currently
1124 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1125 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1126 need to construct a scalar.
1132 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1136 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1138 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1140 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1144 =for apidoc lex_unstuff
1146 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1147 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1148 This hides the discarded text from any lexing code that runs later,
1149 as if the text had never appeared.
1151 This is not the normal way to consume lexed text. For that, use
1158 Perl_lex_unstuff(pTHX_ char *ptr)
1162 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1163 buf = PL_parser->bufptr;
1165 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1168 bufend = PL_parser->bufend;
1170 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1171 unstuff_len = ptr - buf;
1172 Move(ptr, buf, bufend+1-ptr, char);
1173 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1174 PL_parser->bufend = bufend - unstuff_len;
1178 =for apidoc lex_read_to
1180 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1181 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1182 performing the correct bookkeeping whenever a newline character is passed.
1183 This is the normal way to consume lexed text.
1185 Interpretation of the buffer's octets can be abstracted out by
1186 using the slightly higher-level functions L</lex_peek_unichar> and
1187 L</lex_read_unichar>.
1193 Perl_lex_read_to(pTHX_ char *ptr)
1196 PERL_ARGS_ASSERT_LEX_READ_TO;
1197 s = PL_parser->bufptr;
1198 if (ptr < s || ptr > PL_parser->bufend)
1199 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1200 for (; s != ptr; s++)
1202 COPLINE_INC_WITH_HERELINES;
1203 PL_parser->linestart = s+1;
1205 PL_parser->bufptr = ptr;
1209 =for apidoc lex_discard_to
1211 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1212 up to C<ptr>. The remaining content of the buffer will be moved, and
1213 all pointers into the buffer updated appropriately. C<ptr> must not
1214 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1215 it is not permitted to discard text that has yet to be lexed.
1217 Normally it is not necessarily to do this directly, because it suffices to
1218 use the implicit discarding behaviour of L</lex_next_chunk> and things
1219 based on it. However, if a token stretches across multiple lines,
1220 and the lexing code has kept multiple lines of text in the buffer for
1221 that purpose, then after completion of the token it would be wise to
1222 explicitly discard the now-unneeded earlier lines, to avoid future
1223 multi-line tokens growing the buffer without bound.
1229 Perl_lex_discard_to(pTHX_ char *ptr)
1233 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1234 buf = SvPVX(PL_parser->linestr);
1236 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1239 if (ptr > PL_parser->bufptr)
1240 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1241 discard_len = ptr - buf;
1242 if (PL_parser->oldbufptr < ptr)
1243 PL_parser->oldbufptr = ptr;
1244 if (PL_parser->oldoldbufptr < ptr)
1245 PL_parser->oldoldbufptr = ptr;
1246 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1247 PL_parser->last_uni = NULL;
1248 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1249 PL_parser->last_lop = NULL;
1250 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1251 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1252 PL_parser->bufend -= discard_len;
1253 PL_parser->bufptr -= discard_len;
1254 PL_parser->oldbufptr -= discard_len;
1255 PL_parser->oldoldbufptr -= discard_len;
1256 if (PL_parser->last_uni)
1257 PL_parser->last_uni -= discard_len;
1258 if (PL_parser->last_lop)
1259 PL_parser->last_lop -= discard_len;
1263 Perl_notify_parser_that_changed_to_utf8(pTHX)
1265 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1266 * off to on. At compile time, this has the effect of entering a 'use
1267 * utf8' section. This means that any input was not previously checked for
1268 * UTF-8 (because it was off), but now we do need to check it, or our
1269 * assumptions about the input being sane could be wrong, and we could
1270 * segfault. This routine just sets a flag so that the next time we look
1271 * at the input we do the well-formed UTF-8 check. If we aren't in the
1272 * proper phase, there may not be a parser object, but if there is, setting
1273 * the flag is harmless */
1276 PL_parser->recheck_utf8_validity = TRUE;
1281 =for apidoc lex_next_chunk
1283 Reads in the next chunk of text to be lexed, appending it to
1284 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1285 looked to the end of the current chunk and wants to know more. It is
1286 usual, but not necessary, for lexing to have consumed the entirety of
1287 the current chunk at this time.
1289 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1290 chunk (i.e., the current chunk has been entirely consumed), normally the
1291 current chunk will be discarded at the same time that the new chunk is
1292 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1293 will not be discarded. If the current chunk has not been entirely
1294 consumed, then it will not be discarded regardless of the flag.
1296 Returns true if some new text was added to the buffer, or false if the
1297 buffer has reached the end of the input text.
1302 #define LEX_FAKE_EOF 0x80000000
1303 #define LEX_NO_TERM 0x40000000 /* here-doc */
1306 Perl_lex_next_chunk(pTHX_ U32 flags)
1310 STRLEN old_bufend_pos, new_bufend_pos;
1311 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1312 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1313 bool got_some_for_debugger = 0;
1316 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1317 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1318 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1320 linestr = PL_parser->linestr;
1321 buf = SvPVX(linestr);
1322 if (!(flags & LEX_KEEP_PREVIOUS)
1323 && PL_parser->bufptr == PL_parser->bufend)
1325 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1327 if (PL_parser->last_uni != PL_parser->bufend)
1328 PL_parser->last_uni = NULL;
1329 if (PL_parser->last_lop != PL_parser->bufend)
1330 PL_parser->last_lop = NULL;
1331 last_uni_pos = last_lop_pos = 0;
1333 SvCUR_set(linestr, 0);
1335 old_bufend_pos = PL_parser->bufend - buf;
1336 bufptr_pos = PL_parser->bufptr - buf;
1337 oldbufptr_pos = PL_parser->oldbufptr - buf;
1338 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1339 linestart_pos = PL_parser->linestart - buf;
1340 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1341 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1343 if (flags & LEX_FAKE_EOF) {
1345 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1347 } else if (filter_gets(linestr, old_bufend_pos)) {
1349 got_some_for_debugger = 1;
1350 } else if (flags & LEX_NO_TERM) {
1353 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1356 /* End of real input. Close filehandle (unless it was STDIN),
1357 * then add implicit termination.
1359 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1360 PerlIO_clearerr(PL_parser->rsfp);
1361 else if (PL_parser->rsfp)
1362 (void)PerlIO_close(PL_parser->rsfp);
1363 PL_parser->rsfp = NULL;
1364 PL_parser->in_pod = PL_parser->filtered = 0;
1365 if (!PL_in_eval && PL_minus_p) {
1367 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1368 PL_minus_n = PL_minus_p = 0;
1369 } else if (!PL_in_eval && PL_minus_n) {
1370 sv_catpvs(linestr, /*{*/";}");
1373 sv_catpvs(linestr, ";");
1376 buf = SvPVX(linestr);
1377 new_bufend_pos = SvCUR(linestr);
1378 PL_parser->bufend = buf + new_bufend_pos;
1379 PL_parser->bufptr = buf + bufptr_pos;
1382 const U8* first_bad_char_loc;
1383 if (UNLIKELY(! is_utf8_string_loc(
1384 (U8 *) PL_parser->bufptr,
1385 PL_parser->bufend - PL_parser->bufptr,
1386 &first_bad_char_loc)))
1388 _force_out_malformed_utf8_message(first_bad_char_loc,
1389 (U8 *) PL_parser->bufend,
1391 1 /* 1 means die */ );
1392 NOT_REACHED; /* NOTREACHED */
1396 PL_parser->oldbufptr = buf + oldbufptr_pos;
1397 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1398 PL_parser->linestart = buf + linestart_pos;
1399 if (PL_parser->last_uni)
1400 PL_parser->last_uni = buf + last_uni_pos;
1401 if (PL_parser->last_lop)
1402 PL_parser->last_lop = buf + last_lop_pos;
1403 if (PL_parser->preambling != NOLINE) {
1404 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1405 PL_parser->preambling = NOLINE;
1407 if ( got_some_for_debugger
1408 && PERLDB_LINE_OR_SAVESRC
1409 && PL_curstash != PL_debstash)
1411 /* debugger active and we're not compiling the debugger code,
1412 * so store the line into the debugger's array of lines
1414 update_debugger_info(NULL, buf+old_bufend_pos,
1415 new_bufend_pos-old_bufend_pos);
1421 =for apidoc lex_peek_unichar
1423 Looks ahead one (Unicode) character in the text currently being lexed.
1424 Returns the codepoint (unsigned integer value) of the next character,
1425 or -1 if lexing has reached the end of the input text. To consume the
1426 peeked character, use L</lex_read_unichar>.
1428 If the next character is in (or extends into) the next chunk of input
1429 text, the next chunk will be read in. Normally the current chunk will be
1430 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1431 bit set, then the current chunk will not be discarded.
1433 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1434 is encountered, an exception is generated.
1440 Perl_lex_peek_unichar(pTHX_ U32 flags)
1444 if (flags & ~(LEX_KEEP_PREVIOUS))
1445 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1446 s = PL_parser->bufptr;
1447 bufend = PL_parser->bufend;
1453 if (!lex_next_chunk(flags))
1455 s = PL_parser->bufptr;
1456 bufend = PL_parser->bufend;
1459 if (UTF8_IS_INVARIANT(head))
1461 if (UTF8_IS_START(head)) {
1462 len = UTF8SKIP(&head);
1463 while ((STRLEN)(bufend-s) < len) {
1464 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1466 s = PL_parser->bufptr;
1467 bufend = PL_parser->bufend;
1470 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1471 if (retlen == (STRLEN)-1) {
1472 _force_out_malformed_utf8_message((U8 *) s,
1475 1 /* 1 means die */ );
1476 NOT_REACHED; /* NOTREACHED */
1481 if (!lex_next_chunk(flags))
1483 s = PL_parser->bufptr;
1490 =for apidoc lex_read_unichar
1492 Reads the next (Unicode) character in the text currently being lexed.
1493 Returns the codepoint (unsigned integer value) of the character read,
1494 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1495 if lexing has reached the end of the input text. To non-destructively
1496 examine the next character, use L</lex_peek_unichar> instead.
1498 If the next character is in (or extends into) the next chunk of input
1499 text, the next chunk will be read in. Normally the current chunk will be
1500 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1501 bit set, then the current chunk will not be discarded.
1503 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1504 is encountered, an exception is generated.
1510 Perl_lex_read_unichar(pTHX_ U32 flags)
1513 if (flags & ~(LEX_KEEP_PREVIOUS))
1514 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1515 c = lex_peek_unichar(flags);
1518 COPLINE_INC_WITH_HERELINES;
1520 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1522 ++(PL_parser->bufptr);
1528 =for apidoc lex_read_space
1530 Reads optional spaces, in Perl style, in the text currently being
1531 lexed. The spaces may include ordinary whitespace characters and
1532 Perl-style comments. C<#line> directives are processed if encountered.
1533 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1534 at a non-space character (or the end of the input text).
1536 If spaces extend into the next chunk of input text, the next chunk will
1537 be read in. Normally the current chunk will be discarded at the same
1538 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1539 chunk will not be discarded.
1544 #define LEX_NO_INCLINE 0x40000000
1545 #define LEX_NO_NEXT_CHUNK 0x80000000
1548 Perl_lex_read_space(pTHX_ U32 flags)
1551 const bool can_incline = !(flags & LEX_NO_INCLINE);
1552 bool need_incline = 0;
1553 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1554 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1555 s = PL_parser->bufptr;
1556 bufend = PL_parser->bufend;
1562 } while (!(c == '\n' || (c == 0 && s == bufend)));
1563 } else if (c == '\n') {
1566 PL_parser->linestart = s;
1572 } else if (isSPACE(c)) {
1574 } else if (c == 0 && s == bufend) {
1577 if (flags & LEX_NO_NEXT_CHUNK)
1579 PL_parser->bufptr = s;
1580 l = CopLINE(PL_curcop);
1581 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1582 got_more = lex_next_chunk(flags);
1583 CopLINE_set(PL_curcop, l);
1584 s = PL_parser->bufptr;
1585 bufend = PL_parser->bufend;
1588 if (can_incline && need_incline && PL_parser->rsfp) {
1598 PL_parser->bufptr = s;
1603 =for apidoc validate_proto
1605 This function performs syntax checking on a prototype, C<proto>.
1606 If C<warn> is true, any illegal characters or mismatched brackets
1607 will trigger illegalproto warnings, declaring that they were
1608 detected in the prototype for C<name>.
1610 The return value is C<true> if this is a valid prototype, and
1611 C<false> if it is not, regardless of whether C<warn> was C<true> or
1614 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1621 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1623 STRLEN len, origlen;
1625 bool bad_proto = FALSE;
1626 bool in_brackets = FALSE;
1627 bool after_slash = FALSE;
1628 char greedy_proto = ' ';
1629 bool proto_after_greedy_proto = FALSE;
1630 bool must_be_last = FALSE;
1631 bool underscore = FALSE;
1632 bool bad_proto_after_underscore = FALSE;
1634 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1639 p = SvPV(proto, len);
1641 for (; len--; p++) {
1644 proto_after_greedy_proto = TRUE;
1646 if (!strchr(";@%", *p))
1647 bad_proto_after_underscore = TRUE;
1650 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1657 in_brackets = FALSE;
1658 else if ((*p == '@' || *p == '%')
1662 must_be_last = TRUE;
1671 after_slash = FALSE;
1676 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1679 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1680 origlen, UNI_DISPLAY_ISPRINT)
1681 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1683 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1684 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1685 sv_catpvs(name2, "::");
1686 sv_catsv(name2, (SV *)name);
1690 if (proto_after_greedy_proto)
1691 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1692 "Prototype after '%c' for %" SVf " : %s",
1693 greedy_proto, SVfARG(name), p);
1695 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1696 "Missing ']' in prototype for %" SVf " : %s",
1699 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1700 "Illegal character in prototype for %" SVf " : %s",
1702 if (bad_proto_after_underscore)
1703 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1704 "Illegal character after '_' in prototype for %" SVf " : %s",
1708 return (! (proto_after_greedy_proto || bad_proto) );
1713 * This subroutine has nothing to do with tilting, whether at windmills
1714 * or pinball tables. Its name is short for "increment line". It
1715 * increments the current line number in CopLINE(PL_curcop) and checks
1716 * to see whether the line starts with a comment of the form
1717 * # line 500 "foo.pm"
1718 * If so, it sets the current line number and file to the values in the comment.
1722 S_incline(pTHX_ const char *s, const char *end)
1730 PERL_ARGS_ASSERT_INCLINE;
1734 COPLINE_INC_WITH_HERELINES;
1735 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1736 && s+1 == PL_bufend && *s == ';') {
1737 /* fake newline in string eval */
1738 CopLINE_dec(PL_curcop);
1743 while (SPACE_OR_TAB(*s))
1745 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1746 s += sizeof("line") - 1;
1749 if (SPACE_OR_TAB(*s))
1753 while (SPACE_OR_TAB(*s))
1761 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1763 while (SPACE_OR_TAB(*s))
1765 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1771 while (*t && !isSPACE(*t))
1775 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1777 if (*e != '\n' && *e != '\0')
1778 return; /* false alarm */
1780 if (!grok_atoUV(n, &uv, &e))
1782 line_num = ((line_t)uv) - 1;
1785 const STRLEN len = t - s;
1787 if (!PL_rsfp && !PL_parser->filtered) {
1788 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1789 * to *{"::_<newfilename"} */
1790 /* However, the long form of evals is only turned on by the
1791 debugger - usually they're "(eval %lu)" */
1792 GV * const cfgv = CopFILEGV(PL_curcop);
1795 STRLEN tmplen2 = len;
1799 if (tmplen2 + 2 <= sizeof smallbuf)
1802 Newx(tmpbuf2, tmplen2 + 2, char);
1807 memcpy(tmpbuf2 + 2, s, tmplen2);
1810 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1812 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1813 /* adjust ${"::_<newfilename"} to store the new file name */
1814 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1815 /* The line number may differ. If that is the case,
1816 alias the saved lines that are in the array.
1817 Otherwise alias the whole array. */
1818 if (CopLINE(PL_curcop) == line_num) {
1819 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1820 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1822 else if (GvAV(cfgv)) {
1823 AV * const av = GvAV(cfgv);
1824 const line_t start = CopLINE(PL_curcop)+1;
1825 SSize_t items = AvFILLp(av) - start;
1827 AV * const av2 = GvAVn(gv2);
1828 SV **svp = AvARRAY(av) + start;
1829 Size_t l = line_num+1;
1830 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1831 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1836 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1839 CopFILE_free(PL_curcop);
1840 CopFILE_setn(PL_curcop, s, len);
1842 CopLINE_set(PL_curcop, line_num);
1846 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1848 AV *av = CopFILEAVx(PL_curcop);
1851 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1853 sv = *av_fetch(av, 0, 1);
1854 SvUPGRADE(sv, SVt_PVMG);
1856 if (!SvPOK(sv)) SvPVCLEAR(sv);
1858 sv_catsv(sv, orig_sv);
1860 sv_catpvn(sv, buf, len);
1865 if (PL_parser->preambling == NOLINE)
1866 av_store(av, CopLINE(PL_curcop), sv);
1872 * Called to gobble the appropriate amount and type of whitespace.
1873 * Skips comments as well.
1874 * Returns the next character after the whitespace that is skipped.
1877 * Same thing, but look ahead without incrementing line numbers or
1878 * adjusting PL_linestart.
1881 #define skipspace(s) skipspace_flags(s, 0)
1882 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1885 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1887 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1888 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1889 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1892 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1894 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1895 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1896 LEX_NO_NEXT_CHUNK : 0));
1898 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1899 if (PL_linestart > PL_bufptr)
1900 PL_bufptr = PL_linestart;
1908 * Check the unary operators to ensure there's no ambiguity in how they're
1909 * used. An ambiguous piece of code would be:
1911 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1912 * the +5 is its argument.
1920 if (PL_oldoldbufptr != PL_last_uni)
1922 while (isSPACE(*PL_last_uni))
1925 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1926 s += UTF ? UTF8SKIP(s) : 1;
1927 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1930 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1931 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1932 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1936 * LOP : macro to build a list operator. Its behaviour has been replaced
1937 * with a subroutine, S_lop() for which LOP is just another name.
1940 #define LOP(f,x) return lop(f,x,s)
1944 * Build a list operator (or something that might be one). The rules:
1945 * - if we have a next token, then it's a list operator (no parens) for
1946 * which the next token has already been parsed; e.g.,
1949 * - if the next thing is an opening paren, then it's a function
1950 * - else it's a list operator
1954 S_lop(pTHX_ I32 f, U8 x, char *s)
1956 PERL_ARGS_ASSERT_LOP;
1961 PL_last_lop = PL_oldbufptr;
1962 PL_last_lop_op = (OPCODE)f;
1967 return REPORT(FUNC);
1970 return REPORT(FUNC);
1973 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1974 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1975 return REPORT(LSTOP);
1981 * When the lexer realizes it knows the next token (for instance,
1982 * it is reordering tokens for the parser) then it can call S_force_next
1983 * to know what token to return the next time the lexer is called. Caller
1984 * will need to set PL_nextval[] and possibly PL_expect to ensure
1985 * the lexer handles the token correctly.
1989 S_force_next(pTHX_ I32 type)
1993 PerlIO_printf(Perl_debug_log, "### forced token:\n");
1994 tokereport(type, &NEXTVAL_NEXTTOKE);
1997 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
1998 PL_nexttype[PL_nexttoke] = type;
2005 * This subroutine handles postfix deref syntax after the arrow has already
2006 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2007 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2008 * only the first, leaving yylex to find the next.
2012 S_postderef(pTHX_ int const funny, char const next)
2014 assert(funny == DOLSHARP || strchr("$@%&*", funny));
2016 PL_expect = XOPERATOR;
2017 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2018 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2019 PL_lex_state = LEX_INTERPEND;
2021 force_next(POSTJOIN);
2027 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2028 && !PL_lex_brackets)
2030 PL_expect = XOPERATOR;
2039 int yyc = PL_parser->yychar;
2040 if (yyc != YYEMPTY) {
2042 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2043 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2044 PL_lex_allbrackets--;
2046 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2047 } else if (yyc == '('/*)*/) {
2048 PL_lex_allbrackets--;
2053 PL_parser->yychar = YYEMPTY;
2058 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2060 SV * const sv = newSVpvn_utf8(start, len,
2064 && is_utf8_non_invariant_string((const U8*)start, len));
2070 * When the lexer knows the next thing is a word (for instance, it has
2071 * just seen -> and it knows that the next char is a word char, then
2072 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2076 * char *start : buffer position (must be within PL_linestr)
2077 * int token : PL_next* will be this type of bare word
2078 * (e.g., METHOD,BAREWORD)
2079 * int check_keyword : if true, Perl checks to make sure the word isn't
2080 * a keyword (do this if the word is a label, e.g. goto FOO)
2081 * int allow_pack : if true, : characters will also be allowed (require,
2082 * use, etc. do this)
2086 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2091 PERL_ARGS_ASSERT_FORCE_WORD;
2093 start = skipspace(start);
2095 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2096 || (allow_pack && *s == ':' && s[1] == ':') )
2098 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2099 if (check_keyword) {
2100 char *s2 = PL_tokenbuf;
2102 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2103 s2 += sizeof("CORE::") - 1;
2104 len2 -= sizeof("CORE::") - 1;
2106 if (keyword(s2, len2, 0))
2109 if (token == METHOD) {
2114 PL_expect = XOPERATOR;
2117 NEXTVAL_NEXTTOKE.opval
2118 = newSVOP(OP_CONST,0,
2119 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2120 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2128 * Called when the lexer wants $foo *foo &foo etc, but the program
2129 * text only contains the "foo" portion. The first argument is a pointer
2130 * to the "foo", and the second argument is the type symbol to prefix.
2131 * Forces the next token to be a "BAREWORD".
2132 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2136 S_force_ident(pTHX_ const char *s, int kind)
2138 PERL_ARGS_ASSERT_FORCE_IDENT;
2141 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2142 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2143 UTF ? SVf_UTF8 : 0));
2144 NEXTVAL_NEXTTOKE.opval = o;
2145 force_next(BAREWORD);
2147 o->op_private = OPpCONST_ENTERED;
2148 /* XXX see note in pp_entereval() for why we forgo typo
2149 warnings if the symbol must be introduced in an eval.
2151 gv_fetchpvn_flags(s, len,
2152 (PL_in_eval ? GV_ADDMULTI
2153 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2154 kind == '$' ? SVt_PV :
2155 kind == '@' ? SVt_PVAV :
2156 kind == '%' ? SVt_PVHV :
2164 S_force_ident_maybe_lex(pTHX_ char pit)
2166 NEXTVAL_NEXTTOKE.ival = pit;
2171 Perl_str_to_version(pTHX_ SV *sv)
2176 const char *start = SvPV_const(sv,len);
2177 const char * const end = start + len;
2178 const bool utf = cBOOL(SvUTF8(sv));
2180 PERL_ARGS_ASSERT_STR_TO_VERSION;
2182 while (start < end) {
2186 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2191 retval += ((NV)n)/nshift;
2200 * Forces the next token to be a version number.
2201 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2202 * and if "guessing" is TRUE, then no new token is created (and the caller
2203 * must use an alternative parsing method).
2207 S_force_version(pTHX_ char *s, int guessing)
2212 PERL_ARGS_ASSERT_FORCE_VERSION;
2220 while (isDIGIT(*d) || *d == '_' || *d == '.')
2222 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2224 s = scan_num(s, &pl_yylval);
2225 version = pl_yylval.opval;
2226 ver = cSVOPx(version)->op_sv;
2227 if (SvPOK(ver) && !SvNIOK(ver)) {
2228 SvUPGRADE(ver, SVt_PVNV);
2229 SvNV_set(ver, str_to_version(ver));
2230 SvNOK_on(ver); /* hint that it is a version */
2233 else if (guessing) {
2238 /* NOTE: The parser sees the package name and the VERSION swapped */
2239 NEXTVAL_NEXTTOKE.opval = version;
2240 force_next(BAREWORD);
2246 * S_force_strict_version
2247 * Forces the next token to be a version number using strict syntax rules.
2251 S_force_strict_version(pTHX_ char *s)
2254 const char *errstr = NULL;
2256 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2258 while (isSPACE(*s)) /* leading whitespace */
2261 if (is_STRICT_VERSION(s,&errstr)) {
2263 s = (char *)scan_version(s, ver, 0);
2264 version = newSVOP(OP_CONST, 0, ver);
2266 else if ((*s != ';' && *s != '{' && *s != '}' )
2267 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2271 yyerror(errstr); /* version required */
2275 /* NOTE: The parser sees the package name and the VERSION swapped */
2276 NEXTVAL_NEXTTOKE.opval = version;
2277 force_next(BAREWORD);
2284 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2285 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2286 * unchanged, and a new SV containing the modified input is returned.
2290 S_tokeq(pTHX_ SV *sv)
2297 PERL_ARGS_ASSERT_TOKEQ;
2301 assert (!SvIsCOW(sv));
2302 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2306 /* This is relying on the SV being "well formed" with a trailing '\0' */
2307 while (s < send && !(*s == '\\' && s[1] == '\\'))
2312 if ( PL_hints & HINT_NEW_STRING ) {
2313 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2314 SVs_TEMP | SvUTF8(sv));
2318 if (s + 1 < send && (s[1] == '\\'))
2319 s++; /* all that, just for this */
2324 SvCUR_set(sv, d - SvPVX_const(sv));
2326 if ( PL_hints & HINT_NEW_STRING )
2327 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2332 * Now come three functions related to double-quote context,
2333 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2334 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2335 * interact with PL_lex_state, and create fake ( ... ) argument lists
2336 * to handle functions and concatenation.
2340 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2345 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2347 * Pattern matching will set PL_lex_op to the pattern-matching op to
2348 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2350 * OP_CONST is easy--just make the new op and return.
2352 * Everything else becomes a FUNC.
2354 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2355 * had an OP_CONST. This just sets us up for a
2356 * call to S_sublex_push().
2360 S_sublex_start(pTHX)
2362 const I32 op_type = pl_yylval.ival;
2364 if (op_type == OP_NULL) {
2365 pl_yylval.opval = PL_lex_op;
2369 if (op_type == OP_CONST) {
2370 SV *sv = PL_lex_stuff;
2371 PL_lex_stuff = NULL;
2374 if (SvTYPE(sv) == SVt_PVIV) {
2375 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2377 const char * const p = SvPV_const(sv, len);
2378 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2382 pl_yylval.opval = newSVOP(op_type, 0, sv);
2386 PL_parser->lex_super_state = PL_lex_state;
2387 PL_parser->lex_sub_inwhat = (U16)op_type;
2388 PL_parser->lex_sub_op = PL_lex_op;
2389 PL_parser->sub_no_recover = FALSE;
2390 PL_parser->sub_error_count = PL_error_count;
2391 PL_lex_state = LEX_INTERPPUSH;
2395 pl_yylval.opval = PL_lex_op;
2405 * Create a new scope to save the lexing state. The scope will be
2406 * ended in S_sublex_done. Returns a '(', starting the function arguments
2407 * to the uc, lc, etc. found before.
2408 * Sets PL_lex_state to LEX_INTERPCONCAT.
2415 const bool is_heredoc = PL_multi_close == '<';
2418 PL_lex_state = PL_parser->lex_super_state;
2419 SAVEI8(PL_lex_dojoin);
2420 SAVEI32(PL_lex_brackets);
2421 SAVEI32(PL_lex_allbrackets);
2422 SAVEI32(PL_lex_formbrack);
2423 SAVEI8(PL_lex_fakeeof);
2424 SAVEI32(PL_lex_casemods);
2425 SAVEI32(PL_lex_starts);
2426 SAVEI8(PL_lex_state);
2427 SAVESPTR(PL_lex_repl);
2428 SAVEVPTR(PL_lex_inpat);
2429 SAVEI16(PL_lex_inwhat);
2432 SAVECOPLINE(PL_curcop);
2433 SAVEI32(PL_multi_end);
2434 SAVEI32(PL_parser->herelines);
2435 PL_parser->herelines = 0;
2437 SAVEIV(PL_multi_close);
2438 SAVEPPTR(PL_bufptr);
2439 SAVEPPTR(PL_bufend);
2440 SAVEPPTR(PL_oldbufptr);
2441 SAVEPPTR(PL_oldoldbufptr);
2442 SAVEPPTR(PL_last_lop);
2443 SAVEPPTR(PL_last_uni);
2444 SAVEPPTR(PL_linestart);
2445 SAVESPTR(PL_linestr);
2446 SAVEGENERICPV(PL_lex_brackstack);
2447 SAVEGENERICPV(PL_lex_casestack);
2448 SAVEGENERICPV(PL_parser->lex_shared);
2449 SAVEBOOL(PL_parser->lex_re_reparsing);
2450 SAVEI32(PL_copline);
2452 /* The here-doc parser needs to be able to peek into outer lexing
2453 scopes to find the body of the here-doc. So we put PL_linestr and
2454 PL_bufptr into lex_shared, to ‘share’ those values.
2456 PL_parser->lex_shared->ls_linestr = PL_linestr;
2457 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2459 PL_linestr = PL_lex_stuff;
2460 PL_lex_repl = PL_parser->lex_sub_repl;
2461 PL_lex_stuff = NULL;
2462 PL_parser->lex_sub_repl = NULL;
2464 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2465 set for an inner quote-like operator and then an error causes scope-
2466 popping. We must not have a PL_lex_stuff value left dangling, as
2467 that breaks assumptions elsewhere. See bug #123617. */
2468 SAVEGENERICSV(PL_lex_stuff);
2469 SAVEGENERICSV(PL_parser->lex_sub_repl);
2471 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2472 = SvPVX(PL_linestr);
2473 PL_bufend += SvCUR(PL_linestr);
2474 PL_last_lop = PL_last_uni = NULL;
2475 SAVEFREESV(PL_linestr);
2476 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2478 PL_lex_dojoin = FALSE;
2479 PL_lex_brackets = PL_lex_formbrack = 0;
2480 PL_lex_allbrackets = 0;
2481 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2482 Newx(PL_lex_brackstack, 120, char);
2483 Newx(PL_lex_casestack, 12, char);
2484 PL_lex_casemods = 0;
2485 *PL_lex_casestack = '\0';
2487 PL_lex_state = LEX_INTERPCONCAT;
2489 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2490 PL_copline = NOLINE;
2492 Newxz(shared, 1, LEXSHARED);
2493 shared->ls_prev = PL_parser->lex_shared;
2494 PL_parser->lex_shared = shared;
2496 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2497 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2498 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2499 PL_lex_inpat = PL_parser->lex_sub_op;
2501 PL_lex_inpat = NULL;
2503 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2504 PL_in_eval &= ~EVAL_RE_REPARSING;
2511 * Restores lexer state after a S_sublex_push.
2517 if (!PL_lex_starts++) {
2518 SV * const sv = newSVpvs("");
2519 if (SvUTF8(PL_linestr))
2521 PL_expect = XOPERATOR;
2522 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2526 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2527 PL_lex_state = LEX_INTERPCASEMOD;
2531 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2532 assert(PL_lex_inwhat != OP_TRANSR);
2534 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2535 PL_linestr = PL_lex_repl;
2537 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2538 PL_bufend += SvCUR(PL_linestr);
2539 PL_last_lop = PL_last_uni = NULL;
2540 PL_lex_dojoin = FALSE;
2541 PL_lex_brackets = 0;
2542 PL_lex_allbrackets = 0;
2543 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2544 PL_lex_casemods = 0;
2545 *PL_lex_casestack = '\0';
2547 if (SvEVALED(PL_lex_repl)) {
2548 PL_lex_state = LEX_INTERPNORMAL;
2550 /* we don't clear PL_lex_repl here, so that we can check later
2551 whether this is an evalled subst; that means we rely on the
2552 logic to ensure sublex_done() is called again only via the
2553 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2556 PL_lex_state = LEX_INTERPCONCAT;
2559 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2560 CopLINE(PL_curcop) +=
2561 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2562 + PL_parser->herelines;
2563 PL_parser->herelines = 0;
2568 const line_t l = CopLINE(PL_curcop);
2570 if (PL_parser->sub_error_count != PL_error_count) {
2571 if (PL_parser->sub_no_recover) {
2576 if (PL_multi_close == '<')
2577 PL_parser->herelines += l - PL_multi_end;
2578 PL_bufend = SvPVX(PL_linestr);
2579 PL_bufend += SvCUR(PL_linestr);
2580 PL_expect = XOPERATOR;
2586 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2588 /* This justs wraps get_and_check_backslash_N_name() to output any error
2589 * message it returns. */
2591 const char * error_msg = NULL;
2594 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2596 /* charnames doesn't work well if there have been errors found */
2597 if (PL_error_count > 0) {
2601 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2604 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2611 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2612 const char* const e,
2614 const char ** error_msg)
2616 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2617 * interior, hence to the "}". Finds what the name resolves to, returning
2618 * an SV* containing it; NULL if no valid one found.
2620 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2621 * doesn't have to be. */
2629 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2632 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2635 assert(s > (char *) 3);
2637 res = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2640 SvREFCNT_dec_NN(res);
2641 /* diag_listed_as: Unknown charname '%s' */
2642 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2646 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2647 /* include the <}> */
2648 e - backslash_ptr + 1, error_msg);
2650 SvREFCNT_dec_NN(res);
2654 /* See if the charnames handler is the Perl core's, and if so, we can skip
2655 * the validation needed for a user-supplied one, as Perl's does its own
2657 table = GvHV(PL_hintgv); /* ^H */
2658 cvp = hv_fetchs(table, "charnames", FALSE);
2659 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2660 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2662 const char * const name = HvNAME(stash);
2663 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2668 /* Here, it isn't Perl's charname handler. We can't rely on a
2669 * user-supplied handler to validate the input name. For non-ut8 input,
2670 * look to see that the first character is legal. Then loop through the
2671 * rest checking that each is a continuation */
2673 /* This code makes the reasonable assumption that the only Latin1-range
2674 * characters that begin a character name alias are alphabetic, otherwise
2675 * would have to create a isCHARNAME_BEGIN macro */
2678 if (! isALPHAU(*s)) {
2683 if (! isCHARNAME_CONT(*s)) {
2686 if (*s == ' ' && *(s-1) == ' ') {
2693 /* Similarly for utf8. For invariants can check directly; for other
2694 * Latin1, can calculate their code point and check; otherwise use an
2696 if (UTF8_IS_INVARIANT(*s)) {
2697 if (! isALPHAU(*s)) {
2701 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2702 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2708 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2709 utf8_to_uvchr_buf((U8 *) s,
2719 if (UTF8_IS_INVARIANT(*s)) {
2720 if (! isCHARNAME_CONT(*s)) {
2723 if (*s == ' ' && *(s-1) == ' ') {
2728 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2729 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2736 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2737 utf8_to_uvchr_buf((U8 *) s,
2747 if (*(s-1) == ' ') {
2748 /* diag_listed_as: charnames alias definitions may not contain
2749 trailing white-space; marked by <-- HERE in %s
2751 *error_msg = Perl_form(aTHX_
2752 "charnames alias definitions may not contain trailing "
2753 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2754 (int)(s - backslash_ptr + 1), backslash_ptr,
2755 (int)(e - s + 1), s + 1);
2759 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2760 const U8* first_bad_char_loc;
2762 const char* const str = SvPV_const(res, len);
2763 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2764 &first_bad_char_loc)))
2766 _force_out_malformed_utf8_message(first_bad_char_loc,
2767 (U8 *) PL_parser->bufend,
2769 0 /* 0 means don't die */ );
2770 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2771 immediately after '%s' */
2772 *error_msg = Perl_form(aTHX_
2773 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2774 (int) (e - backslash_ptr + 1), backslash_ptr,
2775 (int) ((char *) first_bad_char_loc - str), str);
2784 /* The final %.*s makes sure that should the trailing NUL be missing
2785 * that this print won't run off the end of the string */
2786 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2788 *error_msg = Perl_form(aTHX_
2789 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2790 (int)(s - backslash_ptr + 1), backslash_ptr,
2791 (int)(e - s + 1), s + 1);
2796 /* diag_listed_as: charnames alias definitions may not contain a
2797 sequence of multiple spaces; marked by <-- HERE
2799 *error_msg = Perl_form(aTHX_
2800 "charnames alias definitions may not contain a sequence of "
2801 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2802 (int)(s - backslash_ptr + 1), backslash_ptr,
2803 (int)(e - s + 1), s + 1);
2810 Extracts the next constant part of a pattern, double-quoted string,
2811 or transliteration. This is terrifying code.
2813 For example, in parsing the double-quoted string "ab\x63$d", it would
2814 stop at the '$' and return an OP_CONST containing 'abc'.
2816 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2817 processing a pattern (PL_lex_inpat is true), a transliteration
2818 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2820 Returns a pointer to the character scanned up to. If this is
2821 advanced from the start pointer supplied (i.e. if anything was
2822 successfully parsed), will leave an OP_CONST for the substring scanned
2823 in pl_yylval. Caller must intuit reason for not parsing further
2824 by looking at the next characters herself.
2828 \N{FOO} => \N{U+hex_for_character_FOO}
2829 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2832 all other \-char, including \N and \N{ apart from \N{ABC}
2835 @ and $ where it appears to be a var, but not for $ as tail anchor
2839 In transliterations:
2840 characters are VERY literal, except for - not at the start or end
2841 of the string, which indicates a range. However some backslash sequences
2842 are recognized: \r, \n, and the like
2843 \007 \o{}, \x{}, \N{}
2844 If all elements in the transliteration are below 256,
2845 scan_const expands the range to the full set of intermediate
2846 characters. If the range is in utf8, the hyphen is replaced with
2847 a certain range mark which will be handled by pmtrans() in op.c.
2849 In double-quoted strings:
2851 all those recognized in transliterations
2852 deprecated backrefs: \1 (in substitution replacements)
2853 case and quoting: \U \Q \E
2856 scan_const does *not* construct ops to handle interpolated strings.
2857 It stops processing as soon as it finds an embedded $ or @ variable
2858 and leaves it to the caller to work out what's going on.
2860 embedded arrays (whether in pattern or not) could be:
2861 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2863 $ in double-quoted strings must be the symbol of an embedded scalar.
2865 $ in pattern could be $foo or could be tail anchor. Assumption:
2866 it's a tail anchor if $ is the last thing in the string, or if it's
2867 followed by one of "()| \r\n\t"
2869 \1 (backreferences) are turned into $1 in substitutions
2871 The structure of the code is
2872 while (there's a character to process) {
2873 handle transliteration ranges
2874 skip regexp comments /(?#comment)/ and codes /(?{code})/
2875 skip #-initiated comments in //x patterns
2876 check for embedded arrays
2877 check for embedded scalars
2879 deprecate \1 in substitution replacements
2880 handle string-changing backslashes \l \U \Q \E, etc.
2881 switch (what was escaped) {
2882 handle \- in a transliteration (becomes a literal -)
2883 if a pattern and not \N{, go treat as regular character
2884 handle \132 (octal characters)
2885 handle \x15 and \x{1234} (hex characters)
2886 handle \N{name} (named characters, also \N{3,5} in a pattern)
2887 handle \cV (control characters)
2888 handle printf-style backslashes (\f, \r, \n, etc)
2891 } (end if backslash)
2892 handle regular character
2893 } (end while character to read)
2898 S_scan_const(pTHX_ char *start)
2900 char *send = PL_bufend; /* end of the constant */
2901 SV *sv = newSV(send - start); /* sv for the constant. See note below
2903 char *s = start; /* start of the constant */
2904 char *d = SvPVX(sv); /* destination for copies */
2905 bool dorange = FALSE; /* are we in a translit range? */
2906 bool didrange = FALSE; /* did we just finish a range? */
2907 bool in_charclass = FALSE; /* within /[...]/ */
2908 bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2909 UTF8? But, this can show as true
2910 when the source isn't utf8, as for
2911 example when it is entirely composed
2913 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
2914 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
2915 number of characters found so far
2916 that will expand (into 2 bytes)
2917 should we have to convert to
2919 SV *res; /* result from charnames */
2920 STRLEN offset_to_max = 0; /* The offset in the output to where the range
2921 high-end character is temporarily placed */
2923 /* Does something require special handling in tr/// ? This avoids extra
2924 * work in a less likely case. As such, khw didn't feel it was worth
2925 * adding any branches to the more mainline code to handle this, which
2926 * means that this doesn't get set in some circumstances when things like
2927 * \x{100} get expanded out. As a result there needs to be extra testing
2928 * done in the tr code */
2929 bool has_above_latin1 = FALSE;
2931 /* Note on sizing: The scanned constant is placed into sv, which is
2932 * initialized by newSV() assuming one byte of output for every byte of
2933 * input. This routine expects newSV() to allocate an extra byte for a
2934 * trailing NUL, which this routine will append if it gets to the end of
2935 * the input. There may be more bytes of input than output (eg., \N{LATIN
2936 * CAPITAL LETTER A}), or more output than input if the constant ends up
2937 * recoded to utf8, but each time a construct is found that might increase
2938 * the needed size, SvGROW() is called. Its size parameter each time is
2939 * based on the best guess estimate at the time, namely the length used so
2940 * far, plus the length the current construct will occupy, plus room for
2941 * the trailing NUL, plus one byte for every input byte still unscanned */
2943 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2946 int backslash_N = 0; /* ? was the character from \N{} */
2947 int non_portable_endpoint = 0; /* ? In a range is an endpoint
2948 platform-specific like \x65 */
2951 PERL_ARGS_ASSERT_SCAN_CONST;
2953 assert(PL_lex_inwhat != OP_TRANSR);
2955 /* Protect sv from errors and fatal warnings. */
2956 ENTER_with_name("scan_const");
2959 /* A bunch of code in the loop below assumes that if s[n] exists and is not
2960 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
2962 assert(*send == '\0');
2965 || dorange /* Handle tr/// range at right edge of input */
2968 /* get transliterations out of the way (they're most literal) */
2969 if (PL_lex_inwhat == OP_TRANS) {
2971 /* But there isn't any special handling necessary unless there is a
2972 * range, so for most cases we just drop down and handle the value
2973 * as any other. There are two exceptions.
2975 * 1. A hyphen indicates that we are actually going to have a
2976 * range. In this case, skip the '-', set a flag, then drop
2977 * down to handle what should be the end range value.
2978 * 2. After we've handled that value, the next time through, that
2979 * flag is set and we fix up the range.
2981 * Ranges entirely within Latin1 are expanded out entirely, in
2982 * order to make the transliteration a simple table look-up.
2983 * Ranges that extend above Latin1 have to be done differently, so
2984 * there is no advantage to expanding them here, so they are
2985 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
2986 * a byte that can't occur in legal UTF-8, and hence can signify a
2987 * hyphen without any possible ambiguity. On EBCDIC machines, if
2988 * the range is expressed as Unicode, the Latin1 portion is
2989 * expanded out even if the range extends above Latin1. This is
2990 * because each code point in it has to be processed here
2991 * individually to get its native translation */
2995 /* Here, we don't think we're in a range. If the new character
2996 * is not a hyphen; or if it is a hyphen, but it's too close to
2997 * either edge to indicate a range, or if we haven't output any
2998 * characters yet then it's a regular character. */
2999 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3002 /* A regular character. Process like any other, but first
3003 * clear any flags */
3007 non_portable_endpoint = 0;
3010 /* The tests here for being above Latin1 and similar ones
3011 * in the following 'else' suffice to find all such
3012 * occurences in the constant, except those added by a
3013 * backslash escape sequence, like \x{100}. Mostly, those
3014 * set 'has_above_latin1' as appropriate */
3015 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3016 has_above_latin1 = TRUE;
3019 /* Drops down to generic code to process current byte */
3021 else { /* Is a '-' in the context where it means a range */
3022 if (didrange) { /* Something like y/A-C-Z// */
3023 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3029 s++; /* Skip past the hyphen */
3031 /* d now points to where the end-range character will be
3032 * placed. Drop down to get that character. We'll finish
3033 * processing the range the next time through the loop */
3035 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3036 has_above_latin1 = TRUE;
3039 /* Drops down to generic code to process current byte */
3041 } /* End of not a range */
3043 /* Here we have parsed a range. Now must handle it. At this
3045 * 'sv' is a SV* that contains the output string we are
3046 * constructing. The final two characters in that string
3047 * are the range start and range end, in order.
3048 * 'd' points to just beyond the range end in the 'sv' string,
3049 * where we would next place something
3054 IV range_max; /* last character in range */
3056 Size_t offset_to_min = 0;
3059 bool convert_unicode;
3060 IV real_range_max = 0;
3062 /* Get the code point values of the range ends. */
3063 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3064 offset_to_max = max_ptr - SvPVX_const(sv);
3066 /* We know the utf8 is valid, because we just constructed
3067 * it ourselves in previous loop iterations */
3068 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3069 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3070 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3072 /* This compensates for not all code setting
3073 * 'has_above_latin1', so that we don't skip stuff that
3074 * should be executed */
3075 if (range_max > 255) {
3076 has_above_latin1 = TRUE;
3080 min_ptr = max_ptr - 1;
3081 range_min = * (U8*) min_ptr;
3082 range_max = * (U8*) max_ptr;
3085 /* If the range is just a single code point, like tr/a-a/.../,
3086 * that code point is already in the output, twice. We can
3087 * just back up over the second instance and avoid all the rest
3088 * of the work. But if it is a variant character, it's been
3089 * counted twice, so decrement. (This unlikely scenario is
3090 * special cased, like the one for a range of 2 code points
3091 * below, only because the main-line code below needs a range
3092 * of 3 or more to work without special casing. Might as well
3093 * get it out of the way now.) */
3094 if (UNLIKELY(range_max == range_min)) {
3096 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3097 utf8_variant_count--;
3103 /* On EBCDIC platforms, we may have to deal with portable
3104 * ranges. These happen if at least one range endpoint is a
3105 * Unicode value (\N{...}), or if the range is a subset of
3106 * [A-Z] or [a-z], and both ends are literal characters,
3107 * like 'A', and not like \x{C1} */
3109 cBOOL(backslash_N) /* \N{} forces Unicode,
3110 hence portable range */
3111 || ( ! non_portable_endpoint
3112 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3113 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3114 if (convert_unicode) {
3116 /* Special handling is needed for these portable ranges.
3117 * They are defined to be in Unicode terms, which includes
3118 * all the Unicode code points between the end points.
3119 * Convert to Unicode to get the Unicode range. Later we
3120 * will convert each code point in the range back to
3122 range_min = NATIVE_TO_UNI(range_min);
3123 range_max = NATIVE_TO_UNI(range_max);
3127 if (range_min > range_max) {
3129 if (convert_unicode) {
3130 /* Need to convert back to native for meaningful
3131 * messages for this platform */
3132 range_min = UNI_TO_NATIVE(range_min);
3133 range_max = UNI_TO_NATIVE(range_max);
3136 /* Use the characters themselves for the error message if
3137 * ASCII printables; otherwise some visible representation
3139 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3141 "Invalid range \"%c-%c\" in transliteration operator",
3142 (char)range_min, (char)range_max);
3145 else if (convert_unicode) {
3146 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3148 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3149 UVXf "}\" in transliteration operator",
3150 range_min, range_max);
3154 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3156 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3157 " in transliteration operator",
3158 range_min, range_max);
3162 /* If the range is exactly two code points long, they are
3163 * already both in the output */
3164 if (UNLIKELY(range_min + 1 == range_max)) {
3168 /* Here the range contains at least 3 code points */
3172 /* If everything in the transliteration is below 256, we
3173 * can avoid special handling later. A translation table
3174 * for each of those bytes is created by op.c. So we
3175 * expand out all ranges to their constituent code points.
3176 * But if we've encountered something above 255, the
3177 * expanding won't help, so skip doing that. But if it's
3178 * EBCDIC, we may have to look at each character below 256
3179 * if we have to convert to/from Unicode values */
3180 if ( has_above_latin1
3182 && (range_min > 255 || ! convert_unicode)
3185 const STRLEN off = d - SvPVX(sv);
3186 const STRLEN extra = 1 + (send - s) + 1;
3189 /* Move the high character one byte to the right; then
3190 * insert between it and the range begin, an illegal
3191 * byte which serves to indicate this is a range (using
3192 * a '-' would be ambiguous). */
3194 if (off + extra > SvLEN(sv)) {
3195 d = off + SvGROW(sv, off + extra);
3196 max_ptr = d - off + offset_to_max;
3200 while (e-- > max_ptr) {
3203 *(e + 1) = (char) RANGE_INDICATOR;
3207 /* Here, we're going to expand out the range. For EBCDIC
3208 * the range can extend above 255 (not so in ASCII), so
3209 * for EBCDIC, split it into the parts above and below
3212 if (range_max > 255) {
3213 real_range_max = range_max;
3219 /* Here we need to expand out the string to contain each
3220 * character in the range. Grow the output to handle this.
3221 * For non-UTF8, we need a byte for each code point in the
3222 * range, minus the three that we've already allocated for: the
3223 * hyphen, the min, and the max. For UTF-8, we need this
3224 * plus an extra byte for each code point that occupies two
3225 * bytes (is variant) when in UTF-8 (except we've already
3226 * allocated for the end points, including if they are
3227 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3228 * platforms, it's easy to calculate a precise number. To
3229 * start, we count the variants in the range, which we need
3230 * elsewhere in this function anyway. (For the case where it
3231 * isn't easy to calculate, 'extras' has been initialized to 0,
3232 * and the calculation is done in a loop further down.) */
3234 if (convert_unicode)
3237 /* This is executed unconditionally on ASCII, and for
3238 * Unicode ranges on EBCDIC. Under these conditions, all
3239 * code points above a certain value are variant; and none
3240 * under that value are. We just need to find out how much
3241 * of the range is above that value. We don't count the
3242 * end points here, as they will already have been counted
3243 * as they were parsed. */
3244 if (range_min >= UTF_CONTINUATION_MARK) {
3246 /* The whole range is made up of variants */
3247 extras = (range_max - 1) - (range_min + 1) + 1;
3249 else if (range_max >= UTF_CONTINUATION_MARK) {
3251 /* Only the higher portion of the range is variants */
3252 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3255 utf8_variant_count += extras;
3258 /* The base growth is the number of code points in the range,
3259 * not including the endpoints, which have already been sized
3260 * for (and output). We don't subtract for the hyphen, as it
3261 * has been parsed but not output, and the SvGROW below is
3262 * based only on what's been output plus what's left to parse.
3264 grow = (range_max - 1) - (range_min + 1) + 1;
3268 /* In some cases in EBCDIC, we haven't yet calculated a
3269 * precise amount needed for the UTF-8 variants. Just
3270 * assume the worst case, that everything will expand by a
3272 if (! convert_unicode) {
3278 /* Otherwise we know exactly how many variants there
3279 * are in the range. */
3284 /* Grow, but position the output to overwrite the range min end
3285 * point, because in some cases we overwrite that */
3286 SvCUR_set(sv, d - SvPVX_const(sv));
3287 offset_to_min = min_ptr - SvPVX_const(sv);
3289 /* See Note on sizing above. */
3290 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3293 + 1 /* Trailing NUL */ );
3295 /* Now, we can expand out the range. */
3297 if (convert_unicode) {
3300 /* Recall that the min and max are now in Unicode terms, so
3301 * we have to convert each character to its native
3304 for (i = range_min; i <= range_max; i++) {
3305 append_utf8_from_native_byte(
3306 LATIN1_TO_NATIVE((U8) i),
3311 for (i = range_min; i <= range_max; i++) {
3312 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3318 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3320 /* Here, no conversions are necessary, which means that the
3321 * first character in the range is already in 'd' and
3322 * valid, so we can skip overwriting it */
3326 for (i = range_min + 1; i <= range_max; i++) {
3327 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3333 assert(range_min + 1 <= range_max);
3334 for (i = range_min + 1; i < range_max; i++) {
3336 /* In this case on EBCDIC, we haven't calculated
3337 * the variants. Do it here, as we go along */
3338 if (! UVCHR_IS_INVARIANT(i)) {
3339 utf8_variant_count++;
3345 /* The range_max is done outside the loop so as to
3346 * avoid having to special case not incrementing
3347 * 'utf8_variant_count' on EBCDIC (it's already been
3348 * counted when originally parsed) */
3349 *d++ = (char) range_max;
3354 /* If the original range extended above 255, add in that
3356 if (real_range_max) {
3357 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3358 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3359 if (real_range_max > 0x100) {
3360 if (real_range_max > 0x101) {
3361 *d++ = (char) RANGE_INDICATOR;
3363 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3369 /* mark the range as done, and continue */
3373 non_portable_endpoint = 0;
3377 } /* End of is a range */
3378 } /* End of transliteration. Joins main code after these else's */
3379 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3382 while (s1 >= start && *s1-- == '\\')
3385 in_charclass = TRUE;
3387 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3390 while (s1 >= start && *s1-- == '\\')
3393 in_charclass = FALSE;
3395 /* skip for regexp comments /(?#comment)/, except for the last
3396 * char, which will be done separately. Stop on (?{..}) and
3398 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3401 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3403 while (s + len < send && *s != ')') {
3404 Copy(s, d, len, U8);
3407 len = UTF8_SAFE_SKIP(s, send);
3410 else while (s+1 < send && *s != ')') {
3414 else if (!PL_lex_casemods
3415 && ( s[2] == '{' /* This should match regcomp.c */
3416 || (s[2] == '?' && s[3] == '{')))
3421 /* likewise skip #-initiated comments in //x patterns */
3425 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3427 while (s < send && *s != '\n')
3430 /* no further processing of single-quoted regex */
3431 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3432 goto default_action;
3434 /* check for embedded arrays
3435 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3437 else if (*s == '@' && s[1]) {
3439 ? isIDFIRST_utf8_safe(s+1, send)
3440 : isWORDCHAR_A(s[1]))
3444 if (strchr(":'{$", s[1]))
3446 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3447 break; /* in regexp, neither @+ nor @- are interpolated */
3449 /* check for embedded scalars. only stop if we're sure it's a
3451 else if (*s == '$') {
3452 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3454 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
3456 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3457 "Possible unintended interpolation of $\\ in regex");
3459 break; /* in regexp, $ might be tail anchor */
3463 /* End of else if chain - OP_TRANS rejoin rest */
3465 if (UNLIKELY(s >= send)) {
3471 if (*s == '\\' && s+1 < send) {
3472 char* e; /* Can be used for ending '}', etc. */
3476 /* warn on \1 - \9 in substitution replacements, but note that \11
3477 * is an octal; and \19 is \1 followed by '9' */
3478 if (PL_lex_inwhat == OP_SUBST
3484 /* diag_listed_as: \%d better written as $%d */
3485 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3490 /* string-change backslash escapes */
3491 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
3495 /* In a pattern, process \N, but skip any other backslash escapes.
3496 * This is because we don't want to translate an escape sequence
3497 * into a meta symbol and have the regex compiler use the meta
3498 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3499 * in spite of this, we do have to process \N here while the proper
3500 * charnames handler is in scope. See bugs #56444 and #62056.
3502 * There is a complication because \N in a pattern may also stand
3503 * for 'match a non-nl', and not mean a charname, in which case its
3504 * processing should be deferred to the regex compiler. To be a
3505 * charname it must be followed immediately by a '{', and not look
3506 * like \N followed by a curly quantifier, i.e., not something like
3507 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3509 else if (PL_lex_inpat
3512 || regcurly(s + 1)))
3515 goto default_action;
3521 if ((isALPHANUMERIC(*s)))
3522 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3523 "Unrecognized escape \\%c passed through",
3525 /* default action is to copy the quoted character */
3526 goto default_action;
3529 /* eg. \132 indicates the octal constant 0132 */
3530 case '0': case '1': case '2': case '3':
3531 case '4': case '5': case '6': case '7':
3533 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
3535 uv = grok_oct(s, &len, &flags, NULL);
3537 if (len < 3 && s < send && isDIGIT(*s)
3538 && ckWARN(WARN_MISC))
3540 Perl_warner(aTHX_ packWARN(WARN_MISC),
3541 "%s", form_short_octal_warning(s, len));
3544 goto NUM_ESCAPE_INSERT;
3546 /* eg. \o{24} indicates the octal constant \024 */
3551 bool valid = grok_bslash_o(&s, send,
3553 TRUE, /* Output warning */
3554 FALSE, /* Not strict */
3555 TRUE, /* Output warnings for
3560 uv = 0; /* drop through to ensure range ends are set */
3562 goto NUM_ESCAPE_INSERT;
3565 /* eg. \x24 indicates the hex constant 0x24 */
3570 bool valid = grok_bslash_x(&s, send,
3572 TRUE, /* Output warning */
3573 FALSE, /* Not strict */
3574 TRUE, /* Output warnings for
3579 uv = 0; /* drop through to ensure range ends are set */
3584 /* Insert oct or hex escaped character. */
3586 /* Here uv is the ordinal of the next character being added */
3587 if (UVCHR_IS_INVARIANT(uv)) {
3591 if (!d_is_utf8 && uv > 255) {
3593 /* Here, 'uv' won't fit unless we convert to UTF-8.
3594 * If we've only seen invariants so far, all we have to
3595 * do is turn on the flag */
3596 if (utf8_variant_count == 0) {
3600 SvCUR_set(sv, d - SvPVX_const(sv));
3604 sv_utf8_upgrade_flags_grow(
3606 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3608 /* Since we're having to grow here,
3609 * make sure we have enough room for
3610 * this escape and a NUL, so the
3611 * code immediately below won't have
3612 * to actually grow again */
3614 + (STRLEN)(send - s) + 1);
3615 d = SvPVX(sv) + SvCUR(sv);
3618 has_above_latin1 = TRUE;
3624 utf8_variant_count++;
3627 /* Usually, there will already be enough room in 'sv'
3628 * since such escapes are likely longer than any UTF-8
3629 * sequence they can end up as. This isn't the case on
3630 * EBCDIC where \x{40000000} contains 12 bytes, and the
3631 * UTF-8 for it contains 14. And, we have to allow for
3632 * a trailing NUL. It probably can't happen on ASCII
3633 * platforms, but be safe. See Note on sizing above. */
3634 const STRLEN needed = d - SvPVX(sv)
3638 if (UNLIKELY(needed > SvLEN(sv))) {
3639 SvCUR_set(sv, d - SvPVX_const(sv));
3640 d = SvCUR(sv) + SvGROW(sv, needed);
3643 d = (char*)uvchr_to_utf8((U8*)d, uv);
3647 non_portable_endpoint++;
3652 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3653 * named character, like \N{LATIN SMALL LETTER A}, or a named
3654 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3655 * GRAVE} (except y/// can't handle the latter, croaking). For
3656 * convenience all three forms are referred to as "named
3657 * characters" below.
3659 * For patterns, \N also can mean to match a non-newline. Code
3660 * before this 'switch' statement should already have handled
3661 * this situation, and hence this code only has to deal with
3662 * the named character cases.
3664 * For non-patterns, the named characters are converted to
3665 * their string equivalents. In patterns, named characters are
3666 * not converted to their ultimate forms for the same reasons
3667 * that other escapes aren't (mainly that the ultimate
3668 * character could be considered a meta-symbol by the regex
3669 * compiler). Instead, they are converted to the \N{U+...}
3670 * form to get the value from the charnames that is in effect
3671 * right now, while preserving the fact that it was a named
3672 * character, so that the regex compiler knows this.
3674 * The structure of this section of code (besides checking for
3675 * errors and upgrading to utf8) is:
3676 * If the named character is of the form \N{U+...}, pass it
3677 * through if a pattern; otherwise convert the code point
3679 * Otherwise must be some \N{NAME}: convert to
3680 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3682 * Transliteration is an exception. The conversion to utf8 is
3683 * only done if the code point requires it to be representable.
3685 * Here, 's' points to the 'N'; the test below is guaranteed to
3686 * succeed if we are being called on a pattern, as we already
3687 * know from a test above that the next character is a '{'. A
3688 * non-pattern \N must mean 'named character', which requires
3692 yyerror("Missing braces on \\N{}");
3698 /* If there is no matching '}', it is an error. */
3699 if (! (e = (char *) memchr(s, '}', send - s))) {
3700 if (! PL_lex_inpat) {
3701 yyerror("Missing right brace on \\N{}");
3703 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3705 yyquit(); /* Have exhausted the input. */
3708 /* Here it looks like a named character */
3710 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3711 s += 2; /* Skip to next char after the 'U+' */
3714 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3715 /* Check the syntax. */
3718 if (!isXDIGIT(*s)) {
3721 "Invalid hexadecimal number in \\N{U+...}"
3730 else if ((*s == '.' || *s == '_')
3736 /* Pass everything through unchanged.
3737 * +1 is for the '}' */
3738 Copy(orig_s, d, e - orig_s + 1, char);
3739 d += e - orig_s + 1;
3741 else { /* Not a pattern: convert the hex to string */
3742 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3743 | PERL_SCAN_SILENT_ILLDIGIT
3744 | PERL_SCAN_DISALLOW_PREFIX;
3746 uv = grok_hex(s, &len, &flags, NULL);
3747 if (len == 0 || (len != (STRLEN)(e - s)))
3750 /* For non-tr///, if the destination is not in utf8,
3751 * unconditionally recode it to be so. This is
3752 * because \N{} implies Unicode semantics, and scalars
3753 * have to be in utf8 to guarantee those semantics.
3754 * tr/// doesn't care about Unicode rules, so no need
3755 * there to upgrade to UTF-8 for small enough code
3757 if (! d_is_utf8 && ( uv > 0xFF
3758 || PL_lex_inwhat != OP_TRANS))
3760 /* See Note on sizing above. */
3761 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3763 SvCUR_set(sv, d - SvPVX_const(sv));
3767 if (utf8_variant_count == 0) {
3769 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3772 sv_utf8_upgrade_flags_grow(
3774 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3776 d = SvPVX(sv) + SvCUR(sv);
3780 has_above_latin1 = TRUE;
3783 /* Add the (Unicode) code point to the output. */
3784 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3785 *d++ = (char) LATIN1_TO_NATIVE(uv);
3788 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv, 0);
3792 else /* Here is \N{NAME} but not \N{U+...}. */
3793 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3794 { /* Failed. We should die eventually, but for now use a NUL
3798 else { /* Successfully evaluated the name */
3800 const char *str = SvPV_const(res, len);
3803 if (! len) { /* The name resolved to an empty string */
3804 const char empty_N[] = "\\N{_}";
3805 Copy(empty_N, d, sizeof(empty_N) - 1, char);
3806 d += sizeof(empty_N) - 1;
3809 /* In order to not lose information for the regex
3810 * compiler, pass the result in the specially made
3811 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3812 * the code points in hex of each character
3813 * returned by charnames */
3815 const char *str_end = str + len;
3816 const STRLEN off = d - SvPVX_const(sv);
3818 if (! SvUTF8(res)) {
3819 /* For the non-UTF-8 case, we can determine the
3820 * exact length needed without having to parse
3821 * through the string. Each character takes up
3822 * 2 hex digits plus either a trailing dot or
3824 const char initial_text[] = "\\N{U+";
3825 const STRLEN initial_len = sizeof(initial_text)
3827 d = off + SvGROW(sv, off
3830 /* +1 for trailing NUL */
3833 + (STRLEN)(send - e));
3834 Copy(initial_text, d, initial_len, char);
3836 while (str < str_end) {
3839 my_snprintf(hex_string,
3843 /* The regex compiler is
3844 * expecting Unicode, not
3846 NATIVE_TO_LATIN1(*str));
3847 PERL_MY_SNPRINTF_POST_GUARD(len,
3848 sizeof(hex_string));
3849 Copy(hex_string, d, 3, char);
3853 d--; /* Below, we will overwrite the final
3854 dot with a right brace */
3857 STRLEN char_length; /* cur char's byte length */
3859 /* and the number of bytes after this is
3860 * translated into hex digits */
3861 STRLEN output_length;
3863 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3864 * for max('U+', '.'); and 1 for NUL */
3865 char hex_string[2 * UTF8_MAXBYTES + 5];
3867 /* Get the first character of the result. */
3868 U32 uv = utf8n_to_uvchr((U8 *) str,
3872 /* Convert first code point to Unicode hex,
3873 * including the boiler plate before it. */
3875 my_snprintf(hex_string, sizeof(hex_string),
3877 (unsigned int) NATIVE_TO_UNI(uv));
3879 /* Make sure there is enough space to hold it */
3880 d = off + SvGROW(sv, off
3882 + (STRLEN)(send - e)
3883 + 2); /* '}' + NUL */
3885 Copy(hex_string, d, output_length, char);
3888 /* For each subsequent character, append dot and
3889 * its Unicode code point in hex */
3890 while ((str += char_length) < str_end) {
3891 const STRLEN off = d - SvPVX_const(sv);
3892 U32 uv = utf8n_to_uvchr((U8 *) str,
3897 my_snprintf(hex_string,
3900 (unsigned int) NATIVE_TO_UNI(uv));
3902 d = off + SvGROW(sv, off
3904 + (STRLEN)(send - e)
3905 + 2); /* '}' + NUL */
3906 Copy(hex_string, d, output_length, char);
3911 *d++ = '}'; /* Done. Add the trailing brace */
3914 else { /* Here, not in a pattern. Convert the name to a
3917 if (PL_lex_inwhat == OP_TRANS) {
3918 str = SvPV_const(res, len);
3919 if (len > ((SvUTF8(res))
3923 yyerror(Perl_form(aTHX_
3924 "%.*s must not be a named sequence"
3925 " in transliteration operator",
3926 /* +1 to include the "}" */
3927 (int) (e + 1 - start), start));
3929 goto end_backslash_N;
3932 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
3933 has_above_latin1 = TRUE;
3937 else if (! SvUTF8(res)) {
3938 /* Make sure \N{} return is UTF-8. This is because
3939 * \N{} implies Unicode semantics, and scalars have
3940 * to be in utf8 to guarantee those semantics; but
3941 * not needed in tr/// */
3942 sv_utf8_upgrade_flags(res, 0);
3943 str = SvPV_const(res, len);
3946 /* Upgrade destination to be utf8 if this new
3948 if (! d_is_utf8 && SvUTF8(res)) {
3949 /* See Note on sizing above. */
3950 const STRLEN extra = len + (send - s) + 1;
3952 SvCUR_set(sv, d - SvPVX_const(sv));
3956 if (utf8_variant_count == 0) {
3958 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3961 sv_utf8_upgrade_flags_grow(sv,
3962 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3964 d = SvPVX(sv) + SvCUR(sv);
3967 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3969 /* See Note on sizing above. (NOTE: SvCUR() is not
3970 * set correctly here). */
3971 const STRLEN extra = len + (send - e) + 1;
3972 const STRLEN off = d - SvPVX_const(sv);
3973 d = off + SvGROW(sv, off + extra);
3975 Copy(str, d, len, char);
3981 } /* End \N{NAME} */
3985 backslash_N++; /* \N{} is defined to be Unicode */
3987 s = e + 1; /* Point to just after the '}' */
3990 /* \c is a control character */
3994 *d++ = grok_bslash_c(*s, 1);
3997 yyerror("Missing control char name in \\c");
3998 yyquit(); /* Are at end of input, no sense continuing */
4001 non_portable_endpoint++;
4005 /* printf-style backslashes, formfeeds, newlines, etc */
4031 } /* end if (backslash) */
4034 /* Just copy the input to the output, though we may have to convert
4037 * If the input has the same representation in UTF-8 as not, it will be
4038 * a single byte, and we don't care about UTF8ness; just copy the byte */
4039 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4042 else if (! s_is_utf8 && ! d_is_utf8) {
4043 /* If neither source nor output is UTF-8, is also a single byte,
4044 * just copy it; but this byte counts should we later have to
4045 * convert to UTF-8 */
4047 utf8_variant_count++;
4049 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4050 const STRLEN len = UTF8SKIP(s);
4052 /* We expect the source to have already been checked for
4054 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4056 Copy(s, d, len, U8);
4060 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4061 STRLEN need = send - s + 1; /* See Note on sizing above. */
4063 SvCUR_set(sv, d - SvPVX_const(sv));
4067 if (utf8_variant_count == 0) {
4069 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4072 sv_utf8_upgrade_flags_grow(sv,
4073 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4075 d = SvPVX(sv) + SvCUR(sv);
4078 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4080 else { /* UTF8ness matters: convert this non-UTF8 source char to
4081 UTF-8 for output. It will occupy 2 bytes, but don't include
4082 the input byte since we haven't incremented 's' yet. See
4083 Note on sizing above. */
4084 const STRLEN off = d - SvPVX(sv);
4085 const STRLEN extra = 2 + (send - s - 1) + 1;
4086 if (off + extra > SvLEN(sv)) {
4087 d = off + SvGROW(sv, off + extra);
4089 *d++ = UTF8_EIGHT_BIT_HI(*s);
4090 *d++ = UTF8_EIGHT_BIT_LO(*s);
4093 } /* while loop to process each character */
4096 const STRLEN off = d - SvPVX(sv);
4098 /* See if room for the terminating NUL */
4099 if (UNLIKELY(off >= SvLEN(sv))) {
4103 if (off > SvLEN(sv))
4105 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4106 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4108 /* Whew! Here we don't have room for the terminating NUL, but
4109 * everything else so far has fit. It's not too late to grow
4110 * to fit the NUL and continue on. But it is a bug, as the code
4111 * above was supposed to have made room for this, so under
4112 * DEBUGGING builds, we panic anyway. */
4113 d = off + SvGROW(sv, off + 1);
4117 /* terminate the string and set up the sv */
4119 SvCUR_set(sv, d - SvPVX_const(sv));
4126 /* shrink the sv if we allocated more than we used */
4127 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4128 SvPV_shrink_to_cur(sv);
4131 /* return the substring (via pl_yylval) only if we parsed anything */
4134 for (; s2 < s; s2++) {
4136 COPLINE_INC_WITH_HERELINES;
4138 SvREFCNT_inc_simple_void_NN(sv);
4139 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4140 && ! PL_parser->lex_re_reparsing)
4142 const char *const key = PL_lex_inpat ? "qr" : "q";
4143 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4147 if (PL_lex_inwhat == OP_TRANS) {
4150 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4153 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4161 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4162 type, typelen, NULL);
4164 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4166 LEAVE_with_name("scan_const");
4171 * Returns TRUE if there's more to the expression (e.g., a subscript),
4174 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4176 * ->[ and ->{ return TRUE
4177 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4178 * { and [ outside a pattern are always subscripts, so return TRUE
4179 * if we're outside a pattern and it's not { or [, then return FALSE
4180 * if we're in a pattern and the first char is a {
4181 * {4,5} (any digits around the comma) returns FALSE
4182 * if we're in a pattern and the first char is a [
4184 * [SOMETHING] has a funky algorithm to decide whether it's a
4185 * character class or not. It has to deal with things like
4186 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4187 * anything else returns TRUE
4190 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4193 S_intuit_more(pTHX_ char *s, char *e)
4195 PERL_ARGS_ASSERT_INTUIT_MORE;
4197 if (PL_lex_brackets)
4199 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4201 if (*s == '-' && s[1] == '>'
4202 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4203 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4204 ||(s[2] == '@' && strchr("*[{",s[3])) ))
4206 if (*s != '{' && *s != '[')
4208 PL_parser->sub_no_recover = TRUE;
4212 /* In a pattern, so maybe we have {n,m}. */
4220 /* On the other hand, maybe we have a character class */
4223 if (*s == ']' || *s == '^')
4226 /* this is terrifying, and it works */
4229 const char * const send = (char *) memchr(s, ']', e - s);
4230 unsigned char un_char, last_un_char;
4231 char tmpbuf[sizeof PL_tokenbuf * 4];
4233 if (!send) /* has to be an expression */
4235 weight = 2; /* let's weigh the evidence */
4239 else if (isDIGIT(*s)) {
4241 if (isDIGIT(s[1]) && s[2] == ']')
4247 Zero(seen,256,char);
4249 for (; s < send; s++) {
4250 last_un_char = un_char;
4251 un_char = (unsigned char)*s;
4256 weight -= seen[un_char] * 10;
4257 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4259 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4260 len = (int)strlen(tmpbuf);
4261 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4262 UTF ? SVf_UTF8 : 0, SVt_PV))
4269 && strchr("[#!%*<>()-=",s[1]))
4271 if (/*{*/ strchr("])} =",s[2]))
4280 if (strchr("wds]",s[1]))
4282 else if (seen[(U8)'\''] || seen[(U8)'"'])
4284 else if (strchr("rnftbxcav",s[1]))
4286 else if (isDIGIT(s[1])) {
4288 while (s[1] && isDIGIT(s[1]))
4298 if (strchr("aA01! ",last_un_char))
4300 if (strchr("zZ79~",s[1]))
4302 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4303 weight -= 5; /* cope with negative subscript */
4306 if (!isWORDCHAR(last_un_char)
4307 && !(last_un_char == '$' || last_un_char == '@'
4308 || last_un_char == '&')
4309 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4313 if (keyword(d, s - d, 0))
4316 if (un_char == last_un_char + 1)
4318 weight -= seen[un_char];
4323 if (weight >= 0) /* probably a character class */
4333 * Does all the checking to disambiguate
4335 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4336 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4338 * First argument is the stuff after the first token, e.g. "bar".
4340 * Not a method if foo is a filehandle.
4341 * Not a method if foo is a subroutine prototyped to take a filehandle.
4342 * Not a method if it's really "Foo $bar"
4343 * Method if it's "foo $bar"
4344 * Not a method if it's really "print foo $bar"
4345 * Method if it's really "foo package::" (interpreted as package->foo)
4346 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4347 * Not a method if bar is a filehandle or package, but is quoted with
4352 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4354 char *s = start + (*start == '$');
4355 char tmpbuf[sizeof PL_tokenbuf];
4358 /* Mustn't actually add anything to a symbol table.
4359 But also don't want to "initialise" any placeholder
4360 constants that might already be there into full
4361 blown PVGVs with attached PVCV. */
4363 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4365 PERL_ARGS_ASSERT_INTUIT_METHOD;
4367 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4369 if (cv && SvPOK(cv)) {
4370 const char *proto = CvPROTO(cv);
4372 while (*proto && (isSPACE(*proto) || *proto == ';'))
4379 if (*start == '$') {
4380 SSize_t start_off = start - SvPVX(PL_linestr);
4381 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4382 || isUPPER(*PL_tokenbuf))
4384 /* this could be $# */
4387 PL_bufptr = SvPVX(PL_linestr) + start_off;
4389 return *s == '(' ? FUNCMETH : METHOD;
4392 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4393 /* start is the beginning of the possible filehandle/object,
4394 * and s is the end of it
4395 * tmpbuf is a copy of it (but with single quotes as double colons)
4398 if (!keyword(tmpbuf, len, 0)) {
4399 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4404 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4405 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4407 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4408 && (!isGV(indirgv) || GvCVu(indirgv)))
4410 /* filehandle or package name makes it a method */
4411 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4413 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4414 return 0; /* no assumptions -- "=>" quotes bareword */
4416 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4417 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4418 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4420 force_next(BAREWORD);
4422 return *s == '(' ? FUNCMETH : METHOD;
4428 /* Encoded script support. filter_add() effectively inserts a
4429 * 'pre-processing' function into the current source input stream.
4430 * Note that the filter function only applies to the current source file
4431 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4433 * The datasv parameter (which may be NULL) can be used to pass
4434 * private data to this instance of the filter. The filter function
4435 * can recover the SV using the FILTER_DATA macro and use it to
4436 * store private buffers and state information.
4438 * The supplied datasv parameter is upgraded to a PVIO type
4439 * and the IoDIRP/IoANY field is used to store the function pointer,
4440 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4441 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4442 * private use must be set using malloc'd pointers.
4446 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4454 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4455 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4457 if (!PL_rsfp_filters)
4458 PL_rsfp_filters = newAV();
4461 SvUPGRADE(datasv, SVt_PVIO);
4462 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4463 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4464 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4465 FPTR2DPTR(void *, IoANY(datasv)),
4466 SvPV_nolen(datasv)));
4467 av_unshift(PL_rsfp_filters, 1);
4468 av_store(PL_rsfp_filters, 0, datasv) ;
4470 !PL_parser->filtered
4471 && PL_parser->lex_flags & LEX_EVALBYTES
4472 && PL_bufptr < PL_bufend
4474 const char *s = PL_bufptr;
4475 while (s < PL_bufend) {
4477 SV *linestr = PL_parser->linestr;
4478 char *buf = SvPVX(linestr);
4479 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4480 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4481 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4482 STRLEN const linestart_pos = PL_parser->linestart - buf;
4483 STRLEN const last_uni_pos =
4484 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4485 STRLEN const last_lop_pos =
4486 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4487 av_push(PL_rsfp_filters, linestr);
4488 PL_parser->linestr =
4489 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4490 buf = SvPVX(PL_parser->linestr);
4491 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4492 PL_parser->bufptr = buf + bufptr_pos;
4493 PL_parser->oldbufptr = buf + oldbufptr_pos;
4494 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4495 PL_parser->linestart = buf + linestart_pos;
4496 if (PL_parser->last_uni)
4497 PL_parser->last_uni = buf + last_uni_pos;
4498 if (PL_parser->last_lop)
4499 PL_parser->last_lop = buf + last_lop_pos;
4500 SvLEN_set(linestr, SvCUR(linestr));
4501 SvCUR_set(linestr, s - SvPVX(linestr));
4502 PL_parser->filtered = 1;
4512 /* Delete most recently added instance of this filter function. */
4514 Perl_filter_del(pTHX_ filter_t funcp)
4518 PERL_ARGS_ASSERT_FILTER_DEL;
4521 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4522 FPTR2DPTR(void*, funcp)));
4524 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4526 /* if filter is on top of stack (usual case) just pop it off */
4527 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4528 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4529 sv_free(av_pop(PL_rsfp_filters));
4533 /* we need to search for the correct entry and clear it */
4534 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4538 /* Invoke the idxth filter function for the current rsfp. */
4539 /* maxlen 0 = read one text line */
4541 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4546 /* This API is bad. It should have been using unsigned int for maxlen.
4547 Not sure if we want to change the API, but if not we should sanity
4548 check the value here. */
4549 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4551 PERL_ARGS_ASSERT_FILTER_READ;
4553 if (!PL_parser || !PL_rsfp_filters)
4555 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4556 /* Provide a default input filter to make life easy. */
4557 /* Note that we append to the line. This is handy. */
4558 DEBUG_P(PerlIO_printf(Perl_debug_log,
4559 "filter_read %d: from rsfp\n", idx));
4560 if (correct_length) {
4563 const int old_len = SvCUR(buf_sv);
4565 /* ensure buf_sv is large enough */
4566 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4567 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4568 correct_length)) <= 0) {
4569 if (PerlIO_error(PL_rsfp))
4570 return -1; /* error */
4572 return 0 ; /* end of file */
4574 SvCUR_set(buf_sv, old_len + len) ;
4575 SvPVX(buf_sv)[old_len + len] = '\0';
4578 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4579 if (PerlIO_error(PL_rsfp))
4580 return -1; /* error */
4582 return 0 ; /* end of file */
4585 return SvCUR(buf_sv);
4587 /* Skip this filter slot if filter has been deleted */
4588 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4589 DEBUG_P(PerlIO_printf(Perl_debug_log,
4590 "filter_read %d: skipped (filter deleted)\n",
4592 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4594 if (SvTYPE(datasv) != SVt_PVIO) {
4595 if (correct_length) {
4597 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4598 if (!remainder) return 0; /* eof */
4599 if (correct_length > remainder) correct_length = remainder;
4600 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4601 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4604 const char *s = SvEND(datasv);
4605 const char *send = SvPVX(datasv) + SvLEN(datasv);
4613 if (s == send) return 0; /* eof */
4614 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4615 SvCUR_set(datasv, s-SvPVX(datasv));
4617 return SvCUR(buf_sv);
4619 /* Get function pointer hidden within datasv */
4620 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4621 DEBUG_P(PerlIO_printf(Perl_debug_log,
4622 "filter_read %d: via function %p (%s)\n",
4623 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4624 /* Call function. The function is expected to */
4625 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4626 /* Return: <0:error, =0:eof, >0:not eof */
4628 save_scalar(PL_errgv);
4629 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4635 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4637 PERL_ARGS_ASSERT_FILTER_GETS;
4639 #ifdef PERL_CR_FILTER
4640 if (!PL_rsfp_filters) {
4641 filter_add(S_cr_textfilter,NULL);
4644 if (PL_rsfp_filters) {
4646 SvCUR_set(sv, 0); /* start with empty line */
4647 if (FILTER_READ(0, sv, 0) > 0)
4648 return ( SvPVX(sv) ) ;
4653 return (sv_gets(sv, PL_rsfp, append));
4657 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4661 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4663 if (memEQs(pkgname, len, "__PACKAGE__"))
4667 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4668 && (gv = gv_fetchpvn_flags(pkgname,
4670 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4672 return GvHV(gv); /* Foo:: */
4675 /* use constant CLASS => 'MyClass' */
4676 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4677 if (gv && GvCV(gv)) {
4678 SV * const sv = cv_const_sv(GvCV(gv));
4680 return gv_stashsv(sv, 0);
4683 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4688 S_tokenize_use(pTHX_ int is_use, char *s) {
4689 PERL_ARGS_ASSERT_TOKENIZE_USE;
4691 if (PL_expect != XSTATE)
4692 /* diag_listed_as: "use" not allowed in expression */
4693 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4694 is_use ? "use" : "no"));
4697 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4698 s = force_version(s, TRUE);
4699 if (*s == ';' || *s == '}'
4700 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4701 NEXTVAL_NEXTTOKE.opval = NULL;
4702 force_next(BAREWORD);
4704 else if (*s == 'v') {
4705 s = force_word(s,BAREWORD,FALSE,TRUE);
4706 s = force_version(s, FALSE);
4710 s = force_word(s,BAREWORD,FALSE,TRUE);
4711 s = force_version(s, FALSE);
4713 pl_yylval.ival = is_use;
4717 static const char* const exp_name[] =
4718 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4719 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4720 "SIGVAR", "TERMORDORDOR"
4724 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4726 S_word_takes_any_delimiter(char *p, STRLEN len)
4728 return (len == 1 && strchr("msyq", p[0]))
4730 && ((p[0] == 't' && p[1] == 'r')
4731 || (p[0] == 'q' && strchr("qwxr", p[1]))));
4735 S_check_scalar_slice(pTHX_ char *s)
4738 while (SPACE_OR_TAB(*s)) s++;
4739 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4745 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4746 || (*s && strchr(" \t$#+-'\"", *s)))
4748 s += UTF ? UTF8SKIP(s) : 1;
4750 if (*s == '}' || *s == ']')
4751 pl_yylval.ival = OPpSLICEWARNING;
4754 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4756 S_lex_token_boundary(pTHX)
4758 PL_oldoldbufptr = PL_oldbufptr;
4759 PL_oldbufptr = PL_bufptr;
4762 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4764 S_vcs_conflict_marker(pTHX_ char *s)
4766 lex_token_boundary();
4768 yyerror("Version control conflict marker");
4769 while (s < PL_bufend && *s != '\n')
4775 yyl_sigvar(pTHX_ char *s)
4777 /* we expect the sigil and optional var name part of a
4778 * signature element here. Since a '$' is not necessarily
4779 * followed by a var name, handle it specially here; the general
4780 * yylex code would otherwise try to interpret whatever follows
4781 * as a var; e.g. ($, ...) would be seen as the var '$,'
4788 PL_bufptr = s; /* for error reporting */
4793 /* spot stuff that looks like an prototype */
4794 if (strchr("$:@%&*;\\[]", *s)) {
4795 yyerror("Illegal character following sigil in a subroutine signature");
4798 /* '$#' is banned, while '$ # comment' isn't */
4800 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4804 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4805 char *dest = PL_tokenbuf + 1;
4806 /* read var name, including sigil, into PL_tokenbuf */
4807 PL_tokenbuf[0] = sigil;
4808 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4809 0, cBOOL(UTF), FALSE, FALSE);
4811 assert(PL_tokenbuf[1]); /* we have a variable name */
4819 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4820 * as the ASSIGNOP, and exclude other tokens that start with =
4822 if (*s == '=' && (!s[1] || strchr("=~>", s[1]) == 0)) {
4823 /* save now to report with the same context as we did when
4824 * all ASSIGNOPS were accepted */
4828 NEXTVAL_NEXTTOKE.ival = 0;
4829 force_next(ASSIGNOP);
4832 else if (*s == ',' || *s == ')') {
4833 PL_expect = XOPERATOR;
4836 /* make sure the context shows the unexpected character and
4837 * hopefully a bit more */
4839 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4841 PL_bufptr = s; /* for error reporting */
4842 yyerror("Illegal operator following parameter in a subroutine signature");
4846 NEXTVAL_NEXTTOKE.ival = sigil;
4847 force_next('p'); /* force a signature pending identifier */
4854 case ',': /* handle ($a,,$b) */
4859 yyerror("A signature parameter must start with '$', '@' or '%'");
4860 /* very crude error recovery: skip to likely next signature
4862 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4871 yyl_dollar(pTHX_ char *s)
4875 if (PL_expect == XPOSTDEREF) {
4878 POSTDEREF(DOLSHARP);
4884 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
4885 || strchr("{$:+-@", s[2])))
4887 PL_tokenbuf[0] = '@';
4888 s = scan_ident(s + 1, PL_tokenbuf + 1,
4889 sizeof PL_tokenbuf - 1, FALSE);
4890 if (PL_expect == XOPERATOR) {
4892 if (PL_bufptr > s) {
4894 PL_bufptr = PL_oldbufptr;
4896 no_op("Array length", d);
4898 if (!PL_tokenbuf[1])
4900 PL_expect = XOPERATOR;
4901 force_ident_maybe_lex('#');
4905 PL_tokenbuf[0] = '$';
4906 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
4907 if (PL_expect == XOPERATOR) {
4909 if (PL_bufptr > s) {
4911 PL_bufptr = PL_oldbufptr;
4915 if (!PL_tokenbuf[1]) {
4917 yyerror("Final $ should be \\$ or $name");
4922 const char tmp = *s;
4923 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
4926 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
4927 && intuit_more(s, PL_bufend)) {
4929 PL_tokenbuf[0] = '@';
4930 if (ckWARN(WARN_SYNTAX)) {
4934 || isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)
4937 t += UTF ? UTF8SKIP(t) : 1;
4940 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
4941 while (t < PL_bufend && *t != ']')
4943 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4944 "Multidimensional syntax %" UTF8f " not supported",
4945 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
4949 else if (*s == '{') {
4951 PL_tokenbuf[0] = '%';
4952 if ( strEQ(PL_tokenbuf+1, "SIG")
4953 && ckWARN(WARN_SYNTAX)
4954 && (t = (char *) memchr(s, '}', PL_bufend - s))
4955 && (t = (char *) memchr(t, '=', PL_bufend - t)))
4957 char tmpbuf[sizeof PL_tokenbuf];
4960 } while (isSPACE(*t));
4961 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
4963 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
4968 && get_cvn_flags(tmpbuf, len, UTF
4972 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4973 "You need to quote \"%" UTF8f "\"",
4974 UTF8fARG(UTF, len, tmpbuf));
4981 PL_expect = XOPERATOR;
4982 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
4983 const bool islop = (PL_last_lop == PL_oldoldbufptr);
4984 if (!islop || PL_last_lop_op == OP_GREPSTART)
4985 PL_expect = XOPERATOR;
4986 else if (strchr("$@\"'`q", *s))
4987 PL_expect = XTERM; /* e.g. print $fh "foo" */
4988 else if ( strchr("&*<%", *s)
4989 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
4991 PL_expect = XTERM; /* e.g. print $fh &sub */
4993 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4994 char tmpbuf[sizeof PL_tokenbuf];
4997 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4998 if ((t2 = keyword(tmpbuf, len, 0))) {
4999 /* binary operators exclude handle interpretations */
5011 PL_expect = XTERM; /* e.g. print $fh length() */
5016 PL_expect = XTERM; /* e.g. print $fh subr() */
5019 else if (isDIGIT(*s))
5020 PL_expect = XTERM; /* e.g. print $fh 3 */
5021 else if (*s == '.' && isDIGIT(s[1]))
5022 PL_expect = XTERM; /* e.g. print $fh .3 */
5023 else if ((*s == '?' || *s == '-' || *s == '+')
5024 && !isSPACE(s[1]) && s[1] != '=')
5025 PL_expect = XTERM; /* e.g. print $fh -1 */
5026 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5028 PL_expect = XTERM; /* e.g. print $fh /.../
5029 XXX except DORDOR operator
5031 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5033 PL_expect = XTERM; /* print $fh <<"EOF" */
5036 force_ident_maybe_lex('$');
5041 yyl_sub(pTHX_ char *s, const int key)
5043 char * const tmpbuf = PL_tokenbuf + 1;
5044 bool have_name, have_proto;
5046 SV *format_name = NULL;
5047 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5049 SSize_t off = s-SvPVX(PL_linestr);
5052 s = skipspace(s); /* can move PL_linestr */
5054 d = SvPVX(PL_linestr)+off;
5056 SAVEBOOL(PL_parser->sig_seen);
5057 PL_parser->sig_seen = FALSE;
5059 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5061 || (*s == ':' && s[1] == ':'))
5064 PL_expect = XATTRBLOCK;
5065 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5067 if (key == KEY_format)
5068 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5070 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5072 PL_tokenbuf, len + 1, 0
5074 sv_setpvn(PL_subname, tmpbuf, len);
5076 sv_setsv(PL_subname,PL_curstname);
5077 sv_catpvs(PL_subname,"::");
5078 sv_catpvn(PL_subname,tmpbuf,len);
5080 if (SvUTF8(PL_linestr))
5081 SvUTF8_on(PL_subname);
5087 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5089 /* diag_listed_as: Missing name in "%s sub" */
5091 "Missing name in \"%s\"", PL_bufptr);
5093 PL_expect = XATTRTERM;
5094 sv_setpvs(PL_subname,"?");
5098 if (key == KEY_format) {
5100 NEXTVAL_NEXTTOKE.opval
5101 = newSVOP(OP_CONST,0, format_name);
5102 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5103 force_next(BAREWORD);
5108 /* Look for a prototype */
5109 if (*s == '(' && !is_sigsub) {
5110 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5112 Perl_croak(aTHX_ "Prototype not terminated");
5113 COPLINE_SET_FROM_MULTI_END;
5114 (void)validate_proto(PL_subname, PL_lex_stuff,
5115 ckWARN(WARN_ILLEGALPROTO), 0);
5123 if ( !(*s == ':' && s[1] != ':')
5124 && (*s != '{' && *s != '(') && key != KEY_format)
5126 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5127 key == KEY_DESTROY || key == KEY_BEGIN ||
5128 key == KEY_UNITCHECK || key == KEY_CHECK ||
5129 key == KEY_INIT || key == KEY_END ||
5130 key == KEY_my || key == KEY_state ||
5133 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5134 else if (*s != ';' && *s != '}')
5135 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5139 NEXTVAL_NEXTTOKE.opval =
5140 newSVOP(OP_CONST, 0, PL_lex_stuff);
5141 PL_lex_stuff = NULL;
5146 sv_setpvs(PL_subname, "__ANON__");
5148 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5154 force_ident_maybe_lex('&');
5162 yyl_interpcasemod(pTHX_ char *s)
5165 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5167 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5168 PL_bufptr, PL_bufend, *PL_bufptr);
5171 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5173 if (PL_lex_casemods) {
5174 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5175 PL_lex_casestack[PL_lex_casemods] = '\0';
5177 if (PL_bufptr != PL_bufend
5178 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5179 || oldmod == 'F')) {
5181 PL_lex_state = LEX_INTERPCONCAT;
5183 PL_lex_allbrackets--;
5186 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5187 /* Got an unpaired \E */
5188 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5189 "Useless use of \\E");
5191 if (PL_bufptr != PL_bufend)
5193 PL_lex_state = LEX_INTERPCONCAT;
5198 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5201 if (s[1] == '\\' && s[2] == 'E') {
5203 PL_lex_state = LEX_INTERPCONCAT;
5208 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5209 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5211 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5213 if ((*s == 'L' || *s == 'U' || *s == 'F')
5214 && (strpbrk(PL_lex_casestack, "LUF")))
5216 PL_lex_casestack[--PL_lex_casemods] = '\0';
5217 PL_lex_allbrackets--;
5220 if (PL_lex_casemods > 10)
5221 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5222 PL_lex_casestack[PL_lex_casemods++] = *s;
5223 PL_lex_casestack[PL_lex_casemods] = '\0';
5224 PL_lex_state = LEX_INTERPCONCAT;
5225 NEXTVAL_NEXTTOKE.ival = 0;
5226 force_next((2<<24)|'(');
5228 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5230 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5232 NEXTVAL_NEXTTOKE.ival = OP_LC;
5234 NEXTVAL_NEXTTOKE.ival = OP_UC;
5236 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5238 NEXTVAL_NEXTTOKE.ival = OP_FC;
5240 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5244 if (PL_lex_starts) {
5247 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5248 if (PL_lex_casemods == 1 && PL_lex_inpat)
5251 AopNOASSIGN(OP_CONCAT);
5259 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5260 GV **pgv, GV ***pgvp)
5262 GV *ogv = NULL; /* override (winner) */
5263 GV *hgv = NULL; /* hidden (loser) */
5266 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5268 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5269 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5271 && (cv = GvCVu(gv)))
5273 if (GvIMPORTED_CV(gv))
5275 else if (! CvMETHOD(cv))
5279 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5281 && (isGV_with_GP(gv)
5282 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5283 : SvPCS_IMPORTED(gv)
5284 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5294 *orig_keyword = key;
5295 return 0; /* overridden by import or by GLOBAL */
5297 else if (gv && !*pgvp
5298 && -key==KEY_lock /* XXX generalizable kludge */
5301 return 0; /* any sub overrides "weak" keyword */
5303 else { /* no override */
5305 if (key == KEY_dump) {
5306 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5310 if (hgv && key != KEY_x) /* never ambiguous */
5311 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5312 "Ambiguous call resolved as CORE::%s(), "
5313 "qualify as such or use &",
5320 yyl_qw(pTHX_ char *s, STRLEN len)
5324 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5326 missingterm(NULL, 0);
5328 COPLINE_SET_FROM_MULTI_END;
5329 PL_expect = XOPERATOR;
5330 if (SvCUR(PL_lex_stuff)) {
5331 int warned_comma = !ckWARN(WARN_QW);
5332 int warned_comment = warned_comma;
5333 char *d = SvPV_force(PL_lex_stuff, len);
5335 for (; isSPACE(*d) && len; --len, ++d)
5340 if (!warned_comma || !warned_comment) {
5341 for (; !isSPACE(*d) && len; --len, ++d) {
5342 if (!warned_comma && *d == ',') {
5343 Perl_warner(aTHX_ packWARN(WARN_QW),
5344 "Possible attempt to separate words with commas");
5347 else if (!warned_comment && *d == '#') {
5348 Perl_warner(aTHX_ packWARN(WARN_QW),
5349 "Possible attempt to put comments in qw() list");
5355 for (; !isSPACE(*d) && len; --len, ++d)
5358 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5359 words = op_append_elem(OP_LIST, words,
5360 newSVOP(OP_CONST, 0, tokeq(sv)));
5365 words = newNULLLIST();
5366 SvREFCNT_dec_NN(PL_lex_stuff);
5367 PL_lex_stuff = NULL;
5368 PL_expect = XOPERATOR;
5369 pl_yylval.opval = sawparens(words);
5374 yyl_hyphen(pTHX_ char *s)
5376 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5384 while (s < PL_bufend && SPACE_OR_TAB(*s))
5387 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5388 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5389 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5390 OPERATOR('-'); /* unary minus */
5393 case 'r': ftst = OP_FTEREAD; break;
5394 case 'w': ftst = OP_FTEWRITE; break;
5395 case 'x': ftst = OP_FTEEXEC; break;
5396 case 'o': ftst = OP_FTEOWNED; break;
5397 case 'R': ftst = OP_FTRREAD; break;
5398 case 'W': ftst = OP_FTRWRITE; break;
5399 case 'X': ftst = OP_FTREXEC; break;
5400 case 'O': ftst = OP_FTROWNED; break;
5401 case 'e': ftst = OP_FTIS; break;
5402 case 'z': ftst = OP_FTZERO; break;
5403 case 's': ftst = OP_FTSIZE; break;
5404 case 'f': ftst = OP_FTFILE; break;
5405 case 'd': ftst = OP_FTDIR; break;
5406 case 'l': ftst = OP_FTLINK; break;
5407 case 'p': ftst = OP_FTPIPE; break;
5408 case 'S': ftst = OP_FTSOCK; break;
5409 case 'u': ftst = OP_FTSUID; break;
5410 case 'g': ftst = OP_FTSGID; break;
5411 case 'k': ftst = OP_FTSVTX; break;
5412 case 'b': ftst = OP_FTBLK; break;
5413 case 'c': ftst = OP_FTCHR; break;
5414 case 't': ftst = OP_FTTTY; break;
5415 case 'T': ftst = OP_FTTEXT; break;
5416 case 'B': ftst = OP_FTBINARY; break;
5417 case 'M': case 'A': case 'C':
5418 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5420 case 'M': ftst = OP_FTMTIME; break;
5421 case 'A': ftst = OP_FTATIME; break;
5422 case 'C': ftst = OP_FTCTIME; break;
5430 PL_last_uni = PL_oldbufptr;
5431 PL_last_lop_op = (OPCODE)ftst;
5433 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5438 /* Assume it was a minus followed by a one-letter named
5439 * subroutine call (or a -bareword), then. */
5441 PerlIO_printf(Perl_debug_log,
5442 "### '-%c' looked like a file test but was not\n",
5449 const char tmp = *s++;
5452 if (PL_expect == XOPERATOR)
5457 else if (*s == '>') {
5460 if (((*s == '$' || *s == '&') && s[1] == '*')
5461 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5462 ||((*s == '@' || *s == '%') && strchr("*[{", s[1]))
5463 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5466 PL_expect = XPOSTDEREF;
5469 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5470 s = force_word(s,METHOD,FALSE,TRUE);
5478 if (PL_expect == XOPERATOR) {
5480 && !PL_lex_allbrackets
5481 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5489 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5491 OPERATOR('-'); /* unary minus */
5497 yyl_plus(pTHX_ char *s)
5499 const char tmp = *s++;
5502 if (PL_expect == XOPERATOR)
5507 if (PL_expect == XOPERATOR) {
5509 && !PL_lex_allbrackets
5510 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5518 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5525 yyl_star(pTHX_ char *s)
5527 if (PL_expect == XPOSTDEREF)
5530 if (PL_expect != XOPERATOR) {
5531 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5532 PL_expect = XOPERATOR;
5533 force_ident(PL_tokenbuf, '*');
5542 if (*s == '=' && !PL_lex_allbrackets
5543 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5552 && !PL_lex_allbrackets
5553 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5563 yyl_percent(pTHX_ char *s)
5565 if (PL_expect == XOPERATOR) {
5567 && !PL_lex_allbrackets
5568 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5575 else if (PL_expect == XPOSTDEREF)
5578 PL_tokenbuf[0] = '%';
5579 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5581 if (!PL_tokenbuf[1]) {
5584 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5585 && intuit_more(s, PL_bufend)) {
5587 PL_tokenbuf[0] = '@';
5589 PL_expect = XOPERATOR;
5590 force_ident_maybe_lex('%');
5595 yyl_caret(pTHX_ char *s)
5598 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5599 if (bof && s[1] == '.')
5601 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5602 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5608 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5612 yyl_colon(pTHX_ char *s)
5616 switch (PL_expect) {
5618 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5620 PL_bufptr = s; /* update in case we back off */
5623 "Use of := for an empty attribute list is not allowed");
5630 PL_expect = XTERMBLOCK;
5632 /* NB: as well as parsing normal attributes, we also end up
5633 * here if there is something looking like attributes
5634 * following a signature (which is illegal, but used to be
5635 * legal in 5.20..5.26). If the latter, we still parse the
5636 * attributes so that error messages(s) are less confusing,
5637 * but ignore them (parser->sig_seen).
5641 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5642 bool sig = PL_parser->sig_seen;
5646 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5647 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5648 if (tmp < 0) tmp = -tmp;
5663 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5665 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5670 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5672 COPLINE_SET_FROM_MULTI_END;
5675 sv_catsv(sv, PL_lex_stuff);
5676 attrs = op_append_elem(OP_LIST, attrs,
5677 newSVOP(OP_CONST, 0, sv));
5678 SvREFCNT_dec_NN(PL_lex_stuff);
5679 PL_lex_stuff = NULL;
5682 /* NOTE: any CV attrs applied here need to be part of
5683 the CVf_BUILTIN_ATTRS define in cv.h! */
5684 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5687 CvLVALUE_on(PL_compcv);
5689 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5692 CvMETHOD_on(PL_compcv);
5694 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5697 Perl_ck_warner_d(aTHX_
5698 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5699 ":const is experimental"
5701 CvANONCONST_on(PL_compcv);
5702 if (!CvANON(PL_compcv))
5703 yyerror(":const is not permitted on named "
5707 /* After we've set the flags, it could be argued that
5708 we don't need to do the attributes.pm-based setting
5709 process, and shouldn't bother appending recognized
5710 flags. To experiment with that, uncomment the
5711 following "else". (Note that's already been
5712 uncommented. That keeps the above-applied built-in
5713 attributes from being intercepted (and possibly
5714 rejected) by a package's attribute routines, but is
5715 justified by the performance win for the common case
5716 of applying only built-in attributes.) */
5718 attrs = op_append_elem(OP_LIST, attrs,
5719 newSVOP(OP_CONST, 0,
5723 if (*s == ':' && s[1] != ':')
5726 break; /* require real whitespace or :'s */
5727 /* XXX losing whitespace on sequential attributes here */
5732 && !(PL_expect == XOPERATOR
5733 ? (*s == '=' || *s == ')')
5734 : (*s == '{' || *s == '(')))
5736 const char q = ((*s == '\'') ? '"' : '\'');
5737 /* If here for an expression, and parsed no attrs, back off. */
5738 if (PL_expect == XOPERATOR && !attrs) {
5742 /* MUST advance bufptr here to avoid bogus "at end of line"
5743 context messages from yyerror().
5746 yyerror( (const char *)
5748 ? Perl_form(aTHX_ "Invalid separator character "
5749 "%c%c%c in attribute list", q, *s, q)
5750 : "Unterminated attribute list" ) );
5757 if (PL_parser->sig_seen) {
5758 /* see comment about about sig_seen and parser error
5762 Perl_croak(aTHX_ "Subroutine attributes must come "
5763 "before the signature");
5766 NEXTVAL_NEXTTOKE.opval = attrs;
5772 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5777 PL_lex_allbrackets--;
5782 yyl_subproto(pTHX_ char *s, CV *cv)
5784 STRLEN protolen = CvPROTOLEN(cv);
5785 const char *proto = CvPROTO(cv);
5788 proto = S_strip_spaces(aTHX_ proto, &protolen);
5791 if ((optional = *proto == ';')) {
5794 } while (*proto == ';');
5800 *proto == '$' || *proto == '_'
5801 || *proto == '*' || *proto == '+'
5806 *proto == '\\' && proto[1] && proto[2] == '\0'
5809 UNIPROTO(UNIOPSUB,optional);
5812 if (*proto == '\\' && proto[1] == '[') {
5813 const char *p = proto + 2;
5814 while(*p && *p != ']')
5816 if(*p == ']' && !p[1])
5817 UNIPROTO(UNIOPSUB,optional);
5820 if (*proto == '&' && *s == '{') {
5822 sv_setpvs(PL_subname, "__ANON__");
5824 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5825 if (!PL_lex_allbrackets
5826 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5828 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5837 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
5840 if (PL_lex_brackets > 100) {
5841 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5844 switch (PL_expect) {
5847 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5848 PL_lex_allbrackets++;
5849 OPERATOR(HASHBRACK);
5851 while (s < PL_bufend && SPACE_OR_TAB(*s))
5854 PL_tokenbuf[0] = '\0';
5855 if (d < PL_bufend && *d == '-') {
5856 PL_tokenbuf[0] = '-';
5858 while (d < PL_bufend && SPACE_OR_TAB(*d))
5861 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
5863 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
5865 while (d < PL_bufend && SPACE_OR_TAB(*d))
5868 const char minus = (PL_tokenbuf[0] == '-');
5869 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
5877 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5878 PL_lex_allbrackets++;
5883 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5884 PL_lex_allbrackets++;
5888 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5889 PL_lex_allbrackets++;
5894 if (PL_oldoldbufptr == PL_last_lop)
5895 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
5897 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5898 PL_lex_allbrackets++;
5901 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5903 /* This hack is to get the ${} in the message. */
5905 yyerror("syntax error");
5908 OPERATOR(HASHBRACK);
5910 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
5911 /* ${...} or @{...} etc., but not print {...}
5912 * Skip the disambiguation and treat this as a block.
5914 goto block_expectation;
5916 /* This hack serves to disambiguate a pair of curlies
5917 * as being a block or an anon hash. Normally, expectation
5918 * determines that, but in cases where we're not in a
5919 * position to expect anything in particular (like inside
5920 * eval"") we have to resolve the ambiguity. This code
5921 * covers the case where the first term in the curlies is a
5922 * quoted string. Most other cases need to be explicitly
5923 * disambiguated by prepending a "+" before the opening
5924 * curly in order to force resolution as an anon hash.
5926 * XXX should probably propagate the outer expectation
5927 * into eval"" to rely less on this hack, but that could
5928 * potentially break current behavior of eval"".
5932 if (*s == '\'' || *s == '"' || *s == '`') {
5933 /* common case: get past first string, handling escapes */
5934 for (t++; t < PL_bufend && *t != *s;)
5939 else if (*s == 'q') {
5942 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
5943 && !isWORDCHAR(*t))))
5945 /* skip q//-like construct */
5947 char open, close, term;
5950 while (t < PL_bufend && isSPACE(*t))
5952 /* check for q => */
5953 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5954 OPERATOR(HASHBRACK);
5958 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5962 for (t++; t < PL_bufend; t++) {
5963 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
5965 else if (*t == open)
5969 for (t++; t < PL_bufend; t++) {
5970 if (*t == '\\' && t+1 < PL_bufend)
5972 else if (*t == close && --brackets <= 0)
5974 else if (*t == open)
5981 /* skip plain q word */
5982 while ( t < PL_bufend
5983 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5985 t += UTF ? UTF8SKIP(t) : 1;
5988 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
5989 t += UTF ? UTF8SKIP(t) : 1;
5990 while ( t < PL_bufend
5991 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5993 t += UTF ? UTF8SKIP(t) : 1;
5996 while (t < PL_bufend && isSPACE(*t))
5998 /* if comma follows first term, call it an anon hash */
5999 /* XXX it could be a comma expression with loop modifiers */
6000 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6001 || (*t == '=' && t[1] == '>')))
6002 OPERATOR(HASHBRACK);
6003 if (PL_expect == XREF) {
6005 /* If there is an opening brace or 'sub:', treat it
6006 as a term to make ${{...}}{k} and &{sub:attr...}
6007 dwim. Otherwise, treat it as a statement, so
6008 map {no strict; ...} works.
6015 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6028 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6035 pl_yylval.ival = CopLINE(PL_curcop);
6036 PL_copline = NOLINE; /* invalidate current command line number */
6037 TOKEN(formbrack ? '=' : '{');
6041 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6043 assert(s != PL_bufend);
6046 if (PL_lex_brackets <= 0)
6047 /* diag_listed_as: Unmatched right %s bracket */
6048 yyerror("Unmatched right curly bracket");
6050 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6052 PL_lex_allbrackets--;
6054 if (PL_lex_state == LEX_INTERPNORMAL) {
6055 if (PL_lex_brackets == 0) {
6056 if (PL_expect & XFAKEBRACK) {
6057 PL_expect &= XENUMMASK;
6058 PL_lex_state = LEX_INTERPEND;
6060 return yylex(); /* ignore fake brackets */
6062 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6063 && SvEVALED(PL_lex_repl))
6064 PL_lex_state = LEX_INTERPEND;
6065 else if (*s == '-' && s[1] == '>')
6066 PL_lex_state = LEX_INTERPENDMAYBE;
6067 else if (*s != '[' && *s != '{')
6068 PL_lex_state = LEX_INTERPEND;
6072 if (PL_expect & XFAKEBRACK) {
6073 PL_expect &= XENUMMASK;
6075 return yylex(); /* ignore fake brackets */
6078 force_next(formbrack ? '.' : '}');
6079 if (formbrack) LEAVE_with_name("lex_format");
6080 if (formbrack == 2) { /* means . where arguments were expected */
6089 yyl_ampersand(pTHX_ char *s)
6091 if (PL_expect == XPOSTDEREF)
6096 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6097 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6105 if (PL_expect == XOPERATOR) {
6108 if ( PL_bufptr == PL_linestart
6109 && ckWARN(WARN_SEMICOLON)
6110 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6112 CopLINE_dec(PL_curcop);
6113 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6114 CopLINE_inc(PL_curcop);
6117 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6119 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6120 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6126 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6131 PL_tokenbuf[0] = '&';
6132 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6133 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6136 force_ident_maybe_lex('&');
6144 yyl_verticalbar(pTHX_ char *s)
6151 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6152 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6161 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6164 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6165 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6170 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6174 yyl_bang(pTHX_ char *s)
6176 const char tmp = *s++;
6178 /* was this !=~ where !~ was meant?
6179 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6181 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6182 const char *t = s+1;
6184 while (t < PL_bufend && isSPACE(*t))
6187 if (*t == '/' || *t == '?'
6188 || ((*t == 'm' || *t == 's' || *t == 'y')
6189 && !isWORDCHAR(t[1]))
6190 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6191 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6192 "!=~ should be !~");
6195 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6211 yyl_snail(pTHX_ char *s)
6213 if (PL_expect == XPOSTDEREF)
6215 PL_tokenbuf[0] = '@';
6216 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6217 if (PL_expect == XOPERATOR) {
6219 if (PL_bufptr > s) {
6221 PL_bufptr = PL_oldbufptr;
6226 if (!PL_tokenbuf[1]) {
6229 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6231 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6232 && intuit_more(s, PL_bufend))
6235 PL_tokenbuf[0] = '%';
6237 /* Warn about @ where they meant $. */
6238 if (*s == '[' || *s == '{') {
6239 if (ckWARN(WARN_SYNTAX)) {
6240 S_check_scalar_slice(aTHX_ s);
6244 PL_expect = XOPERATOR;
6245 force_ident_maybe_lex('@');
6250 yyl_slash(pTHX_ char *s)
6252 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6253 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6254 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6259 else if (PL_expect == XOPERATOR) {
6261 if (*s == '=' && !PL_lex_allbrackets
6262 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6270 /* Disable warning on "study /blah/" */
6271 if ( PL_oldoldbufptr == PL_last_uni
6272 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6273 || memNE(PL_last_uni, "study", 5)
6274 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6277 s = scan_pat(s,OP_MATCH);
6278 TERM(sublex_start());
6283 yyl_leftsquare(pTHX_ char *s)
6287 if (PL_lex_brackets > 100)
6288 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6289 PL_lex_brackstack[PL_lex_brackets++] = 0;
6290 PL_lex_allbrackets++;
6296 yyl_rightsquare(pTHX_ char *s)
6298 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6301 if (PL_lex_brackets <= 0)
6302 /* diag_listed_as: Unmatched right %s bracket */
6303 yyerror("Unmatched right square bracket");
6306 PL_lex_allbrackets--;
6307 if (PL_lex_state == LEX_INTERPNORMAL) {
6308 if (PL_lex_brackets == 0) {
6309 if (*s == '-' && s[1] == '>')
6310 PL_lex_state = LEX_INTERPENDMAYBE;
6311 else if (*s != '[' && *s != '{')
6312 PL_lex_state = LEX_INTERPEND;
6319 yyl_tilde(pTHX_ char *s)
6322 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6323 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6326 Perl_ck_warner_d(aTHX_
6327 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6328 "Smartmatch is experimental");
6332 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6334 BCop(OP_SCOMPLEMENT);
6336 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6340 yyl_leftparen(pTHX_ char *s)
6342 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6343 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6347 PL_lex_allbrackets++;
6352 yyl_rightparen(pTHX_ char *s)
6354 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6357 PL_lex_allbrackets--;
6365 yyl_leftpointy(pTHX_ char *s)
6369 if (PL_expect != XOPERATOR) {
6370 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6372 if (s[1] == '<' && s[2] != '>')
6373 s = scan_heredoc(s);
6375 s = scan_inputsymbol(s);
6376 PL_expect = XOPERATOR;
6377 TOKEN(sublex_start());
6384 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6388 SHop(OP_LEFT_SHIFT);
6393 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6400 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6408 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6417 yyl_rightpointy(pTHX_ char *s)
6419 const char tmp = *s++;
6422 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6426 SHop(OP_RIGHT_SHIFT);
6428 else if (tmp == '=') {
6429 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6437 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6446 yyl_sglquote(pTHX_ char *s)
6448 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6450 missingterm(NULL, 0);
6451 COPLINE_SET_FROM_MULTI_END;
6452 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6453 if (PL_expect == XOPERATOR) {
6456 pl_yylval.ival = OP_CONST;
6457 TERM(sublex_start());
6461 yyl_dblquote(pTHX_ char *s, STRLEN len)
6464 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6467 printbuf("### Saw string before %s\n", s);
6469 PerlIO_printf(Perl_debug_log,
6470 "### Saw unterminated string\n");
6472 if (PL_expect == XOPERATOR) {
6476 missingterm(NULL, 0);
6477 pl_yylval.ival = OP_CONST;
6478 /* FIXME. I think that this can be const if char *d is replaced by
6479 more localised variables. */
6480 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6481 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6482 pl_yylval.ival = OP_STRINGIFY;
6486 if (pl_yylval.ival == OP_CONST)
6487 COPLINE_SET_FROM_MULTI_END;
6488 TERM(sublex_start());
6492 yyl_backtick(pTHX_ char *s)
6494 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6497 printbuf("### Saw backtick string before %s\n", s);
6499 PerlIO_printf(Perl_debug_log,
6500 "### Saw unterminated backtick string\n");
6502 if (PL_expect == XOPERATOR)
6503 no_op("Backticks",s);
6505 missingterm(NULL, 0);
6506 pl_yylval.ival = OP_BACKTICK;
6507 TERM(sublex_start());
6511 yyl_backslash(pTHX_ char *s)
6513 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6514 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6516 if (PL_expect == XOPERATOR)
6517 no_op("Backslash",s);
6522 yyl_data_handle(pTHX)
6524 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6527 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6530 gv_init(gv,stash,"DATA",4,0);
6534 GvIOp(gv) = newIO();
6535 IoIFP(GvIOp(gv)) = PL_rsfp;
6537 /* Mark this internal pseudo-handle as clean */
6538 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6539 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6540 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6542 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6544 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6545 /* if the script was opened in binmode, we need to revert
6546 * it to text mode for compatibility; but only iff it has CRs
6547 * XXX this is a questionable hack at best. */
6548 if (PL_bufend-PL_bufptr > 2
6549 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6552 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6553 loc = PerlIO_tell(PL_rsfp);
6554 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6556 if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6558 PerlIO_seek(PL_rsfp, loc, 0);
6563 #ifdef PERLIO_LAYERS
6566 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6573 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6574 __attribute__noreturn__;
6576 PERL_STATIC_NO_RET void
6577 yyl_croak_unrecognised(pTHX_ char *s)
6579 SV *dsv = newSVpvs_flags("", SVs_TEMP);
6585 STRLEN skiplen = UTF8SKIP(s);
6586 STRLEN stravail = PL_bufend - s;
6587 c = sv_uni_display(dsv, newSVpvn_flags(s,
6588 skiplen > stravail ? stravail : skiplen,
6589 SVs_TEMP | SVf_UTF8),
6590 10, UNI_DISPLAY_ISPRINT);
6593 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6596 if (s >= PL_linestart) {
6600 /* somehow (probably due to a parse failure), PL_linestart has advanced
6601 * pass PL_bufptr, get a reasonable beginning of line
6604 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6607 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6608 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6609 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6612 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6613 UTF8fARG(UTF, (s - d), d),
6618 yyl_require(pTHX_ char *s, I32 orig_keyword)
6622 s = force_version(s, FALSE);
6624 else if (*s != 'v' || !isDIGIT(s[1])
6625 || (s = force_version(s, TRUE), *s == 'v'))
6627 *PL_tokenbuf = '\0';
6628 s = force_word(s,BAREWORD,TRUE,TRUE);
6629 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6630 PL_tokenbuf + sizeof(PL_tokenbuf),
6633 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6634 GV_ADD | (UTF ? SVf_UTF8 : 0));
6637 yyerror("<> at require-statement should be quotes");
6640 if (orig_keyword == KEY_require)
6645 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6647 PL_last_uni = PL_oldbufptr;
6648 PL_last_lop_op = OP_REQUIRE;
6650 return REPORT( (int)REQUIRE );
6654 yyl_foreach(pTHX_ char *s)
6656 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6658 pl_yylval.ival = CopLINE(PL_curcop);
6660 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6662 SSize_t s_off = s - SvPVX(PL_linestr);
6665 if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6668 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6673 /* skip optional package name, as in "for my abc $x (..)" */
6674 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6675 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6678 if (*p != '$' && *p != '\\')
6679 Perl_croak(aTHX_ "Missing $ on loop variable");
6681 /* The buffer may have been reallocated, update s */
6682 s = SvPVX(PL_linestr) + s_off;
6688 yyl_do(pTHX_ char *s, I32 orig_keyword)
6697 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6699 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6700 && !keyword(PL_tokenbuf + 1, len, 0)) {
6701 SSize_t off = s-SvPVX(PL_linestr);
6703 s = SvPVX(PL_linestr)+off;
6705 force_ident_maybe_lex('&');
6710 if (orig_keyword == KEY_do)
6718 yyl_my(pTHX_ char *s, I32 my)
6722 yyerror(Perl_form(aTHX_
6723 "Can't redeclare \"%s\" in \"%s\"",
6724 my == KEY_my ? "my" :
6725 my == KEY_state ? "state" : "our",
6726 PL_in_my == KEY_my ? "my" :
6727 PL_in_my == KEY_state ? "state" : "our"));
6731 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6733 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6734 if (memEQs(PL_tokenbuf, len, "sub"))
6735 return yyl_sub(aTHX_ s, my);
6736 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6737 if (!PL_in_my_stash) {
6741 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6742 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6743 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6746 else if (*s == '\\') {
6747 if (!FEATURE_MYREF_IS_ENABLED)
6748 Perl_croak(aTHX_ "The experimental declared_refs "
6749 "feature is not enabled");
6750 Perl_ck_warner_d(aTHX_
6751 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6752 "Declaring references is experimental");
6757 static int yyl_try(pTHX_ char*, STRLEN);
6760 yyl_eol_needs_semicolon(pTHX_ char **ps)
6763 if (PL_lex_state != LEX_NORMAL
6764 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6766 const bool in_comment = *s == '#';
6768 if (*s == '#' && s == PL_linestart && PL_in_eval
6769 && !PL_rsfp && !PL_parser->filtered) {
6770 /* handle eval qq[#line 1 "foo"\n ...] */
6771 CopLINE_dec(PL_curcop);
6772 incline(s, PL_bufend);
6775 while (d < PL_bufend && *d != '\n')
6780 if (in_comment && d == PL_bufend
6781 && PL_lex_state == LEX_INTERPNORMAL
6782 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6783 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6785 incline(s, PL_bufend);
6786 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6787 PL_lex_state = LEX_FORMLINE;
6788 force_next(FORMRBRACK);
6794 while (s < PL_bufend && *s != '\n')
6796 if (s < PL_bufend) {
6799 incline(s, PL_bufend);
6807 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s, STRLEN len)
6815 bof = cBOOL(PL_rsfp);
6818 PL_bufptr = PL_bufend;
6819 COPLINE_INC_WITH_HERELINES;
6820 if (!lex_next_chunk(fake_eof)) {
6821 CopLINE_dec(PL_curcop);
6823 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
6825 CopLINE_dec(PL_curcop);
6827 /* If it looks like the start of a BOM or raw UTF-16,
6828 * check if it in fact is. */
6831 || *(U8*)s == BOM_UTF8_FIRST_BYTE
6835 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
6836 bof = (offset == (Off_t)SvCUR(PL_linestr));
6837 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
6838 /* offset may include swallowed CR */
6840 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6843 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6844 s = swallow_bom((U8*)s);
6847 if (PL_parser->in_pod) {
6848 /* Incest with pod. */
6849 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
6852 SvPVCLEAR(PL_linestr);
6853 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
6854 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6855 PL_last_lop = PL_last_uni = NULL;
6856 PL_parser->in_pod = 0;
6859 if (PL_rsfp || PL_parser->filtered)
6860 incline(s, PL_bufend);
6861 } while (PL_parser->in_pod);
6863 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
6864 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6865 PL_last_lop = PL_last_uni = NULL;
6866 if (CopLINE(PL_curcop) == 1) {
6867 while (s < PL_bufend && isSPACE(*s))
6869 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
6873 if (*s == '#' && *(s+1) == '!')
6875 #ifdef ALTERNATE_SHEBANG
6877 static char const as[] = ALTERNATE_SHEBANG;
6878 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
6879 d = s + (sizeof(as) - 1);
6881 #endif /* ALTERNATE_SHEBANG */
6890 while (*d && !isSPACE(*d))
6894 #ifdef ARG_ZERO_IS_SCRIPT
6895 if (ipathend > ipath) {
6897 * HP-UX (at least) sets argv[0] to the script name,
6898 * which makes $^X incorrect. And Digital UNIX and Linux,
6899 * at least, set argv[0] to the basename of the Perl
6900 * interpreter. So, having found "#!", we'll set it right.
6902 SV* copfilesv = CopFILESV(PL_curcop);
6905 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
6907 assert(SvPOK(x) || SvGMAGICAL(x));
6908 if (sv_eq(x, copfilesv)) {
6909 sv_setpvn(x, ipath, ipathend - ipath);
6915 const char *bstart = SvPV_const(copfilesv, blen);
6916 const char * const lstart = SvPV_const(x, llen);
6918 bstart += blen - llen;
6919 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
6920 sv_setpvn(x, ipath, ipathend - ipath);
6927 /* Anything to do if no copfilesv? */
6929 TAINT_NOT; /* $^X is always tainted, but that's OK */
6931 #endif /* ARG_ZERO_IS_SCRIPT */
6936 d = instr(s,"perl -");
6938 d = instr(s,"perl");
6940 /* avoid getting into infinite loops when shebang
6941 * line contains "Perl" rather than "perl" */
6943 for (d = ipathend-4; d >= ipath; --d) {
6944 if (isALPHA_FOLD_EQ(*d, 'p')
6945 && !ibcmp(d, "perl", 4))
6955 #ifdef ALTERNATE_SHEBANG
6957 * If the ALTERNATE_SHEBANG on this system starts with a
6958 * character that can be part of a Perl expression, then if
6959 * we see it but not "perl", we're probably looking at the
6960 * start of Perl code, not a request to hand off to some
6961 * other interpreter. Similarly, if "perl" is there, but
6962 * not in the first 'word' of the line, we assume the line
6963 * contains the start of the Perl program.
6965 if (d && *s != '#') {
6966 const char *c = ipath;
6967 while (*c && !strchr("; \t\r\n\f\v#", *c))
6970 d = NULL; /* "perl" not in first word; ignore */
6972 *s = '#'; /* Don't try to parse shebang line */
6974 #endif /* ALTERNATE_SHEBANG */
6979 && !instr(s,"indir")
6980 && instr(PL_origargv[0],"perl"))
6987 while (s < PL_bufend && isSPACE(*s))
6989 if (s < PL_bufend) {
6990 Newx(newargv,PL_origargc+3,char*);
6992 while (s < PL_bufend && !isSPACE(*s))
6995 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
6998 newargv = PL_origargv;
7001 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7003 Perl_croak(aTHX_ "Can't exec %s", ipath);
7006 while (*d && !isSPACE(*d))
7008 while (SPACE_OR_TAB(*d))
7012 const bool switches_done = PL_doswitches;
7013 const U32 oldpdb = PL_perldb;
7014 const bool oldn = PL_minus_n;
7015 const bool oldp = PL_minus_p;
7019 bool baduni = FALSE;
7021 const char *d2 = d1 + 1;
7022 if (parse_unicode_opts((const char **)&d2)
7026 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7027 const char * const m = d1;
7028 while (*d1 && !isSPACE(*d1))
7030 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7033 d1 = moreswitches(d1);
7035 if (PL_doswitches && !switches_done) {
7036 int argc = PL_origargc;
7037 char **argv = PL_origargv;
7040 } while (argc && argv[0][0] == '-' && argv[0][1]);
7041 init_argv_symbols(argc,argv);
7043 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7044 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7045 /* if we have already added "LINE: while (<>) {",
7046 we must not do it again */
7048 SvPVCLEAR(PL_linestr);
7049 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7050 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7051 PL_last_lop = PL_last_uni = NULL;
7052 PL_preambled = FALSE;
7053 if (PERLDB_LINE_OR_SAVESRC)
7054 (void)gv_fetchfile(PL_origfilename);
7055 return yyl_try(aTHX_ s, len);
7062 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7063 PL_lex_state = LEX_FORMLINE;
7064 force_next(FORMRBRACK);
7068 return yyl_try(aTHX_ s, len);
7072 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7076 = newSVOP(OP_CONST, 0,
7077 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7078 pl_yylval.opval->op_private = OPpCONST_BARE;
7083 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7085 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7086 && PL_parser->saw_infix_sigil)
7088 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7089 "Operator or semicolon missing before %c%" UTF8f,
7091 UTF8fARG(UTF, strlen(PL_tokenbuf),
7093 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7094 "Ambiguous use of %c resolved as operator %c",
7095 lastchar, lastchar);
7101 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7105 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7106 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7107 if (SvTYPE(sv) == SVt_PVAV)
7108 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7111 pl_yylval.opval->op_private = 0;
7112 pl_yylval.opval->op_folded = 1;
7113 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7118 op_free(pl_yylval.opval);
7120 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7121 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7122 PL_last_lop = PL_oldbufptr;
7123 PL_last_lop_op = OP_ENTERSUB;
7125 /* Is there a prototype? */
7127 int k = yyl_subproto(aTHX_ s, cv);
7132 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7134 force_next(off ? PRIVATEREF : BAREWORD);
7135 if (!PL_lex_allbrackets
7136 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7138 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7144 /* Honour "reserved word" warnings, and enforce strict subs */
7146 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7148 /* after "print" and similar functions (corresponding to
7149 * "F? L" in opcode.pl), whatever wasn't already parsed as
7150 * a filehandle should be subject to "strict subs".
7151 * Likewise for the optional indirect-object argument to system
7152 * or exec, which can't be a bareword */
7153 if ((PL_last_lop_op == OP_PRINT
7154 || PL_last_lop_op == OP_PRTF
7155 || PL_last_lop_op == OP_SAY
7156 || PL_last_lop_op == OP_SYSTEM
7157 || PL_last_lop_op == OP_EXEC)
7158 && (PL_hints & HINT_STRICT_SUBS))
7160 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7163 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7164 char *d = PL_tokenbuf;
7167 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7168 /* PL_warn_reserved is constant */
7169 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7170 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7172 GCC_DIAG_RESTORE_STMT;
7178 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7181 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7183 bool no_op_error = FALSE;
7184 /* Use this var to track whether intuit_method has been
7185 called. intuit_method returns 0 or > 255. */
7188 if (PL_expect == XOPERATOR) {
7189 if (PL_bufptr == PL_linestart) {
7190 CopLINE_dec(PL_curcop);
7191 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7192 CopLINE_inc(PL_curcop);
7195 /* We want to call no_op with s pointing after the
7196 bareword, so defer it. But we want it to come
7197 before the Bad name croak. */
7201 /* Get the rest if it looks like a package qualifier */
7203 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7205 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7208 no_op("Bareword",s);
7209 no_op_error = FALSE;
7212 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7213 UTF8fARG(UTF, len, PL_tokenbuf),
7214 *s == '\'' ? "'" : "::");
7220 no_op("Bareword",s);
7222 /* See if the name is "Foo::",
7223 in which case Foo is a bareword
7224 (and a package name). */
7226 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7227 if (ckWARN(WARN_BAREWORD)
7228 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7229 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7230 "Bareword \"%" UTF8f
7231 "\" refers to nonexistent package",
7232 UTF8fARG(UTF, len, PL_tokenbuf));
7234 PL_tokenbuf[len] = '\0';
7243 /* if we saw a global override before, get the right name */
7246 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7248 SV *sv = newSVpvs("CORE::GLOBAL::");
7254 /* Presume this is going to be a bareword of some sort. */
7256 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7257 pl_yylval.opval->op_private = OPpCONST_BARE;
7259 /* And if "Foo::", then that's what it certainly is. */
7261 return yyl_safe_bareword(aTHX_ s, lastchar);
7264 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7265 const_op->op_private = OPpCONST_BARE;
7266 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7270 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7273 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7276 /* See if it's the indirect object for a list operator. */
7279 && PL_oldoldbufptr < PL_bufptr
7280 && (PL_oldoldbufptr == PL_last_lop
7281 || PL_oldoldbufptr == PL_last_uni)
7282 && /* NO SKIPSPACE BEFORE HERE! */
7284 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7287 bool immediate_paren = *s == '(';
7290 /* (Now we can afford to cross potential line boundary.) */
7293 /* intuit_method() can indirectly call lex_next_chunk(),
7296 s_off = s - SvPVX(PL_linestr);
7297 /* Two barewords in a row may indicate method call. */
7298 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7300 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7302 /* the code at method: doesn't use s */
7305 s = SvPVX(PL_linestr) + s_off;
7307 /* If not a declared subroutine, it's an indirect object. */
7308 /* (But it's an indir obj regardless for sort.) */
7309 /* Also, if "_" follows a filetest operator, it's a bareword */
7312 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7314 && (PL_last_lop_op != OP_MAPSTART
7315 && PL_last_lop_op != OP_GREPSTART))))
7316 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7317 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7321 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7322 yyl_strictwarn_bareword(aTHX_ lastchar);
7323 op_free(c.rv2cv_op);
7324 return yyl_safe_bareword(aTHX_ s, lastchar);
7328 PL_expect = XOPERATOR;
7331 /* Is this a word before a => operator? */
7332 if (*s == '=' && s[1] == '>' && !pkgname) {
7333 op_free(c.rv2cv_op);
7335 if (c.gvp || (c.lex && !c.off)) {
7336 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7337 /* This is our own scalar, created a few lines
7338 above, so this is safe. */
7339 SvREADONLY_off(c.sv);
7340 sv_setpv(c.sv, PL_tokenbuf);
7341 if (UTF && !IN_BYTES
7342 && is_utf8_string((U8*)PL_tokenbuf, len))
7344 SvREADONLY_on(c.sv);
7349 /* If followed by a paren, it's certainly a subroutine. */
7354 while (SPACE_OR_TAB(*d))
7356 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7357 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7359 NEXTVAL_NEXTTOKE.opval =
7360 c.off ? c.rv2cv_op : pl_yylval.opval;
7362 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7363 else op_free(c.rv2cv_op), force_next(BAREWORD);
7368 /* If followed by var or block, call it a method (unless sub) */
7370 if ((*s == '$' || *s == '{') && !c.cv) {
7371 op_free(c.rv2cv_op);
7372 PL_last_lop = PL_oldbufptr;
7373 PL_last_lop_op = OP_METHOD;
7374 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7375 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7376 PL_expect = XBLOCKTERM;
7378 return REPORT(METHOD);
7381 /* If followed by a bareword, see if it looks like indir obj. */
7385 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7386 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7389 if (c.lex && !c.off) {
7390 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7391 SvREADONLY_off(c.sv);
7392 sv_setpvn(c.sv, PL_tokenbuf, len);
7393 if (UTF && !IN_BYTES
7394 && is_utf8_string((U8*)PL_tokenbuf, len))
7396 else SvUTF8_off(c.sv);
7398 op_free(c.rv2cv_op);
7399 if (key == METHOD && !PL_lex_allbrackets
7400 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7402 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7407 /* Not a method, so call it a subroutine (if defined) */
7410 /* Check for a constant sub */
7411 c.sv = cv_const_sv_or_av(c.cv);
7412 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7415 /* Call it a bare word */
7417 if (PL_hints & HINT_STRICT_SUBS)
7418 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7420 yyl_strictwarn_bareword(aTHX_ lastchar);
7422 op_free(c.rv2cv_op);
7424 return yyl_safe_bareword(aTHX_ s, lastchar);
7428 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7431 default: /* not a keyword */
7432 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7435 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7439 newSVOP(OP_CONST, 0,
7440 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7443 case KEY___PACKAGE__:
7445 newSVOP(OP_CONST, 0, (PL_curstash
7446 ? newSVhek(HvNAME_HEK(PL_curstash))
7452 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7453 yyl_data_handle(aTHX);
7454 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len);
7457 FUN0OP(CvCLONE(PL_compcv)
7458 ? newOP(OP_RUNCV, 0)
7459 : newPVOP(OP_RUNCV,0,NULL));
7468 if (PL_expect == XSTATE)
7469 return yyl_sub(aTHX_ PL_bufptr, key);
7470 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7479 LOP(OP_ACCEPT,XTERM);
7482 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7487 LOP(OP_ATAN2,XTERM);
7493 LOP(OP_BINMODE,XTERM);
7496 LOP(OP_BLESS,XTERM);
7505 /* We have to disambiguate the two senses of
7506 "continue". If the next token is a '{' then
7507 treat it as the start of a continue block;
7508 otherwise treat it as a control operator.
7518 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7528 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7537 if (!PL_cryptseen) {
7538 PL_cryptseen = TRUE;
7542 LOP(OP_CRYPT,XTERM);
7545 LOP(OP_CHMOD,XTERM);
7548 LOP(OP_CHOWN,XTERM);
7551 LOP(OP_CONNECT,XTERM);
7566 return yyl_do(aTHX_ s, orig_keyword);
7569 PL_hints |= HINT_BLOCK_SCOPE;
7579 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7580 STR_WITH_LEN("NDBM_File::"),
7581 STR_WITH_LEN("DB_File::"),
7582 STR_WITH_LEN("GDBM_File::"),
7583 STR_WITH_LEN("SDBM_File::"),
7584 STR_WITH_LEN("ODBM_File::"),
7586 LOP(OP_DBMOPEN,XTERM);
7598 pl_yylval.ival = CopLINE(PL_curcop);
7602 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7614 if (*s == '{') { /* block eval */
7615 PL_expect = XTERMBLOCK;
7616 UNIBRACK(OP_ENTERTRY);
7618 else { /* string eval */
7620 UNIBRACK(OP_ENTEREVAL);
7625 UNIBRACK(-OP_ENTEREVAL);
7639 case KEY_endhostent:
7645 case KEY_endservent:
7648 case KEY_endprotoent:
7659 return yyl_foreach(aTHX_ s);
7662 LOP(OP_FORMLINE,XTERM);
7671 LOP(OP_FCNTL,XTERM);
7677 LOP(OP_FLOCK,XTERM);
7680 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7685 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7690 LOP(OP_GREPSTART, XREF);
7707 case KEY_getpriority:
7708 LOP(OP_GETPRIORITY,XTERM);
7710 case KEY_getprotobyname:
7713 case KEY_getprotobynumber:
7714 LOP(OP_GPBYNUMBER,XTERM);
7716 case KEY_getprotoent:
7728 case KEY_getpeername:
7729 UNI(OP_GETPEERNAME);
7731 case KEY_gethostbyname:
7734 case KEY_gethostbyaddr:
7735 LOP(OP_GHBYADDR,XTERM);
7737 case KEY_gethostent:
7740 case KEY_getnetbyname:
7743 case KEY_getnetbyaddr:
7744 LOP(OP_GNBYADDR,XTERM);
7749 case KEY_getservbyname:
7750 LOP(OP_GSBYNAME,XTERM);
7752 case KEY_getservbyport:
7753 LOP(OP_GSBYPORT,XTERM);
7755 case KEY_getservent:
7758 case KEY_getsockname:
7759 UNI(OP_GETSOCKNAME);
7761 case KEY_getsockopt:
7762 LOP(OP_GSOCKOPT,XTERM);
7777 pl_yylval.ival = CopLINE(PL_curcop);
7778 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7779 "given is experimental");
7783 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7789 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7791 pl_yylval.ival = CopLINE(PL_curcop);
7795 LOP(OP_INDEX,XTERM);
7801 LOP(OP_IOCTL,XTERM);
7828 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7833 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7847 LOP(OP_LISTEN,XTERM);
7856 s = scan_pat(s,OP_MATCH);
7857 TERM(sublex_start());
7860 LOP(OP_MAPSTART, XREF);
7863 LOP(OP_MKDIR,XTERM);
7866 LOP(OP_MSGCTL,XTERM);
7869 LOP(OP_MSGGET,XTERM);
7872 LOP(OP_MSGRCV,XTERM);
7875 LOP(OP_MSGSND,XTERM);
7880 return yyl_my(aTHX_ s, key);
7886 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7891 s = tokenize_use(0, s);
7895 if (*s == '(' || (s = skipspace(s), *s == '('))
7898 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7899 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7905 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
7907 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
7908 for (t=d; isSPACE(*t);)
7910 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
7912 && !(t[0] == '=' && t[1] == '>')
7913 && !(t[0] == ':' && t[1] == ':')
7914 && !keyword(s, d-s, 0)
7916 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
7917 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
7918 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
7924 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7926 pl_yylval.ival = OP_OR;
7936 LOP(OP_OPEN_DIR,XTERM);
7939 checkcomma(s,PL_tokenbuf,"filehandle");
7943 checkcomma(s,PL_tokenbuf,"filehandle");
7962 s = force_word(s,BAREWORD,FALSE,TRUE);
7964 s = force_strict_version(s);
7968 LOP(OP_PIPE_OP,XTERM);
7971 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7973 missingterm(NULL, 0);
7974 COPLINE_SET_FROM_MULTI_END;
7975 pl_yylval.ival = OP_CONST;
7976 TERM(sublex_start());
7982 return yyl_qw(aTHX_ s, len);
7985 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
7987 missingterm(NULL, 0);
7988 pl_yylval.ival = OP_STRINGIFY;
7989 if (SvIVX(PL_lex_stuff) == '\'')
7990 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
7991 TERM(sublex_start());
7994 s = scan_pat(s,OP_QR);
7995 TERM(sublex_start());
7998 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8000 missingterm(NULL, 0);
8001 pl_yylval.ival = OP_BACKTICK;
8002 TERM(sublex_start());
8008 return yyl_require(aTHX_ s, orig_keyword);
8017 LOP(OP_RENAME,XTERM);
8026 LOP(OP_RINDEX,XTERM);
8035 UNIDOR(OP_READLINE);
8038 UNIDOR(OP_BACKTICK);
8047 LOP(OP_REVERSE,XTERM);
8050 UNIDOR(OP_READLINK);
8057 if (pl_yylval.opval)
8058 TERM(sublex_start());
8060 TOKEN(1); /* force error */
8063 checkcomma(s,PL_tokenbuf,"filehandle");
8073 LOP(OP_SELECT,XTERM);
8079 LOP(OP_SEMCTL,XTERM);
8082 LOP(OP_SEMGET,XTERM);
8085 LOP(OP_SEMOP,XTERM);
8091 LOP(OP_SETPGRP,XTERM);
8093 case KEY_setpriority:
8094 LOP(OP_SETPRIORITY,XTERM);
8096 case KEY_sethostent:
8102 case KEY_setservent:
8105 case KEY_setprotoent:
8115 LOP(OP_SEEKDIR,XTERM);
8117 case KEY_setsockopt:
8118 LOP(OP_SSOCKOPT,XTERM);
8124 LOP(OP_SHMCTL,XTERM);
8127 LOP(OP_SHMGET,XTERM);
8130 LOP(OP_SHMREAD,XTERM);
8133 LOP(OP_SHMWRITE,XTERM);
8136 LOP(OP_SHUTDOWN,XTERM);
8145 LOP(OP_SOCKET,XTERM);
8147 case KEY_socketpair:
8148 LOP(OP_SOCKPAIR,XTERM);
8151 checkcomma(s,PL_tokenbuf,"subroutine name");
8154 s = force_word(s,BAREWORD,TRUE,TRUE);
8158 LOP(OP_SPLIT,XTERM);
8161 LOP(OP_SPRINTF,XTERM);
8164 LOP(OP_SPLICE,XTERM);
8179 LOP(OP_SUBSTR,XTERM);
8183 return yyl_sub(aTHX_ s, key);
8186 LOP(OP_SYSTEM,XREF);
8189 LOP(OP_SYMLINK,XTERM);
8192 LOP(OP_SYSCALL,XTERM);
8195 LOP(OP_SYSOPEN,XTERM);
8198 LOP(OP_SYSSEEK,XTERM);
8201 LOP(OP_SYSREAD,XTERM);
8204 LOP(OP_SYSWRITE,XTERM);
8209 TERM(sublex_start());
8230 LOP(OP_TRUNCATE,XTERM);
8242 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8244 pl_yylval.ival = CopLINE(PL_curcop);
8248 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8250 pl_yylval.ival = CopLINE(PL_curcop);
8254 LOP(OP_UNLINK,XTERM);
8260 LOP(OP_UNPACK,XTERM);
8263 LOP(OP_UTIME,XTERM);
8269 LOP(OP_UNSHIFT,XTERM);
8272 s = tokenize_use(1, s);
8282 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8284 pl_yylval.ival = CopLINE(PL_curcop);
8285 Perl_ck_warner_d(aTHX_
8286 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8287 "when is experimental");
8291 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8293 pl_yylval.ival = CopLINE(PL_curcop);
8297 PL_hints |= HINT_BLOCK_SCOPE;
8304 LOP(OP_WAITPID,XTERM);
8310 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8311 * we use the same number on EBCDIC */
8312 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8316 if (PL_expect == XOPERATOR) {
8317 if (*s == '=' && !PL_lex_allbrackets
8318 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8325 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8328 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8330 pl_yylval.ival = OP_XOR;
8336 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8339 I32 orig_keyword = 0;
8343 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8344 if ((*s == ':' && s[1] == ':')
8345 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8347 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8348 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8351 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8352 UTF8fARG(UTF, len, PL_tokenbuf));
8355 else if (key == KEY_require || key == KEY_do
8357 /* that's a way to remember we saw "CORE::" */
8360 /* Known to be a reserved word at this point */
8361 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8365 yyl_keylookup(pTHX_ char *s, GV *gv)
8371 struct code c = no_code;
8372 I32 orig_keyword = 0;
8378 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8380 /* Some keywords can be followed by any delimiter, including ':' */
8381 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8383 /* x::* is just a word, unless x is "CORE" */
8384 if (!anydelim && *s == ':' && s[1] == ':') {
8385 if (memEQs(PL_tokenbuf, len, "CORE"))
8386 return yyl_key_core(aTHX_ s, len, c);
8387 return yyl_just_a_word(aTHX_ s, len, 0, c);
8391 while (d < PL_bufend && isSPACE(*d))
8392 d++; /* no comments skipped here, or s### is misparsed */
8394 /* Is this a word before a => operator? */
8395 if (*d == '=' && d[1] == '>') {
8396 return yyl_fatcomma(aTHX_ s, len);
8399 /* Check for plugged-in keyword */
8403 char *saved_bufptr = PL_bufptr;
8405 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8407 if (result == KEYWORD_PLUGIN_DECLINE) {
8408 /* not a plugged-in keyword */
8409 PL_bufptr = saved_bufptr;
8410 } else if (result == KEYWORD_PLUGIN_STMT) {
8411 pl_yylval.opval = o;
8413 if (!PL_nexttoke) PL_expect = XSTATE;
8414 return REPORT(PLUGSTMT);
8415 } else if (result == KEYWORD_PLUGIN_EXPR) {
8416 pl_yylval.opval = o;
8418 if (!PL_nexttoke) PL_expect = XOPERATOR;
8419 return REPORT(PLUGEXPR);
8421 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8425 /* Is this a label? */
8426 if (!anydelim && PL_expect == XSTATE
8427 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8430 newSVOP(OP_CONST, 0,
8431 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8436 /* Check for lexical sub */
8437 if (PL_expect != XOPERATOR) {
8438 char tmpbuf[sizeof PL_tokenbuf + 1];
8440 Copy(PL_tokenbuf, tmpbuf+1, len, char);
8441 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8442 if (c.off != NOT_IN_PAD) {
8443 assert(c.off); /* we assume this is boolean-true below */
8444 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8445 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
8446 HEK * const stashname = HvNAME_HEK(stash);
8447 c.sv = newSVhek(stashname);
8448 sv_catpvs(c.sv, "::");
8449 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8450 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8451 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8457 return yyl_just_a_word(aTHX_ s, len, 0, c);
8461 c.rv2cv_op = newOP(OP_PADANY, 0);
8462 c.rv2cv_op->op_targ = c.off;
8463 c.cv = find_lexical_cv(c.off);
8466 return yyl_just_a_word(aTHX_ s, len, 0, c);
8471 /* Check for built-in keyword */
8472 key = keyword(PL_tokenbuf, len, 0);
8475 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8477 if (key && key != KEY___DATA__ && key != KEY___END__
8478 && (!anydelim || *s != '#')) {
8479 /* no override, and not s### either; skipspace is safe here
8480 * check for => on following line */
8482 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8483 STRLEN soff = s - SvPVX(PL_linestr);
8485 arrow = *s == '=' && s[1] == '>';
8486 PL_bufptr = SvPVX(PL_linestr) + bufoff;
8487 s = SvPVX(PL_linestr) + soff;
8489 return yyl_fatcomma(aTHX_ s, len);
8492 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8496 yyl_try(pTHX_ char *s, STRLEN len)
8504 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s))
8505 return yyl_keylookup(aTHX_ s, gv);
8506 yyl_croak_unrecognised(aTHX_ s);
8510 /* emulate EOF on ^D or ^Z */
8511 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s, len);
8514 if ((!PL_rsfp || PL_lex_inwhat)
8515 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8519 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8521 yyerror((const char *)
8523 ? "Format not terminated"
8524 : "Missing right curly or square bracket"));
8527 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8531 if (s++ < PL_bufend)
8532 goto retry; /* ignore stray nulls */
8535 if (!PL_in_eval && !PL_preambled) {
8536 PL_preambled = TRUE;
8538 /* Generate a string of Perl code to load the debugger.
8539 * If PERL5DB is set, it will return the contents of that,
8540 * otherwise a compile-time require of perl5db.pl. */
8542 const char * const pdb = PerlEnv_getenv("PERL5DB");
8545 sv_setpv(PL_linestr, pdb);
8546 sv_catpvs(PL_linestr,";");
8548 SETERRNO(0,SS_NORMAL);
8549 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8551 PL_parser->preambling = CopLINE(PL_curcop);
8553 SvPVCLEAR(PL_linestr);
8554 if (PL_preambleav) {
8555 SV **svp = AvARRAY(PL_preambleav);
8556 SV **const end = svp + AvFILLp(PL_preambleav);
8558 sv_catsv(PL_linestr, *svp);
8560 sv_catpvs(PL_linestr, ";");
8562 sv_free(MUTABLE_SV(PL_preambleav));
8563 PL_preambleav = NULL;
8566 sv_catpvs(PL_linestr,
8567 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
8568 if (PL_minus_n || PL_minus_p) {
8569 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8571 sv_catpvs(PL_linestr,"chomp;");
8574 if ( ( *PL_splitstr == '/'
8575 || *PL_splitstr == '\''
8576 || *PL_splitstr == '"')
8577 && strchr(PL_splitstr + 1, *PL_splitstr))
8579 /* strchr is ok, because -F pattern can't contain
8581 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8584 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8585 bytes can be used as quoting characters. :-) */
8586 const char *splits = PL_splitstr;
8587 sv_catpvs(PL_linestr, "our @F=split(q\0");
8590 if (*splits == '\\')
8591 sv_catpvn(PL_linestr, splits, 1);
8592 sv_catpvn(PL_linestr, splits, 1);
8593 } while (*splits++);
8594 /* This loop will embed the trailing NUL of
8595 PL_linestr as the last thing it does before
8597 sv_catpvs(PL_linestr, ");");
8601 sv_catpvs(PL_linestr,"our @F=split(' ');");
8604 sv_catpvs(PL_linestr, "\n");
8605 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8606 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8607 PL_last_lop = PL_last_uni = NULL;
8608 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8609 update_debugger_info(PL_linestr, NULL, 0);
8612 return yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s, len);
8615 #ifdef PERL_STRICT_CR
8616 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8618 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8620 case ' ': case '\t': case '\f': case '\v':
8626 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8627 if (needs_semicolon)
8634 return yyl_hyphen(aTHX_ s);
8637 return yyl_plus(aTHX_ s);
8640 return yyl_star(aTHX_ s);
8643 return yyl_percent(aTHX_ s);
8646 return yyl_caret(aTHX_ s);
8649 return yyl_leftsquare(aTHX_ s);
8652 return yyl_tilde(aTHX_ s);
8655 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8661 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8662 return yyl_colon(aTHX_ s + 1);
8665 return yyl_leftparen(aTHX_ s + 1);
8668 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8676 return yyl_rightparen(aTHX_ s);
8679 return yyl_rightsquare(aTHX_ s);
8682 return yyl_leftcurly(aTHX_ s + 1, 0);
8685 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8687 return yyl_rightcurly(aTHX_ s, 0);
8690 return yyl_ampersand(aTHX_ s);
8693 return yyl_verticalbar(aTHX_ s);
8696 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8697 && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), "====="))
8699 s = vcs_conflict_marker(s + 7);
8705 const char tmp = *s++;
8707 if (!PL_lex_allbrackets
8708 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8716 if (!PL_lex_allbrackets
8717 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8726 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8727 && strchr("+-*/%.^&|<",tmp))
8728 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8729 "Reversed %c= operator",(int)tmp);
8731 if (PL_expect == XSTATE
8733 && (s == PL_linestart+1 || s[-2] == '\n') )
8735 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8736 || PL_lex_state != LEX_NORMAL)
8741 incline(s, PL_bufend);
8742 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8744 s = (char *) memchr(s,'\n', d - s);
8749 incline(s, PL_bufend);
8757 PL_parser->in_pod = 1;
8761 if (PL_expect == XBLOCK) {
8763 #ifdef PERL_STRICT_CR
8764 while (SPACE_OR_TAB(*t))
8766 while (SPACE_OR_TAB(*t) || *t == '\r')
8769 if (*t == '\n' || *t == '#') {
8770 ENTER_with_name("lex_format");
8771 SAVEI8(PL_parser->form_lex_state);
8772 SAVEI32(PL_lex_formbrack);
8773 PL_parser->form_lex_state = PL_lex_state;
8774 PL_lex_formbrack = PL_lex_brackets + 1;
8775 PL_parser->sub_error_count = PL_error_count;
8776 return yyl_leftcurly(aTHX_ s, 1);
8779 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8787 return yyl_bang(aTHX_ s + 1);
8790 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8791 && memBEGINs(s+2, (STRLEN) (PL_bufend - (s+2)), "<<<<<"))
8793 s = vcs_conflict_marker(s + 7);
8796 return yyl_leftpointy(aTHX_ s);
8799 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8800 && memBEGINs(s + 2, (STRLEN) (PL_bufend - s + 2), ">>>>>"))
8802 s = vcs_conflict_marker(s + 7);
8805 return yyl_rightpointy(aTHX_ s + 1);
8808 return yyl_dollar(aTHX_ s);
8811 return yyl_snail(aTHX_ s);
8813 case '/': /* may be division, defined-or, or pattern */
8814 return yyl_slash(aTHX_ s);
8816 case '?': /* conditional */
8818 if (!PL_lex_allbrackets
8819 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
8824 PL_lex_allbrackets++;
8828 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
8829 #ifdef PERL_STRICT_CR
8832 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
8834 && (s == PL_linestart || s[-1] == '\n') )
8837 /* formbrack==2 means dot seen where arguments expected */
8838 return yyl_rightcurly(aTHX_ s, 2);
8840 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
8844 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
8847 if (!PL_lex_allbrackets
8848 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
8856 pl_yylval.ival = OPf_SPECIAL;
8862 if (*s == '=' && !PL_lex_allbrackets
8863 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8871 case '0': case '1': case '2': case '3': case '4':
8872 case '5': case '6': case '7': case '8': case '9':
8873 s = scan_num(s, &pl_yylval);
8874 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
8875 if (PL_expect == XOPERATOR)
8880 return yyl_sglquote(aTHX_ s);
8883 return yyl_dblquote(aTHX_ s, len);
8886 return yyl_backtick(aTHX_ s);
8889 return yyl_backslash(aTHX_ s + 1);
8892 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
8893 char *start = s + 2;
8894 while (isDIGIT(*start) || *start == '_')
8896 if (*start == '.' && isDIGIT(start[1])) {
8897 s = scan_num(s, &pl_yylval);
8900 else if ((*start == ':' && start[1] == ':')
8901 || (PL_expect == XSTATE && *start == ':'))
8902 return yyl_keylookup(aTHX_ s, gv);
8903 else if (PL_expect == XSTATE) {
8905 while (d < PL_bufend && isSPACE(*d)) d++;
8907 return yyl_keylookup(aTHX_ s, gv);
8909 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
8910 if (!isALPHA(*start) && (PL_expect == XTERM
8911 || PL_expect == XREF || PL_expect == XSTATE
8912 || PL_expect == XTERMORDORDOR)) {
8913 GV *const gv = gv_fetchpvn_flags(s, start - s,
8914 UTF ? SVf_UTF8 : 0, SVt_PVCV);
8916 s = scan_num(s, &pl_yylval);
8921 return yyl_keylookup(aTHX_ s, gv);
8924 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
8928 return yyl_keylookup(aTHX_ s, gv);
8957 return yyl_keylookup(aTHX_ s, gv);
8965 Works out what to call the token just pulled out of the input
8966 stream. The yacc parser takes care of taking the ops we return and
8967 stitching them into a tree.
8970 The type of the next token
8973 Check if we have already built the token; if so, use it.
8974 Switch based on the current state:
8975 - if we have a case modifier in a string, deal with that
8976 - handle other cases of interpolation inside a string
8977 - scan the next line if we are inside a format
8978 In the normal state, switch on the next character:
8980 if alphabetic, go to key lookup
8981 unrecognized character - croak
8982 - 0/4/26: handle end-of-line or EOF
8983 - cases for whitespace
8984 - \n and #: handle comments and line numbers
8985 - various operators, brackets and sigils
8988 - 'v': vstrings (or go to key lookup)
8989 - 'x' repetition operator (or go to key lookup)
8990 - other ASCII alphanumerics (key lookup begins here):
8993 scan built-in keyword (but do nothing with it yet)
8994 check for statement label
8995 check for lexical subs
8996 return yyl_just_a_word if there is one
8997 see whether built-in keyword is overridden
8998 switch on keyword number:
8999 - default: return yyl_just_a_word:
9000 not a built-in keyword; handle bareword lookup
9001 disambiguate between method and sub call
9002 fall back to bareword
9003 - cases for built-in keywords
9007 #define RSFP_FILENO (PL_rsfp)
9009 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9017 char *s = PL_bufptr;
9019 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9020 const U8* first_bad_char_loc;
9021 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9022 PL_bufend - PL_bufptr,
9023 &first_bad_char_loc)))
9025 _force_out_malformed_utf8_message(first_bad_char_loc,
9028 1 /* 1 means die */ );
9029 NOT_REACHED; /* NOTREACHED */
9031 PL_parser->recheck_utf8_validity = FALSE;
9034 SV* tmp = newSVpvs("");
9035 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9036 (IV)CopLINE(PL_curcop),
9037 lex_state_names[PL_lex_state],
9038 exp_name[PL_expect],
9039 pv_display(tmp, s, strlen(s), 0, 60));
9043 /* when we've already built the next token, just pull it out of the queue */
9046 pl_yylval = PL_nextval[PL_nexttoke];
9049 next_type = PL_nexttype[PL_nexttoke];
9050 if (next_type & (7<<24)) {
9051 if (next_type & (1<<24)) {
9052 if (PL_lex_brackets > 100)
9053 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9054 PL_lex_brackstack[PL_lex_brackets++] =
9055 (char) ((next_type >> 16) & 0xff);
9057 if (next_type & (2<<24))
9058 PL_lex_allbrackets++;
9059 if (next_type & (4<<24))
9060 PL_lex_allbrackets--;
9061 next_type &= 0xffff;
9063 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9067 switch (PL_lex_state) {
9069 case LEX_INTERPNORMAL:
9072 /* interpolated case modifiers like \L \U, including \Q and \E.
9073 when we get here, PL_bufptr is at the \
9075 case LEX_INTERPCASEMOD:
9076 /* handle \E or end of string */
9077 return yyl_interpcasemod(aTHX_ s);
9079 case LEX_INTERPPUSH:
9080 return REPORT(sublex_push());
9082 case LEX_INTERPSTART:
9083 if (PL_bufptr == PL_bufend)
9084 return REPORT(sublex_done());
9086 if(*PL_bufptr != '(')
9087 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9090 /* for /@a/, we leave the joining for the regex engine to do
9091 * (unless we're within \Q etc) */
9092 PL_lex_dojoin = (*PL_bufptr == '@'
9093 && (!PL_lex_inpat || PL_lex_casemods));
9094 PL_lex_state = LEX_INTERPNORMAL;
9095 if (PL_lex_dojoin) {
9096 NEXTVAL_NEXTTOKE.ival = 0;
9098 force_ident("\"", '$');
9099 NEXTVAL_NEXTTOKE.ival = 0;
9101 NEXTVAL_NEXTTOKE.ival = 0;
9102 force_next((2<<24)|'(');
9103 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9106 /* Convert (?{...}) and friends to 'do {...}' */
9107 if (PL_lex_inpat && *PL_bufptr == '(') {
9108 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9110 if (*PL_bufptr != '{')
9112 PL_expect = XTERMBLOCK;
9116 if (PL_lex_starts++) {
9118 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9119 if (!PL_lex_casemods && PL_lex_inpat)
9122 AopNOASSIGN(OP_CONCAT);
9126 case LEX_INTERPENDMAYBE:
9127 if (intuit_more(PL_bufptr, PL_bufend)) {
9128 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9134 if (PL_lex_dojoin) {
9135 const U8 dojoin_was = PL_lex_dojoin;
9136 PL_lex_dojoin = FALSE;
9137 PL_lex_state = LEX_INTERPCONCAT;
9138 PL_lex_allbrackets--;
9139 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9141 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9142 && SvEVALED(PL_lex_repl))
9144 if (PL_bufptr != PL_bufend)
9145 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9148 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9149 re_eval_str. If the here-doc body’s length equals the previous
9150 value of re_eval_start, re_eval_start will now be null. So
9151 check re_eval_str as well. */
9152 if (PL_parser->lex_shared->re_eval_start
9153 || PL_parser->lex_shared->re_eval_str) {
9155 if (*PL_bufptr != ')')
9156 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9158 /* having compiled a (?{..}) expression, return the original
9159 * text too, as a const */
9160 if (PL_parser->lex_shared->re_eval_str) {
9161 sv = PL_parser->lex_shared->re_eval_str;
9162 PL_parser->lex_shared->re_eval_str = NULL;
9164 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9165 SvPV_shrink_to_cur(sv);
9167 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9168 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9169 NEXTVAL_NEXTTOKE.opval =
9170 newSVOP(OP_CONST, 0,
9173 PL_parser->lex_shared->re_eval_start = NULL;
9179 case LEX_INTERPCONCAT:
9181 if (PL_lex_brackets)
9182 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9183 (long) PL_lex_brackets);
9185 if (PL_bufptr == PL_bufend)
9186 return REPORT(sublex_done());
9188 /* m'foo' still needs to be parsed for possible (?{...}) */
9189 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9190 SV *sv = newSVsv(PL_linestr);
9192 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9196 int save_error_count = PL_error_count;
9198 s = scan_const(PL_bufptr);
9200 /* Set flag if this was a pattern and there were errors. op.c will
9201 * refuse to compile a pattern with this flag set. Otherwise, we
9202 * could get segfaults, etc. */
9203 if (PL_lex_inpat && PL_error_count > save_error_count) {
9204 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9207 PL_lex_state = LEX_INTERPCASEMOD;
9209 PL_lex_state = LEX_INTERPSTART;
9212 if (s != PL_bufptr) {
9213 NEXTVAL_NEXTTOKE = pl_yylval;
9216 if (PL_lex_starts++) {
9217 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9218 if (!PL_lex_casemods && PL_lex_inpat)
9221 AopNOASSIGN(OP_CONCAT);
9231 if (PL_parser->sub_error_count != PL_error_count) {
9232 /* There was an error parsing a formline, which tends to
9234 Unlike interpolated sub-parsing, we can't treat any of
9235 these as recoverable, so no need to check sub_no_recover.
9239 assert(PL_lex_formbrack);
9240 s = scan_formline(PL_bufptr);
9241 if (!PL_lex_formbrack)
9242 return yyl_rightcurly(aTHX_ s, 1);
9247 /* We really do *not* want PL_linestr ever becoming a COW. */
9248 assert (!SvIsCOW(PL_linestr));
9250 PL_oldoldbufptr = PL_oldbufptr;
9253 if (PL_in_my == KEY_sigvar) {
9254 PL_parser->saw_infix_sigil = 0;
9255 return yyl_sigvar(aTHX_ s);
9259 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9260 On its return, we then need to set it to indicate whether the token
9261 we just encountered was an infix operator that (if we hadn't been
9262 expecting an operator) have been a sigil.
9264 bool expected_operator = (PL_expect == XOPERATOR);
9265 int ret = yyl_try(aTHX_ s, 0);
9266 switch (pl_yylval.ival) {
9271 if (expected_operator) {
9272 PL_parser->saw_infix_sigil = 1;
9277 PL_parser->saw_infix_sigil = 0;
9287 Looks up an identifier in the pad or in a package
9289 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9290 rather than a plain pad var.
9293 PRIVATEREF if this is a lexical name.
9294 BAREWORD if this belongs to a package.
9297 if we're in a my declaration
9298 croak if they tried to say my($foo::bar)
9299 build the ops for a my() declaration
9300 if it's an access to a my() variable
9301 build ops for access to a my() variable
9302 if in a dq string, and they've said @foo and we can't find @foo
9304 build ops for a bareword
9308 S_pending_ident(pTHX)
9311 const char pit = (char)pl_yylval.ival;
9312 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9313 /* All routes through this function want to know if there is a colon. */
9314 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9316 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9317 "### Pending identifier '%s'\n", PL_tokenbuf); });
9318 assert(tokenbuf_len >= 2);
9320 /* if we're in a my(), we can't allow dynamics here.
9321 $foo'bar has already been turned into $foo::bar, so
9322 just check for colons.
9324 if it's a legal name, the OP is a PADANY.
9327 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9329 /* diag_listed_as: No package name allowed for variable %s
9331 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9333 *PL_tokenbuf=='&' ? "subroutine" : "variable",
9334 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9335 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9340 /* "my" variable %s can't be in a package */
9341 /* PL_no_myglob is constant */
9342 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9343 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9344 PL_in_my == KEY_my ? "my" : "state",
9345 *PL_tokenbuf == '&' ? "subroutine" : "variable",
9347 UTF ? SVf_UTF8 : 0);
9348 GCC_DIAG_RESTORE_STMT;
9351 if (PL_in_my == KEY_sigvar) {
9352 /* A signature 'padop' needs in addition, an op_first to
9353 * point to a child sigdefelem, and an extra field to hold
9354 * the signature index. We can achieve both by using an
9355 * UNOP_AUX and (ab)using the op_aux field to hold the
9356 * index. If we ever need more fields, use a real malloced
9357 * aux strut instead.
9359 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9360 INT2PTR(UNOP_AUX_item *,
9361 (PL_parser->sig_elems)));
9362 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9363 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9367 o = newOP(OP_PADANY, 0);
9368 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9369 UTF ? SVf_UTF8 : 0);
9370 if (PL_in_my == KEY_sigvar)
9373 pl_yylval.opval = o;
9379 build the ops for accesses to a my() variable.
9384 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9386 if (tmp != NOT_IN_PAD) {
9387 /* might be an "our" variable" */
9388 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9389 /* build ops for a bareword */
9390 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9391 HEK * const stashname = HvNAME_HEK(stash);
9392 SV * const sym = newSVhek(stashname);
9393 sv_catpvs(sym, "::");
9394 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9395 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9396 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9400 ((PL_tokenbuf[0] == '$') ? SVt_PV
9401 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9406 pl_yylval.opval = newOP(OP_PADANY, 0);
9407 pl_yylval.opval->op_targ = tmp;
9413 Whine if they've said @foo or @foo{key} in a doublequoted string,
9414 and @foo (or %foo) isn't a variable we can find in the symbol
9417 if (ckWARN(WARN_AMBIGUOUS)
9419 && PL_lex_state != LEX_NORMAL
9420 && !PL_lex_brackets)
9422 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9423 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9425 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9428 /* Downgraded from fatal to warning 20000522 mjd */
9429 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9430 "Possible unintended interpolation of %" UTF8f
9432 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9436 /* build ops for a bareword */
9437 pl_yylval.opval = newSVOP(OP_CONST, 0,
9438 newSVpvn_flags(PL_tokenbuf + 1,
9439 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9440 UTF ? SVf_UTF8 : 0 ));
9441 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9443 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9444 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9445 | ( UTF ? SVf_UTF8 : 0 ),
9446 ((PL_tokenbuf[0] == '$') ? SVt_PV
9447 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9453 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9455 PERL_ARGS_ASSERT_CHECKCOMMA;
9457 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9458 if (ckWARN(WARN_SYNTAX)) {
9461 for (w = s+2; *w && level; w++) {
9469 /* the list of chars below is for end of statements or
9470 * block / parens, boolean operators (&&, ||, //) and branch
9471 * constructs (or, and, if, until, unless, while, err, for).
9472 * Not a very solid hack... */
9473 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9474 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9475 "%s (...) interpreted as function",name);
9478 while (s < PL_bufend && isSPACE(*s))
9482 while (s < PL_bufend && isSPACE(*s))
9484 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9485 const char * const w = s;
9486 s += UTF ? UTF8SKIP(s) : 1;
9487 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9488 s += UTF ? UTF8SKIP(s) : 1;
9489 while (s < PL_bufend && isSPACE(*s))
9493 if (keyword(w, s - w, 0))
9496 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9497 if (gv && GvCVu(gv))
9502 Copy(w, tmpbuf+1, s - w, char);
9504 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9505 if (off != NOT_IN_PAD) return;
9507 Perl_croak(aTHX_ "No comma allowed after %s", what);
9512 /* S_new_constant(): do any overload::constant lookup.
9514 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9515 Best used as sv=new_constant(..., sv, ...).
9516 If s, pv are NULL, calls subroutine with one argument,
9517 and <type> is used with error messages only.
9518 <type> is assumed to be well formed UTF-8.
9520 If error_msg is not NULL, *error_msg will be set to any error encountered.
9521 Otherwise yyerror() will be used to output it */
9524 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9525 SV *sv, SV *pv, const char *type, STRLEN typelen,
9526 const char ** error_msg)
9529 HV * table = GvHV(PL_hintgv); /* ^H */
9534 const char *why1 = "", *why2 = "", *why3 = "";
9536 PERL_ARGS_ASSERT_NEW_CONSTANT;
9537 /* We assume that this is true: */
9538 if (*key == 'c') { assert (strEQ(key, "charnames")); }
9541 sv_2mortal(sv); /* Parent created it permanently */
9543 || ! (PL_hints & HINT_LOCALIZE_HH)
9544 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9549 /* Here haven't found what we're looking for. If it is charnames,
9550 * perhaps it needs to be loaded. Try doing that before giving up */
9552 Perl_load_module(aTHX_
9554 newSVpvs("_charnames"),
9555 /* version parameter; no need to specify it, as if
9556 * we get too early a version, will fail anyway,
9557 * not being able to find '_charnames' */
9562 assert(sp == PL_stack_sp);
9563 table = GvHV(PL_hintgv);
9565 && (PL_hints & HINT_LOCALIZE_HH)
9566 && (cvp = hv_fetch(table, key, keylen, FALSE))
9572 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9573 msg = Perl_form(aTHX_
9574 "Constant(%.*s) unknown",
9575 (int)(type ? typelen : len),
9581 why3 = "} is not defined";
9584 msg = Perl_form(aTHX_
9585 /* The +3 is for '\N{'; -4 for that, plus '}' */
9586 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9590 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
9591 (int)(type ? typelen : len),
9592 (type ? type: s), why1, why2, why3);
9599 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9601 return SvREFCNT_inc_simple_NN(sv);
9606 pv = newSVpvn_flags(s, len, SVs_TEMP);
9608 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9610 typesv = &PL_sv_undef;
9612 PUSHSTACKi(PERLSI_OVERLOAD);
9624 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9628 /* Check the eval first */
9629 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9631 const char * errstr;
9632 sv_catpvs(errsv, "Propagated");
9633 errstr = SvPV_const(errsv, errlen);
9634 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9636 res = SvREFCNT_inc_simple_NN(sv);
9640 SvREFCNT_inc_simple_void_NN(res);
9649 why1 = "Call to &{$^H{";
9651 why3 = "}} did not return a defined value";
9653 (void)sv_2mortal(sv);
9660 PERL_STATIC_INLINE void
9661 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9662 bool is_utf8, bool check_dollar, bool tick_warn)
9665 const char *olds = *s;
9666 PERL_ARGS_ASSERT_PARSE_IDENT;
9668 while (*s < PL_bufend) {
9670 Perl_croak(aTHX_ "%s", ident_too_long);
9671 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9672 /* The UTF-8 case must come first, otherwise things
9673 * like c\N{COMBINING TILDE} would start failing, as the
9674 * isWORDCHAR_A case below would gobble the 'c' up.
9677 char *t = *s + UTF8SKIP(*s);
9678 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9681 if (*d + (t - *s) > e)
9682 Perl_croak(aTHX_ "%s", ident_too_long);
9683 Copy(*s, *d, t - *s, char);
9687 else if ( isWORDCHAR_A(**s) ) {
9690 } while (isWORDCHAR_A(**s) && *d < e);
9692 else if ( allow_package
9694 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9701 else if (allow_package && **s == ':' && (*s)[1] == ':'
9702 /* Disallow things like Foo::$bar. For the curious, this is
9703 * the code path that triggers the "Bad name after" warning
9704 * when looking for barewords.
9706 && !(check_dollar && (*s)[2] == '$')) {
9713 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9714 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9717 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9720 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9721 "Old package separator used in string");
9722 if (olds[-1] == '#')
9726 if (*olds == '\'') {
9733 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9734 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9735 UTF8fARG(is_utf8, d2-this_d, this_d));
9740 /* Returns a NUL terminated string, with the length of the string written to
9744 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9747 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9748 bool is_utf8 = cBOOL(UTF);
9750 PERL_ARGS_ASSERT_SCAN_WORD;
9752 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9758 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9759 * iff Unicode semantics are to be used. The legal ones are any of:
9760 * a) all ASCII characters except:
9761 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9763 * The final case currently doesn't get this far in the program, so we
9764 * don't test for it. If that were to change, it would be ok to allow it.
9765 * b) When not under Unicode rules, any upper Latin1 character
9766 * c) Otherwise, when unicode rules are used, all XIDS characters.
9768 * Because all ASCII characters have the same representation whether
9769 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9770 * '{' without knowing if is UTF-8 or not. */
9771 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
9772 (isGRAPH_A(*(s)) || ((is_utf8) \
9773 ? isIDFIRST_utf8_safe(s, e) \
9775 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9778 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9780 I32 herelines = PL_parser->herelines;
9781 SSize_t bracket = -1;
9784 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9785 bool is_utf8 = cBOOL(UTF);
9786 I32 orig_copline = 0, tmp_copline = 0;
9788 PERL_ARGS_ASSERT_SCAN_IDENT;
9790 if (isSPACE(*s) || !*s)
9793 while (isDIGIT(*s)) {
9795 Perl_croak(aTHX_ "%s", ident_too_long);
9799 else { /* See if it is a "normal" identifier */
9800 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9805 /* Either a digit variable, or parse_ident() found an identifier
9806 (anything valid as a bareword), so job done and return. */
9807 if (PL_lex_state != LEX_NORMAL)
9808 PL_lex_state = LEX_INTERPENDMAYBE;
9812 /* Here, it is not a run-of-the-mill identifier name */
9814 if (*s == '$' && s[1]
9815 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9816 || isDIGIT_A((U8)s[1])
9819 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9821 /* Dereferencing a value in a scalar variable.
9822 The alternatives are different syntaxes for a scalar variable.
9823 Using ' as a leading package separator isn't allowed. :: is. */
9826 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9828 bracket = s - SvPVX(PL_linestr);
9830 orig_copline = CopLINE(PL_curcop);
9831 if (s < PL_bufend && isSPACE(*s)) {
9835 if ((s <= PL_bufend - ((is_utf8)
9838 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9841 const STRLEN skip = UTF8SKIP(s);
9844 for ( i = 0; i < skip; i++ )
9852 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
9853 if (*d == '^' && *s && isCONTROLVAR(*s)) {
9857 /* Warn about ambiguous code after unary operators if {...} notation isn't
9858 used. There's no difference in ambiguity; it's merely a heuristic
9859 about when not to warn. */
9860 else if (ck_uni && bracket == -1)
9862 if (bracket != -1) {
9865 /* If we were processing {...} notation then... */
9866 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
9867 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
9870 /* note we have to check for a normal identifier first,
9871 * as it handles utf8 symbols, and only after that has
9872 * been ruled out can we look at the caret words */
9873 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
9874 /* if it starts as a valid identifier, assume that it is one.
9875 (the later check for } being at the expected point will trap
9876 cases where this doesn't pan out.) */
9877 d += is_utf8 ? UTF8SKIP(d) : 1;
9878 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
9881 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
9883 while (isWORDCHAR(*s) && d < e) {
9887 Perl_croak(aTHX_ "%s", ident_too_long);
9890 tmp_copline = CopLINE(PL_curcop);
9891 if (s < PL_bufend && isSPACE(*s)) {
9894 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
9895 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
9896 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
9897 const char * const brack =
9899 ((*s == '[') ? "[...]" : "{...}");
9900 orig_copline = CopLINE(PL_curcop);
9901 CopLINE_set(PL_curcop, tmp_copline);
9902 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9903 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9904 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
9905 funny, dest, brack, funny, dest, brack);
9906 CopLINE_set(PL_curcop, orig_copline);
9909 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
9910 PL_lex_allbrackets++;
9916 tmp_copline = CopLINE(PL_curcop);
9917 if ((skip = s < PL_bufend && isSPACE(*s))) {
9918 /* Avoid incrementing line numbers or resetting PL_linestart,
9919 in case we have to back up. */
9920 STRLEN s_off = s - SvPVX(PL_linestr);
9922 s = SvPVX(PL_linestr) + s_off;
9927 /* Expect to find a closing } after consuming any trailing whitespace.
9930 /* Now increment line numbers if applicable. */
9934 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
9935 PL_lex_state = LEX_INTERPEND;
9938 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
9939 if (ckWARN(WARN_AMBIGUOUS)
9940 && (keyword(dest, d - dest, 0)
9941 || get_cvn_flags(dest, d - dest, is_utf8
9945 SV *tmp = newSVpvn_flags( dest, d - dest,
9946 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
9949 orig_copline = CopLINE(PL_curcop);
9950 CopLINE_set(PL_curcop, tmp_copline);
9951 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9952 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
9953 funny, SVfARG(tmp), funny, SVfARG(tmp));
9954 CopLINE_set(PL_curcop, orig_copline);
9959 /* Didn't find the closing } at the point we expected, so restore
9960 state such that the next thing to process is the opening { and */
9961 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
9962 CopLINE_set(PL_curcop, orig_copline);
9963 PL_parser->herelines = herelines;
9965 PL_parser->sub_no_recover = TRUE;
9968 else if ( PL_lex_state == LEX_INTERPNORMAL
9970 && !intuit_more(s, PL_bufend))
9971 PL_lex_state = LEX_INTERPEND;
9976 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
9978 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
9979 * found in the parse starting at 's', based on the subset that are valid
9980 * in this context input to this routine in 'valid_flags'. Advances s.
9981 * Returns TRUE if the input should be treated as a valid flag, so the next
9982 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
9983 * upon first call on the current regex. This routine will set it to any
9984 * charset modifier found. The caller shouldn't change it. This way,
9985 * another charset modifier encountered in the parse can be detected as an
9986 * error, as we have decided to allow only one */
9989 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
9991 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9992 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
9993 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
9994 UTF ? SVf_UTF8 : 0);
9996 /* Pretend that it worked, so will continue processing before
10005 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10006 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10007 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10008 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10009 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10010 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10011 case LOCALE_PAT_MOD:
10013 goto multiple_charsets;
10015 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10018 case UNICODE_PAT_MOD:
10020 goto multiple_charsets;
10022 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10025 case ASCII_RESTRICT_PAT_MOD:
10027 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10031 /* Error if previous modifier wasn't an 'a', but if it was, see
10032 * if, and accept, a second occurrence (only) */
10033 if (*charset != 'a'
10034 || get_regex_charset(*pmfl)
10035 != REGEX_ASCII_RESTRICTED_CHARSET)
10037 goto multiple_charsets;
10039 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10043 case DEPENDS_PAT_MOD:
10045 goto multiple_charsets;
10047 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10056 if (*charset != c) {
10057 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10059 else if (c == 'a') {
10060 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10061 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10064 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10067 /* Pretend that it worked, so will continue processing before dieing */
10073 S_scan_pat(pTHX_ char *start, I32 type)
10077 const char * const valid_flags =
10078 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10079 char charset = '\0'; /* character set modifier */
10080 unsigned int x_mod_count = 0;
10082 PERL_ARGS_ASSERT_SCAN_PAT;
10084 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10086 Perl_croak(aTHX_ "Search pattern not terminated");
10088 pm = (PMOP*)newPMOP(type, 0);
10089 if (PL_multi_open == '?') {
10090 /* This is the only point in the code that sets PMf_ONCE: */
10091 pm->op_pmflags |= PMf_ONCE;
10093 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10094 allows us to restrict the list needed by reset to just the ??
10096 assert(type != OP_TRANS);
10098 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10101 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10104 elements = mg->mg_len / sizeof(PMOP**);
10105 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10106 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10107 mg->mg_len = elements * sizeof(PMOP**);
10108 PmopSTASH_set(pm,PL_curstash);
10112 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10113 * anon CV. False positives like qr/[(?{]/ are harmless */
10115 if (type == OP_QR) {
10117 char *e, *p = SvPV(PL_lex_stuff, len);
10119 for (; p < e; p++) {
10120 if (p[0] == '(' && p[1] == '?'
10121 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10123 pm->op_pmflags |= PMf_HAS_CV;
10127 pm->op_pmflags |= PMf_IS_QR;
10130 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10131 &s, &charset, &x_mod_count))
10133 /* issue a warning if /c is specified,but /g is not */
10134 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10136 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10137 "Use of /c modifier is meaningless without /g" );
10140 PL_lex_op = (OP*)pm;
10141 pl_yylval.ival = OP_MATCH;
10146 S_scan_subst(pTHX_ char *start)
10152 line_t linediff = 0;
10154 char charset = '\0'; /* character set modifier */
10155 unsigned int x_mod_count = 0;
10158 PERL_ARGS_ASSERT_SCAN_SUBST;
10160 pl_yylval.ival = OP_NULL;
10162 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10165 Perl_croak(aTHX_ "Substitution pattern not terminated");
10169 first_start = PL_multi_start;
10170 first_line = CopLINE(PL_curcop);
10171 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10173 SvREFCNT_dec_NN(PL_lex_stuff);
10174 PL_lex_stuff = NULL;
10175 Perl_croak(aTHX_ "Substitution replacement not terminated");
10177 PL_multi_start = first_start; /* so whole substitution is taken together */
10179 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10183 if (*s == EXEC_PAT_MOD) {
10187 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10188 &s, &charset, &x_mod_count))
10194 if ((pm->op_pmflags & PMf_CONTINUE)) {
10195 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10199 SV * const repl = newSVpvs("");
10202 pm->op_pmflags |= PMf_EVAL;
10203 for (; es > 1; es--) {
10204 sv_catpvs(repl, "eval ");
10206 sv_catpvs(repl, "do {");
10207 sv_catsv(repl, PL_parser->lex_sub_repl);
10208 sv_catpvs(repl, "}");
10209 SvREFCNT_dec(PL_parser->lex_sub_repl);
10210 PL_parser->lex_sub_repl = repl;
10214 linediff = CopLINE(PL_curcop) - first_line;
10216 CopLINE_set(PL_curcop, first_line);
10218 if (linediff || es) {
10219 /* the IVX field indicates that the replacement string is a s///e;
10220 * the NVX field indicates how many src code lines the replacement
10222 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10223 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10224 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10228 PL_lex_op = (OP*)pm;
10229 pl_yylval.ival = OP_SUBST;
10234 S_scan_trans(pTHX_ char *start)
10241 bool nondestruct = 0;
10244 PERL_ARGS_ASSERT_SCAN_TRANS;
10246 pl_yylval.ival = OP_NULL;
10248 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10250 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10254 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10256 SvREFCNT_dec_NN(PL_lex_stuff);
10257 PL_lex_stuff = NULL;
10258 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10261 complement = del = squash = 0;
10265 complement = OPpTRANS_COMPLEMENT;
10268 del = OPpTRANS_DELETE;
10271 squash = OPpTRANS_SQUASH;
10283 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10284 o->op_private &= ~OPpTRANS_ALL;
10285 o->op_private |= del|squash|complement;
10288 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10295 Takes a pointer to the first < in <<FOO.
10296 Returns a pointer to the byte following <<FOO.
10298 This function scans a heredoc, which involves different methods
10299 depending on whether we are in a string eval, quoted construct, etc.
10300 This is because PL_linestr could containing a single line of input, or
10301 a whole string being evalled, or the contents of the current quote-
10304 The two basic methods are:
10305 - Steal lines from the input stream
10306 - Scan the heredoc in PL_linestr and remove it therefrom
10308 In a file scope or filtered eval, the first method is used; in a
10309 string eval, the second.
10311 In a quote-like operator, we have to choose between the two,
10312 depending on where we can find a newline. We peek into outer lex-
10313 ing scopes until we find one with a newline in it. If we reach the
10314 outermost lexing scope and it is a file, we use the stream method.
10315 Otherwise it is treated as an eval.
10319 S_scan_heredoc(pTHX_ char *s)
10321 I32 op_type = OP_SCALAR;
10329 I32 indent_len = 0;
10330 bool indented = FALSE;
10331 const bool infile = PL_rsfp || PL_parser->filtered;
10332 const line_t origline = CopLINE(PL_curcop);
10333 LEXSHARED *shared = PL_parser->lex_shared;
10335 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10338 d = PL_tokenbuf + 1;
10339 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10340 *PL_tokenbuf = '\n';
10343 if (*peek == '~') {
10348 while (SPACE_OR_TAB(*peek))
10351 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10354 s = delimcpy(d, e, s, PL_bufend, term, &len);
10355 if (s == PL_bufend)
10356 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10362 /* <<\FOO is equivalent to <<'FOO' */
10367 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10368 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10372 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10373 peek += UTF ? UTF8SKIP(peek) : 1;
10376 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10377 Copy(s, d, len, char);
10382 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10383 Perl_croak(aTHX_ "Delimiter for here document is too long");
10387 len = d - PL_tokenbuf;
10389 #ifndef PERL_STRICT_CR
10390 d = (char *) memchr(s, '\r', PL_bufend - s);
10392 char * const olds = s;
10394 while (s < PL_bufend) {
10400 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10409 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10414 tmpstr = newSV_type(SVt_PVIV);
10415 SvGROW(tmpstr, 80);
10416 if (term == '\'') {
10417 op_type = OP_CONST;
10418 SvIV_set(tmpstr, -1);
10420 else if (term == '`') {
10421 op_type = OP_BACKTICK;
10422 SvIV_set(tmpstr, '\\');
10425 PL_multi_start = origline + 1 + PL_parser->herelines;
10426 PL_multi_open = PL_multi_close = '<';
10428 /* inside a string eval or quote-like operator */
10429 if (!infile || PL_lex_inwhat) {
10432 char * const olds = s;
10433 PERL_CONTEXT * const cx = CX_CUR();
10434 /* These two fields are not set until an inner lexing scope is
10435 entered. But we need them set here. */
10436 shared->ls_bufptr = s;
10437 shared->ls_linestr = PL_linestr;
10439 if (PL_lex_inwhat) {
10440 /* Look for a newline. If the current buffer does not have one,
10441 peek into the line buffer of the parent lexing scope, going
10442 up as many levels as necessary to find one with a newline
10445 while (!(s = (char *)memchr(
10446 (void *)shared->ls_bufptr, '\n',
10447 SvEND(shared->ls_linestr)-shared->ls_bufptr
10450 shared = shared->ls_prev;
10451 /* shared is only null if we have gone beyond the outermost
10452 lexing scope. In a file, we will have broken out of the
10453 loop in the previous iteration. In an eval, the string buf-
10454 fer ends with "\n;", so the while condition above will have
10455 evaluated to false. So shared can never be null. Or so you
10456 might think. Odd syntax errors like s;@{<<; can gobble up
10457 the implicit semicolon at the end of a flie, causing the
10458 file handle to be closed even when we are not in a string
10459 eval. So shared may be null in that case.
10460 (Closing '>>}' here to balance the earlier open brace for
10461 editors that look for matched pairs.) */
10462 if (UNLIKELY(!shared))
10464 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10465 most lexing scope. In a file, shared->ls_linestr at that
10466 level is just one line, so there is no body to steal. */
10467 if (infile && !shared->ls_prev) {
10473 else { /* eval or we've already hit EOF */
10474 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10479 linestr = shared->ls_linestr;
10480 bufend = SvEND(linestr);
10485 while (s < bufend - len + 1) {
10487 ++PL_parser->herelines;
10489 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10493 /* Only valid if it's preceded by whitespace only */
10494 while (backup != myolds && --backup >= myolds) {
10495 if (! SPACE_OR_TAB(*backup)) {
10501 /* No whitespace or all! */
10502 if (backup == s || *backup == '\n') {
10503 Newx(indent, indent_len + 1, char);
10504 memcpy(indent, backup + 1, indent_len);
10505 indent[indent_len] = 0;
10506 s--; /* before our delimiter */
10507 PL_parser->herelines--; /* this line doesn't count */
10514 while (s < bufend - len + 1
10515 && memNE(s,PL_tokenbuf,len) )
10518 ++PL_parser->herelines;
10522 if (s >= bufend - len + 1) {
10526 sv_setpvn(tmpstr,d+1,s-d);
10528 /* the preceding stmt passes a newline */
10529 PL_parser->herelines++;
10531 /* s now points to the newline after the heredoc terminator.
10532 d points to the newline before the body of the heredoc.
10535 /* We are going to modify linestr in place here, so set
10536 aside copies of the string if necessary for re-evals or
10538 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10539 check shared->re_eval_str. */
10540 if (shared->re_eval_start || shared->re_eval_str) {
10541 /* Set aside the rest of the regexp */
10542 if (!shared->re_eval_str)
10543 shared->re_eval_str =
10544 newSVpvn(shared->re_eval_start,
10545 bufend - shared->re_eval_start);
10546 shared->re_eval_start -= s-d;
10549 if (cxstack_ix >= 0
10550 && CxTYPE(cx) == CXt_EVAL
10551 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10552 && cx->blk_eval.cur_text == linestr)
10554 cx->blk_eval.cur_text = newSVsv(linestr);
10555 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10558 /* Copy everything from s onwards back to d. */
10559 Move(s,d,bufend-s + 1,char);
10560 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10561 /* Setting PL_bufend only applies when we have not dug deeper
10562 into other scopes, because sublex_done sets PL_bufend to
10563 SvEND(PL_linestr). */
10564 if (shared == PL_parser->lex_shared)
10565 PL_bufend = SvEND(linestr);
10570 char *oldbufptr_save;
10571 char *oldoldbufptr_save;
10573 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10574 term = PL_tokenbuf[1];
10576 linestr_save = PL_linestr; /* must restore this afterwards */
10577 d = s; /* and this */
10578 oldbufptr_save = PL_oldbufptr;
10579 oldoldbufptr_save = PL_oldoldbufptr;
10580 PL_linestr = newSVpvs("");
10581 PL_bufend = SvPVX(PL_linestr);
10584 PL_bufptr = PL_bufend;
10585 CopLINE_set(PL_curcop,
10586 origline + 1 + PL_parser->herelines);
10588 if ( !lex_next_chunk(LEX_NO_TERM)
10589 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10591 /* Simply freeing linestr_save might seem simpler here, as it
10592 does not matter what PL_linestr points to, since we are
10593 about to croak; but in a quote-like op, linestr_save
10594 will have been prospectively freed already, via
10595 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10596 restore PL_linestr. */
10597 SvREFCNT_dec_NN(PL_linestr);
10598 PL_linestr = linestr_save;
10599 PL_oldbufptr = oldbufptr_save;
10600 PL_oldoldbufptr = oldoldbufptr_save;
10604 CopLINE_set(PL_curcop, origline);
10606 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10607 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10608 /* ^That should be enough to avoid this needing to grow: */
10609 sv_catpvs(PL_linestr, "\n\0");
10610 assert(s == SvPVX(PL_linestr));
10611 PL_bufend = SvEND(PL_linestr);
10615 PL_parser->herelines++;
10616 PL_last_lop = PL_last_uni = NULL;
10618 #ifndef PERL_STRICT_CR
10619 if (PL_bufend - PL_linestart >= 2) {
10620 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10621 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10623 PL_bufend[-2] = '\n';
10625 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10627 else if (PL_bufend[-1] == '\r')
10628 PL_bufend[-1] = '\n';
10630 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10631 PL_bufend[-1] = '\n';
10634 if (indented && (PL_bufend-s) >= len) {
10635 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10638 char *backup = found;
10641 /* Only valid if it's preceded by whitespace only */
10642 while (backup != s && --backup >= s) {
10643 if (! SPACE_OR_TAB(*backup)) {
10649 /* All whitespace or none! */
10650 if (backup == found || SPACE_OR_TAB(*backup)) {
10651 Newx(indent, indent_len + 1, char);
10652 memcpy(indent, backup, indent_len);
10653 indent[indent_len] = 0;
10654 SvREFCNT_dec(PL_linestr);
10655 PL_linestr = linestr_save;
10656 PL_linestart = SvPVX(linestr_save);
10657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10658 PL_oldbufptr = oldbufptr_save;
10659 PL_oldoldbufptr = oldoldbufptr_save;
10665 /* Didn't find it */
10666 sv_catsv(tmpstr,PL_linestr);
10669 if (*s == term && PL_bufend-s >= len
10670 && memEQ(s,PL_tokenbuf + 1,len))
10672 SvREFCNT_dec(PL_linestr);
10673 PL_linestr = linestr_save;
10674 PL_linestart = SvPVX(linestr_save);
10675 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10676 PL_oldbufptr = oldbufptr_save;
10677 PL_oldoldbufptr = oldoldbufptr_save;
10682 sv_catsv(tmpstr,PL_linestr);
10688 PL_multi_end = origline + PL_parser->herelines;
10690 if (indented && indent) {
10691 STRLEN linecount = 1;
10692 STRLEN herelen = SvCUR(tmpstr);
10693 char *ss = SvPVX(tmpstr);
10694 char *se = ss + herelen;
10695 SV *newstr = newSV(herelen+1);
10698 /* Trim leading whitespace */
10700 /* newline only? Copy and move on */
10702 sv_catpvs(newstr,"\n");
10706 /* Found our indentation? Strip it */
10708 else if (se - ss >= indent_len
10709 && memEQ(ss, indent, indent_len))
10714 while ((ss + le) < se && *(ss + le) != '\n')
10717 sv_catpvn(newstr, ss, le);
10720 /* Line doesn't begin with our indentation? Croak */
10725 "Indentation on line %d of here-doc doesn't match delimiter",
10731 /* avoid sv_setsv() as we dont wan't to COW here */
10732 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10734 SvREFCNT_dec_NN(newstr);
10737 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10738 SvPV_shrink_to_cur(tmpstr);
10742 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10746 PL_lex_stuff = tmpstr;
10747 pl_yylval.ival = op_type;
10753 SvREFCNT_dec(tmpstr);
10754 CopLINE_set(PL_curcop, origline);
10755 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10759 /* scan_inputsymbol
10760 takes: position of first '<' in input buffer
10761 returns: position of first char following the matching '>' in
10763 side-effects: pl_yylval and lex_op are set.
10768 <<>> read from ARGV without magic open
10769 <FH> read from filehandle
10770 <pkg::FH> read from package qualified filehandle
10771 <pkg'FH> read from package qualified filehandle
10772 <$fh> read from filehandle in $fh
10773 <*.h> filename glob
10778 S_scan_inputsymbol(pTHX_ char *start)
10780 char *s = start; /* current position in buffer */
10783 bool nomagicopen = FALSE;
10784 char *d = PL_tokenbuf; /* start of temp holding space */
10785 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10787 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10789 end = (char *) memchr(s, '\n', PL_bufend - s);
10792 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10793 nomagicopen = TRUE;
10799 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10801 /* die if we didn't have space for the contents of the <>,
10802 or if it didn't end, or if we see a newline
10805 if (len >= (I32)sizeof PL_tokenbuf)
10806 Perl_croak(aTHX_ "Excessively long <> operator");
10808 Perl_croak(aTHX_ "Unterminated <> operator");
10813 Remember, only scalar variables are interpreted as filehandles by
10814 this code. Anything more complex (e.g., <$fh{$num}>) will be
10815 treated as a glob() call.
10816 This code makes use of the fact that except for the $ at the front,
10817 a scalar variable and a filehandle look the same.
10819 if (*d == '$' && d[1]) d++;
10821 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10822 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10823 d += UTF ? UTF8SKIP(d) : 1;
10826 /* If we've tried to read what we allow filehandles to look like, and
10827 there's still text left, then it must be a glob() and not a getline.
10828 Use scan_str to pull out the stuff between the <> and treat it
10829 as nothing more than a string.
10832 if (d - PL_tokenbuf != len) {
10833 pl_yylval.ival = OP_GLOB;
10834 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10836 Perl_croak(aTHX_ "Glob not terminated");
10840 bool readline_overriden = FALSE;
10842 /* we're in a filehandle read situation */
10845 /* turn <> into <ARGV> */
10847 Copy("ARGV",d,5,char);
10849 /* Check whether readline() is overriden */
10850 if ((gv_readline = gv_override("readline",8)))
10851 readline_overriden = TRUE;
10853 /* if <$fh>, create the ops to turn the variable into a
10857 /* try to find it in the pad for this block, otherwise find
10858 add symbol table ops
10860 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
10861 if (tmp != NOT_IN_PAD) {
10862 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
10863 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10864 HEK * const stashname = HvNAME_HEK(stash);
10865 SV * const sym = sv_2mortal(newSVhek(stashname));
10866 sv_catpvs(sym, "::");
10867 sv_catpv(sym, d+1);
10872 OP * const o = newOP(OP_PADSV, 0);
10874 PL_lex_op = readline_overriden
10875 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10876 op_append_elem(OP_LIST, o,
10877 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10878 : newUNOP(OP_READLINE, 0, o);
10886 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
10888 PL_lex_op = readline_overriden
10889 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10890 op_append_elem(OP_LIST,
10891 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10892 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10893 : newUNOP(OP_READLINE, 0,
10894 newUNOP(OP_RV2SV, 0,
10895 newGVOP(OP_GV, 0, gv)));
10897 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10898 pl_yylval.ival = OP_NULL;
10901 /* If it's none of the above, it must be a literal filehandle
10902 (<Foo::BAR> or <FOO>) so build a simple readline OP */
10904 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
10905 PL_lex_op = readline_overriden
10906 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
10907 op_append_elem(OP_LIST,
10908 newGVOP(OP_GV, 0, gv),
10909 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10910 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
10911 pl_yylval.ival = OP_NULL;
10921 start position in buffer
10922 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
10923 only if they are of the open/close form
10924 keep_delims preserve the delimiters around the string
10925 re_reparse compiling a run-time /(?{})/:
10926 collapse // to /, and skip encoding src
10927 delimp if non-null, this is set to the position of
10928 the closing delimiter, or just after it if
10929 the closing and opening delimiters differ
10930 (i.e., the opening delimiter of a substitu-
10932 returns: position to continue reading from buffer
10933 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10934 updates the read buffer.
10936 This subroutine pulls a string out of the input. It is called for:
10937 q single quotes q(literal text)
10938 ' single quotes 'literal text'
10939 qq double quotes qq(interpolate $here please)
10940 " double quotes "interpolate $here please"
10941 qx backticks qx(/bin/ls -l)
10942 ` backticks `/bin/ls -l`
10943 qw quote words @EXPORT_OK = qw( func() $spam )
10944 m// regexp match m/this/
10945 s/// regexp substitute s/this/that/
10946 tr/// string transliterate tr/this/that/
10947 y/// string transliterate y/this/that/
10948 ($*@) sub prototypes sub foo ($)
10949 (stuff) sub attr parameters sub foo : attr(stuff)
10950 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10952 In most of these cases (all but <>, patterns and transliterate)
10953 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10954 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10955 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10958 It skips whitespace before the string starts, and treats the first
10959 character as the delimiter. If the delimiter is one of ([{< then
10960 the corresponding "close" character )]}> is used as the closing
10961 delimiter. It allows quoting of delimiters, and if the string has
10962 balanced delimiters ([{<>}]) it allows nesting.
10964 On success, the SV with the resulting string is put into lex_stuff or,
10965 if that is already non-NULL, into lex_repl. The second case occurs only
10966 when parsing the RHS of the special constructs s/// and tr/// (y///).
10967 For convenience, the terminating delimiter character is stuffed into
10972 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
10976 SV *sv; /* scalar value: string */
10977 const char *tmps; /* temp string, used for delimiter matching */
10978 char *s = start; /* current position in the buffer */
10979 char term; /* terminating character */
10980 char *to; /* current position in the sv's data */
10981 I32 brackets = 1; /* bracket nesting level */
10982 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
10983 IV termcode; /* terminating char. code */
10984 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
10985 STRLEN termlen; /* length of terminating string */
10988 /* The delimiters that have a mirror-image closing one */
10989 const char * opening_delims = "([{<";
10990 const char * closing_delims = ")]}>";
10992 /* The only non-UTF character that isn't a stand alone grapheme is
10993 * white-space, hence can't be a delimiter. */
10994 const char * non_grapheme_msg = "Use of unassigned code point or"
10995 " non-standalone grapheme for a delimiter"
10997 PERL_ARGS_ASSERT_SCAN_STR;
10999 /* skip space before the delimiter */
11004 /* mark where we are, in case we need to report errors */
11007 /* after skipping whitespace, the next character is the terminator */
11009 if (!UTF || UTF8_IS_INVARIANT(term)) {
11010 termcode = termstr[0] = term;
11014 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11015 if (UTF && UNLIKELY(! _is_grapheme((U8 *) start,
11020 yyerror(non_grapheme_msg);
11023 Copy(s, termstr, termlen, U8);
11026 /* mark where we are */
11027 PL_multi_start = CopLINE(PL_curcop);
11028 PL_multi_open = termcode;
11029 herelines = PL_parser->herelines;
11031 /* If the delimiter has a mirror-image closing one, get it */
11032 if (term && (tmps = strchr(opening_delims, term))) {
11033 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11036 PL_multi_close = termcode;
11038 if (PL_multi_open == PL_multi_close) {
11039 keep_bracketed_quoted = FALSE;
11042 /* create a new SV to hold the contents. 79 is the SV's initial length.
11043 What a random number. */
11044 sv = newSV_type(SVt_PVIV);
11046 SvIV_set(sv, termcode);
11047 (void)SvPOK_only(sv); /* validate pointer */
11049 /* move past delimiter and try to read a complete string */
11051 sv_catpvn(sv, s, termlen);
11054 /* extend sv if need be */
11055 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11056 /* set 'to' to the next character in the sv's string */
11057 to = SvPVX(sv)+SvCUR(sv);
11059 /* if open delimiter is the close delimiter read unbridle */
11060 if (PL_multi_open == PL_multi_close) {
11061 for (; s < PL_bufend; s++,to++) {
11062 /* embedded newlines increment the current line number */
11063 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11064 COPLINE_INC_WITH_HERELINES;
11065 /* handle quoted delimiters */
11066 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11067 if (!keep_bracketed_quoted
11069 || (re_reparse && s[1] == '\\'))
11072 else /* any other quotes are simply copied straight through */
11075 /* terminate when run out of buffer (the for() condition), or
11076 have found the terminator */
11077 else if (*s == term) { /* First byte of terminator matches */
11078 if (termlen == 1) /* If is the only byte, are done */
11081 /* If the remainder of the terminator matches, also are
11082 * done, after checking that is a separate grapheme */
11083 if ( s + termlen <= PL_bufend
11084 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11087 && UNLIKELY(! _is_grapheme((U8 *) start,
11092 yyerror(non_grapheme_msg);
11097 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11105 /* if the terminator isn't the same as the start character (e.g.,
11106 matched brackets), we have to allow more in the quoting, and
11107 be prepared for nested brackets.
11110 /* read until we run out of string, or we find the terminator */
11111 for (; s < PL_bufend; s++,to++) {
11112 /* embedded newlines increment the line count */
11113 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11114 COPLINE_INC_WITH_HERELINES;
11115 /* backslashes can escape the open or closing characters */
11116 if (*s == '\\' && s+1 < PL_bufend) {
11117 if (!keep_bracketed_quoted
11118 && ( ((UV)s[1] == PL_multi_open)
11119 || ((UV)s[1] == PL_multi_close) ))
11126 /* allow nested opens and closes */
11127 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11129 else if ((UV)*s == PL_multi_open)
11131 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11136 /* terminate the copied string and update the sv's end-of-string */
11138 SvCUR_set(sv, to - SvPVX_const(sv));
11141 * this next chunk reads more into the buffer if we're not done yet
11145 break; /* handle case where we are done yet :-) */
11147 #ifndef PERL_STRICT_CR
11148 if (to - SvPVX_const(sv) >= 2) {
11149 if ( (to[-2] == '\r' && to[-1] == '\n')
11150 || (to[-2] == '\n' && to[-1] == '\r'))
11154 SvCUR_set(sv, to - SvPVX_const(sv));
11156 else if (to[-1] == '\r')
11159 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11163 /* if we're out of file, or a read fails, bail and reset the current
11164 line marker so we can report where the unterminated string began
11166 COPLINE_INC_WITH_HERELINES;
11167 PL_bufptr = PL_bufend;
11168 if (!lex_next_chunk(0)) {
11170 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11173 s = start = PL_bufptr;
11176 /* at this point, we have successfully read the delimited string */
11179 sv_catpvn(sv, s, termlen);
11185 PL_multi_end = CopLINE(PL_curcop);
11186 CopLINE_set(PL_curcop, PL_multi_start);
11187 PL_parser->herelines = herelines;
11189 /* if we allocated too much space, give some back */
11190 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11191 SvLEN_set(sv, SvCUR(sv) + 1);
11192 SvPV_renew(sv, SvLEN(sv));
11195 /* decide whether this is the first or second quoted string we've read
11200 PL_parser->lex_sub_repl = sv;
11203 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11209 takes: pointer to position in buffer
11210 returns: pointer to new position in buffer
11211 side-effects: builds ops for the constant in pl_yylval.op
11213 Read a number in any of the formats that Perl accepts:
11215 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11216 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11217 0b[01](_?[01])* binary integers
11218 0[0-7](_?[0-7])* octal integers
11219 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
11220 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
11222 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11225 If it reads a number without a decimal point or an exponent, it will
11226 try converting the number to an integer and see if it can do so
11227 without loss of precision.
11231 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11233 const char *s = start; /* current position in buffer */
11234 char *d; /* destination in temp buffer */
11235 char *e; /* end of temp buffer */
11236 NV nv; /* number read, as a double */
11237 SV *sv = NULL; /* place to put the converted number */
11238 bool floatit; /* boolean: int or float? */
11239 const char *lastub = NULL; /* position of last underbar */
11240 static const char* const number_too_long = "Number too long";
11241 bool warned_about_underscore = 0;
11242 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11243 #define WARN_ABOUT_UNDERSCORE() \
11245 if (!warned_about_underscore) { \
11246 warned_about_underscore = 1; \
11247 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11248 "Misplaced _ in number"); \
11251 /* Hexadecimal floating point.
11253 * In many places (where we have quads and NV is IEEE 754 double)
11254 * we can fit the mantissa bits of a NV into an unsigned quad.
11255 * (Note that UVs might not be quads even when we have quads.)
11256 * This will not work everywhere, though (either no quads, or
11257 * using long doubles), in which case we have to resort to NV,
11258 * which will probably mean horrible loss of precision due to
11259 * multiple fp operations. */
11260 bool hexfp = FALSE;
11261 int total_bits = 0;
11262 int significant_bits = 0;
11263 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11264 # define HEXFP_UQUAD
11265 Uquad_t hexfp_uquad = 0;
11266 int hexfp_frac_bits = 0;
11271 NV hexfp_mult = 1.0;
11272 UV high_non_zero = 0; /* highest digit */
11273 int non_zero_integer_digits = 0;
11275 PERL_ARGS_ASSERT_SCAN_NUM;
11277 /* We use the first character to decide what type of number this is */
11281 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11283 /* if it starts with a 0, it could be an octal number, a decimal in
11284 0.13 disguise, or a hexadecimal number, or a binary number. */
11288 u holds the "number so far"
11289 overflowed was the number more than we can hold?
11291 Shift is used when we add a digit. It also serves as an "are
11292 we in octal/hex/binary?" indicator to disallow hex characters
11293 when in octal mode.
11297 bool overflowed = FALSE;
11298 bool just_zero = TRUE; /* just plain 0 or binary number? */
11299 bool has_digs = FALSE;
11300 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11301 static const char* const bases[5] =
11302 { "", "binary", "", "octal", "hexadecimal" };
11303 static const char* const Bases[5] =
11304 { "", "Binary", "", "Octal", "Hexadecimal" };
11305 static const char* const maxima[5] =
11307 "0b11111111111111111111111111111111",
11311 const char *base, *Base, *max;
11313 /* check for hex */
11314 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11318 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11323 /* check for a decimal in disguise */
11324 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11326 /* so it must be octal */
11333 WARN_ABOUT_UNDERSCORE();
11337 base = bases[shift];
11338 Base = Bases[shift];
11339 max = maxima[shift];
11341 /* read the rest of the number */
11343 /* x is used in the overflow test,
11344 b is the digit we're adding on. */
11349 /* if we don't mention it, we're done */
11353 /* _ are ignored -- but warned about if consecutive */
11355 if (lastub && s == lastub + 1)
11356 WARN_ABOUT_UNDERSCORE();
11360 /* 8 and 9 are not octal */
11361 case '8': case '9':
11363 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11367 case '2': case '3': case '4':
11368 case '5': case '6': case '7':
11370 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11373 case '0': case '1':
11374 b = *s++ & 15; /* ASCII digit -> value of digit */
11378 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11379 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11380 /* make sure they said 0x */
11383 b = (*s++ & 7) + 9;
11385 /* Prepare to put the digit we have onto the end
11386 of the number so far. We check for overflows.
11393 assert(shift >= 0);
11394 x = u << shift; /* make room for the digit */
11396 total_bits += shift;
11398 if ((x >> shift) != u
11399 && !(PL_hints & HINT_NEW_BINARY)) {
11402 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11403 "Integer overflow in %s number",
11406 u = x | b; /* add the digit to the end */
11409 n *= nvshift[shift];
11410 /* If an NV has not enough bits in its
11411 * mantissa to represent an UV this summing of
11412 * small low-order numbers is a waste of time
11413 * (because the NV cannot preserve the
11414 * low-order bits anyway): we could just
11415 * remember when did we overflow and in the
11416 * end just multiply n by the right
11421 if (high_non_zero == 0 && b > 0)
11425 non_zero_integer_digits++;
11427 /* this could be hexfp, but peek ahead
11428 * to avoid matching ".." */
11429 if (UNLIKELY(HEXFP_PEEK(s))) {
11437 /* if we get here, we had success: make a scalar value from
11442 /* final misplaced underbar check */
11444 WARN_ABOUT_UNDERSCORE();
11446 if (UNLIKELY(HEXFP_PEEK(s))) {
11447 /* Do sloppy (on the underbars) but quick detection
11448 * (and value construction) for hexfp, the decimal
11449 * detection will shortly be more thorough with the
11450 * underbar checks. */
11452 significant_bits = non_zero_integer_digits * shift;
11455 #else /* HEXFP_NV */
11458 /* Ignore the leading zero bits of
11459 * the high (first) non-zero digit. */
11460 if (high_non_zero) {
11461 if (high_non_zero < 0x8)
11462 significant_bits--;
11463 if (high_non_zero < 0x4)
11464 significant_bits--;
11465 if (high_non_zero < 0x2)
11466 significant_bits--;
11473 bool accumulate = TRUE;
11475 int lim = 1 << shift;
11476 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11478 if (isXDIGIT(*h)) {
11479 significant_bits += shift;
11482 if (significant_bits < NV_MANT_DIG) {
11483 /* We are in the long "run" of xdigits,
11484 * accumulate the full four bits. */
11485 assert(shift >= 0);
11486 hexfp_uquad <<= shift;
11488 hexfp_frac_bits += shift;
11489 } else if (significant_bits - shift < NV_MANT_DIG) {
11490 /* We are at a hexdigit either at,
11491 * or straddling, the edge of mantissa.
11492 * We will try grabbing as many as
11493 * possible bits. */
11495 significant_bits - NV_MANT_DIG;
11499 hexfp_uquad <<= tail;
11500 assert((shift - tail) >= 0);
11501 hexfp_uquad |= b >> (shift - tail);
11502 hexfp_frac_bits += tail;
11504 /* Ignore the trailing zero bits
11505 * of the last non-zero xdigit.
11507 * The assumption here is that if
11508 * one has input of e.g. the xdigit
11509 * eight (0x8), there is only one
11510 * bit being input, not the full
11511 * four bits. Conversely, if one
11512 * specifies a zero xdigit, the
11513 * assumption is that one really
11514 * wants all those bits to be zero. */
11516 if ((b & 0x1) == 0x0) {
11517 significant_bits--;
11518 if ((b & 0x2) == 0x0) {
11519 significant_bits--;
11520 if ((b & 0x4) == 0x0) {
11521 significant_bits--;
11527 accumulate = FALSE;
11530 /* Keep skipping the xdigits, and
11531 * accumulating the significant bits,
11532 * but do not shift the uquad
11533 * (which would catastrophically drop
11534 * high-order bits) or accumulate the
11535 * xdigits anymore. */
11537 #else /* HEXFP_NV */
11539 nv_mult /= nvshift[shift];
11541 hexfp_nv += b * nv_mult;
11543 accumulate = FALSE;
11547 if (significant_bits >= NV_MANT_DIG)
11548 accumulate = FALSE;
11552 if ((total_bits > 0 || significant_bits > 0) &&
11553 isALPHA_FOLD_EQ(*h, 'p')) {
11554 bool negexp = FALSE;
11558 else if (*h == '-') {
11564 while (isDIGIT(*h) || *h == '_') {
11567 hexfp_exp += *h - '0';
11570 && -hexfp_exp < NV_MIN_EXP - 1) {
11571 /* NOTE: this means that the exponent
11572 * underflow warning happens for
11573 * the IEEE 754 subnormals (denormals),
11574 * because DBL_MIN_EXP etc are the lowest
11575 * possible binary (or, rather, DBL_RADIX-base)
11576 * exponent for normals, not subnormals.
11578 * This may or may not be a good thing. */
11579 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11580 "Hexadecimal float: exponent underflow");
11586 && hexfp_exp > NV_MAX_EXP - 1) {
11587 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11588 "Hexadecimal float: exponent overflow");
11596 hexfp_exp = -hexfp_exp;
11598 hexfp_exp -= hexfp_frac_bits;
11600 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11607 if (shift != 3 && !has_digs) {
11608 /* 0x or 0b with no digits, treat it as an error.
11609 Originally this backed up the parse before the b or
11610 x, but that has the potential for silent changes in
11611 behaviour, like for: "0x.3" and "0x+$foo".
11614 char *oldbp = PL_bufptr;
11615 if (*d) ++d; /* so the user sees the bad non-digit */
11616 PL_bufptr = (char *)d; /* so yyerror reports the context */
11617 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11618 shift == 4 ? "hexadecimal" : "binary"));
11623 if (n > 4294967295.0)
11624 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11625 "%s number > %s non-portable",
11631 if (u > 0xffffffff)
11632 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11633 "%s number > %s non-portable",
11638 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11639 sv = new_constant(start, s - start, "integer",
11640 sv, NULL, NULL, 0, NULL);
11641 else if (PL_hints & HINT_NEW_BINARY)
11642 sv = new_constant(start, s - start, "binary",
11643 sv, NULL, NULL, 0, NULL);
11648 handle decimal numbers.
11649 we're also sent here when we read a 0 as the first digit
11651 case '1': case '2': case '3': case '4': case '5':
11652 case '6': case '7': case '8': case '9': case '.':
11655 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11673 NOT_REACHED; /* NOTREACHED */
11677 /* read next group of digits and _ and copy into d */
11680 || UNLIKELY(hexfp && isXDIGIT(*s)))
11682 /* skip underscores, checking for misplaced ones
11686 if (lastub && s == lastub + 1)
11687 WARN_ABOUT_UNDERSCORE();
11691 /* check for end of fixed-length buffer */
11693 Perl_croak(aTHX_ "%s", number_too_long);
11694 /* if we're ok, copy the character */
11699 /* final misplaced underbar check */
11700 if (lastub && s == lastub + 1)
11701 WARN_ABOUT_UNDERSCORE();
11703 /* read a decimal portion if there is one. avoid
11704 3..5 being interpreted as the number 3. followed
11707 if (*s == '.' && s[1] != '.') {
11712 WARN_ABOUT_UNDERSCORE();
11716 /* copy, ignoring underbars, until we run out of digits.
11720 || UNLIKELY(hexfp && isXDIGIT(*s));
11723 /* fixed length buffer check */
11725 Perl_croak(aTHX_ "%s", number_too_long);
11727 if (lastub && s == lastub + 1)
11728 WARN_ABOUT_UNDERSCORE();
11734 /* fractional part ending in underbar? */
11736 WARN_ABOUT_UNDERSCORE();
11737 if (*s == '.' && isDIGIT(s[1])) {
11738 /* oops, it's really a v-string, but without the "v" */
11744 /* read exponent part, if present */
11745 if ((isALPHA_FOLD_EQ(*s, 'e')
11746 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11747 && strchr("+-0123456789_", s[1]))
11749 int exp_digits = 0;
11750 const char *save_s = s;
11753 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11754 ditto for p (hexfloats) */
11755 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11756 /* At least some Mach atof()s don't grok 'E' */
11759 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11766 /* stray preinitial _ */
11768 WARN_ABOUT_UNDERSCORE();
11772 /* allow positive or negative exponent */
11773 if (*s == '+' || *s == '-')
11776 /* stray initial _ */
11778 WARN_ABOUT_UNDERSCORE();
11782 /* read digits of exponent */
11783 while (isDIGIT(*s) || *s == '_') {
11787 Perl_croak(aTHX_ "%s", number_too_long);
11791 if (((lastub && s == lastub + 1)
11792 || (!isDIGIT(s[1]) && s[1] != '_')))
11793 WARN_ABOUT_UNDERSCORE();
11799 /* no exponent digits, the [eEpP] could be for something else,
11800 * though in practice we don't get here for p since that's preparsed
11801 * earlier, and results in only the 0xX being consumed, so behave similarly
11802 * for decimal floats and consume only the D.DD, leaving the [eE] to the
11815 We try to do an integer conversion first if no characters
11816 indicating "float" have been found.
11821 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11823 if (flags == IS_NUMBER_IN_UV) {
11825 sv = newSViv(uv); /* Prefer IVs over UVs. */
11828 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11829 if (uv <= (UV) IV_MIN)
11830 sv = newSViv(-(IV)uv);
11837 /* terminate the string */
11839 if (UNLIKELY(hexfp)) {
11840 # ifdef NV_MANT_DIG
11841 if (significant_bits > NV_MANT_DIG)
11842 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11843 "Hexadecimal float: mantissa overflow");
11846 nv = hexfp_uquad * hexfp_mult;
11847 #else /* HEXFP_NV */
11848 nv = hexfp_nv * hexfp_mult;
11851 nv = Atof(PL_tokenbuf);
11857 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11858 const char *const key = floatit ? "float" : "integer";
11859 const STRLEN keylen = floatit ? 5 : 7;
11860 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11861 key, keylen, sv, NULL, NULL, 0, NULL);
11865 /* if it starts with a v, it could be a v-string */
11868 sv = newSV(5); /* preallocate storage space */
11869 ENTER_with_name("scan_vstring");
11871 s = scan_vstring(s, PL_bufend, sv);
11872 SvREFCNT_inc_simple_void_NN(sv);
11873 LEAVE_with_name("scan_vstring");
11877 /* make the op for the constant and return */
11880 lvalp->opval = newSVOP(OP_CONST, 0, sv);
11882 lvalp->opval = NULL;
11888 S_scan_formline(pTHX_ char *s)
11890 SV * const stuff = newSVpvs("");
11891 bool needargs = FALSE;
11892 bool eofmt = FALSE;
11894 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11896 while (!needargs) {
11900 #ifdef PERL_STRICT_CR
11901 while (SPACE_OR_TAB(*t))
11904 while (SPACE_OR_TAB(*t) || *t == '\r')
11907 if (*t == '\n' || t == PL_bufend) {
11912 eol = (char *) memchr(s,'\n',PL_bufend-s);
11917 for (t = s; t < eol; t++) {
11918 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11920 goto enough; /* ~~ must be first line in formline */
11922 if (*t == '@' || *t == '^')
11926 sv_catpvn(stuff, s, eol-s);
11927 #ifndef PERL_STRICT_CR
11928 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11929 char *end = SvPVX(stuff) + SvCUR(stuff);
11932 SvCUR_set(stuff, SvCUR(stuff) - 1);
11940 if ((PL_rsfp || PL_parser->filtered)
11941 && PL_parser->form_lex_state == LEX_NORMAL) {
11943 PL_bufptr = PL_bufend;
11944 COPLINE_INC_WITH_HERELINES;
11945 got_some = lex_next_chunk(0);
11946 CopLINE_dec(PL_curcop);
11951 incline(s, PL_bufend);
11954 if (!SvCUR(stuff) || needargs)
11955 PL_lex_state = PL_parser->form_lex_state;
11956 if (SvCUR(stuff)) {
11957 PL_expect = XSTATE;
11959 const char *s2 = s;
11960 while (isSPACE(*s2) && *s2 != '\n')
11963 PL_expect = XTERMBLOCK;
11964 NEXTVAL_NEXTTOKE.ival = 0;
11967 NEXTVAL_NEXTTOKE.ival = 0;
11968 force_next(FORMLBRACK);
11971 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
11974 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
11978 SvREFCNT_dec(stuff);
11980 PL_lex_formbrack = 0;
11986 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
11988 const I32 oldsavestack_ix = PL_savestack_ix;
11989 CV* const outsidecv = PL_compcv;
11991 SAVEI32(PL_subline);
11992 save_item(PL_subname);
11993 SAVESPTR(PL_compcv);
11995 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
11996 CvFLAGS(PL_compcv) |= flags;
11998 PL_subline = CopLINE(PL_curcop);
11999 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12000 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12001 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12002 if (outsidecv && CvPADLIST(outsidecv))
12003 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12005 return oldsavestack_ix;
12009 /* Do extra initialisation of a CV (typically one just created by
12010 * start_subparse()) if that CV is for a named sub
12014 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12016 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12018 if (nameop->op_type == OP_CONST) {
12019 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12020 if ( strEQ(name, "BEGIN")
12021 || strEQ(name, "END")
12022 || strEQ(name, "INIT")
12023 || strEQ(name, "CHECK")
12024 || strEQ(name, "UNITCHECK")
12029 /* State subs inside anonymous subs need to be
12030 clonable themselves. */
12031 if ( CvANON(CvOUTSIDE(cv))
12032 || CvCLONE(CvOUTSIDE(cv))
12033 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12035 ))[nameop->op_targ])
12042 S_yywarn(pTHX_ const char *const s, U32 flags)
12044 PERL_ARGS_ASSERT_YYWARN;
12046 PL_in_eval |= EVAL_WARNONLY;
12047 yyerror_pv(s, flags);
12052 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12054 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12057 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12060 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12062 NOT_REACHED; /* NOTREACHED */
12068 /* Called, after at least one error has been found, to abort the parse now,
12069 * instead of trying to forge ahead */
12071 yyerror_pvn(NULL, 0, 0);
12075 Perl_yyerror(pTHX_ const char *const s)
12077 PERL_ARGS_ASSERT_YYERROR;
12078 return yyerror_pvn(s, strlen(s), 0);
12082 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12084 PERL_ARGS_ASSERT_YYERROR_PV;
12085 return yyerror_pvn(s, strlen(s), flags);
12089 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12091 const char *context = NULL;
12094 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12095 int yychar = PL_parser->yychar;
12097 /* Output error message 's' with length 'len'. 'flags' are SV flags that
12098 * apply. If the number of errors found is large enough, it abandons
12099 * parsing. If 's' is NULL, there is no message, and it abandons
12100 * processing unconditionally */
12103 if (!yychar || (yychar == ';' && !PL_rsfp))
12104 sv_catpvs(where_sv, "at EOF");
12105 else if ( PL_oldoldbufptr
12106 && PL_bufptr > PL_oldoldbufptr
12107 && PL_bufptr - PL_oldoldbufptr < 200
12108 && PL_oldoldbufptr != PL_oldbufptr
12109 && PL_oldbufptr != PL_bufptr)
12113 The code below is removed for NetWare because it
12114 abends/crashes on NetWare when the script has error such as
12115 not having the closing quotes like:
12116 if ($var eq "value)
12117 Checking of white spaces is anyway done in NetWare code.
12120 while (isSPACE(*PL_oldoldbufptr))
12123 context = PL_oldoldbufptr;
12124 contlen = PL_bufptr - PL_oldoldbufptr;
12126 else if ( PL_oldbufptr
12127 && PL_bufptr > PL_oldbufptr
12128 && PL_bufptr - PL_oldbufptr < 200
12129 && PL_oldbufptr != PL_bufptr) {
12132 The code below is removed for NetWare because it
12133 abends/crashes on NetWare when the script has error such as
12134 not having the closing quotes like:
12135 if ($var eq "value)
12136 Checking of white spaces is anyway done in NetWare code.
12139 while (isSPACE(*PL_oldbufptr))
12142 context = PL_oldbufptr;
12143 contlen = PL_bufptr - PL_oldbufptr;
12145 else if (yychar > 255)
12146 sv_catpvs(where_sv, "next token ???");
12147 else if (yychar == YYEMPTY) {
12148 if (PL_lex_state == LEX_NORMAL)
12149 sv_catpvs(where_sv, "at end of line");
12150 else if (PL_lex_inpat)
12151 sv_catpvs(where_sv, "within pattern");
12153 sv_catpvs(where_sv, "within string");
12156 sv_catpvs(where_sv, "next char ");
12158 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12159 else if (isPRINT_LC(yychar)) {
12160 const char string = yychar;
12161 sv_catpvn(where_sv, &string, 1);
12164 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12166 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12167 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12168 OutCopFILE(PL_curcop),
12169 (IV)(PL_parser->preambling == NOLINE
12170 ? CopLINE(PL_curcop)
12171 : PL_parser->preambling));
12173 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12174 UTF8fARG(UTF, contlen, context));
12176 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12177 if ( PL_multi_start < PL_multi_end
12178 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12180 Perl_sv_catpvf(aTHX_ msg,
12181 " (Might be a runaway multi-line %c%c string starting on"
12182 " line %" IVdf ")\n",
12183 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12186 if (PL_in_eval & EVAL_WARNONLY) {
12187 PL_in_eval &= ~EVAL_WARNONLY;
12188 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12194 if (s == NULL || PL_error_count >= 10) {
12195 const char * msg = "";
12196 const char * const name = OutCopFILE(PL_curcop);
12199 SV * errsv = ERRSV;
12200 if (SvCUR(errsv)) {
12201 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12206 abort_execution(msg, name);
12209 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12213 PL_in_my_stash = NULL;
12218 S_swallow_bom(pTHX_ U8 *s)
12220 const STRLEN slen = SvCUR(PL_linestr);
12222 PERL_ARGS_ASSERT_SWALLOW_BOM;
12226 if (s[1] == 0xFE) {
12227 /* UTF-16 little-endian? (or UTF-32LE?) */
12228 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12229 /* diag_listed_as: Unsupported script encoding %s */
12230 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12231 #ifndef PERL_NO_UTF16_FILTER
12233 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12236 if (PL_bufend > (char*)s) {
12237 s = add_utf16_textfilter(s, TRUE);
12240 /* diag_listed_as: Unsupported script encoding %s */
12241 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12246 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12247 #ifndef PERL_NO_UTF16_FILTER
12249 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12252 if (PL_bufend > (char *)s) {
12253 s = add_utf16_textfilter(s, FALSE);
12256 /* diag_listed_as: Unsupported script encoding %s */
12257 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12261 case BOM_UTF8_FIRST_BYTE: {
12262 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12264 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12266 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
12273 if (s[2] == 0xFE && s[3] == 0xFF) {
12274 /* UTF-32 big-endian */
12275 /* diag_listed_as: Unsupported script encoding %s */
12276 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12279 else if (s[2] == 0 && s[3] != 0) {
12282 * are a good indicator of UTF-16BE. */
12283 #ifndef PERL_NO_UTF16_FILTER
12285 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12287 s = add_utf16_textfilter(s, FALSE);
12289 /* diag_listed_as: Unsupported script encoding %s */
12290 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12297 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12300 * are a good indicator of UTF-16LE. */
12301 #ifndef PERL_NO_UTF16_FILTER
12303 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12305 s = add_utf16_textfilter(s, TRUE);
12307 /* diag_listed_as: Unsupported script encoding %s */
12308 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12316 #ifndef PERL_NO_UTF16_FILTER
12318 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12320 SV *const filter = FILTER_DATA(idx);
12321 /* We re-use this each time round, throwing the contents away before we
12323 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12324 SV *const utf8_buffer = filter;
12325 IV status = IoPAGE(filter);
12326 const bool reverse = cBOOL(IoLINES(filter));
12329 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12331 /* As we're automatically added, at the lowest level, and hence only called
12332 from this file, we can be sure that we're not called in block mode. Hence
12333 don't bother writing code to deal with block mode. */
12335 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12338 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12340 DEBUG_P(PerlIO_printf(Perl_debug_log,
12341 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12342 FPTR2DPTR(void *, S_utf16_textfilter),
12343 reverse ? 'l' : 'b', idx, maxlen, status,
12344 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12351 /* First, look in our buffer of existing UTF-8 data: */
12352 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12356 } else if (status == 0) {
12358 IoPAGE(filter) = 0;
12359 nl = SvEND(utf8_buffer);
12362 STRLEN got = nl - SvPVX(utf8_buffer);
12363 /* Did we have anything to append? */
12365 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12366 /* Everything else in this code works just fine if SVp_POK isn't
12367 set. This, however, needs it, and we need it to work, else
12368 we loop infinitely because the buffer is never consumed. */
12369 sv_chop(utf8_buffer, nl);
12373 /* OK, not a complete line there, so need to read some more UTF-16.
12374 Read an extra octect if the buffer currently has an odd number. */
12378 if (SvCUR(utf16_buffer) >= 2) {
12379 /* Location of the high octet of the last complete code point.
12380 Gosh, UTF-16 is a pain. All the benefits of variable length,
12381 *coupled* with all the benefits of partial reads and
12383 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12384 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12386 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12390 /* We have the first half of a surrogate. Read more. */
12391 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12394 status = FILTER_READ(idx + 1, utf16_buffer,
12395 160 + (SvCUR(utf16_buffer) & 1));
12396 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12397 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12400 IoPAGE(filter) = status;
12405 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12406 * require 4 bytes per char */
12407 chars = SvCUR(utf16_buffer) >> 1;
12408 have = SvCUR(utf8_buffer);
12410 /* Assume the worst case size as noted by the functions: twice the
12411 * number of input bytes */
12412 SvGROW(utf8_buffer, have + chars * 4 + 1);
12415 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12416 (U8*)SvPVX_const(utf8_buffer) + have,
12417 chars * 2, &newlen);
12419 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12420 (U8*)SvPVX_const(utf8_buffer) + have,
12421 chars * 2, &newlen);
12423 SvCUR_set(utf8_buffer, have + newlen);
12426 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12427 it's private to us, and utf16_to_utf8{,reversed} take a
12428 (pointer,length) pair, rather than a NUL-terminated string. */
12429 if(SvCUR(utf16_buffer) & 1) {
12430 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12431 SvCUR_set(utf16_buffer, 1);
12433 SvCUR_set(utf16_buffer, 0);
12436 DEBUG_P(PerlIO_printf(Perl_debug_log,
12437 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12439 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12440 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12445 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12447 SV *filter = filter_add(S_utf16_textfilter, NULL);
12449 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12451 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12453 IoLINES(filter) = reversed;
12454 IoPAGE(filter) = 1; /* Not EOF */
12456 /* Sadly, we have to return a valid pointer, come what may, so we have to
12457 ignore any error return from this. */
12458 SvCUR_set(PL_linestr, 0);
12459 if (FILTER_READ(0, PL_linestr, 0)) {
12460 SvUTF8_on(PL_linestr);
12462 SvUTF8_on(PL_linestr);
12464 PL_bufend = SvEND(PL_linestr);
12465 return (U8*)SvPVX(PL_linestr);
12470 Returns a pointer to the next character after the parsed
12471 vstring, as well as updating the passed in sv.
12473 Function must be called like
12475 sv = sv_2mortal(newSV(5));
12476 s = scan_vstring(s,e,sv);
12478 where s and e are the start and end of the string.
12479 The sv should already be large enough to store the vstring
12480 passed in, for performance reasons.
12482 This function may croak if fatal warnings are enabled in the
12483 calling scope, hence the sv_2mortal in the example (to prevent
12484 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12490 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12492 const char *pos = s;
12493 const char *start = s;
12495 PERL_ARGS_ASSERT_SCAN_VSTRING;
12497 if (*pos == 'v') pos++; /* get past 'v' */
12498 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12500 if ( *pos != '.') {
12501 /* this may not be a v-string if followed by => */
12502 const char *next = pos;
12503 while (next < e && isSPACE(*next))
12505 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12506 /* return string not v-string */
12507 sv_setpvn(sv,(char *)s,pos-s);
12508 return (char *)pos;
12512 if (!isALPHA(*pos)) {
12513 U8 tmpbuf[UTF8_MAXBYTES+1];
12516 s++; /* get past 'v' */
12521 /* this is atoi() that tolerates underscores */
12524 const char *end = pos;
12526 while (--end >= s) {
12528 const UV orev = rev;
12529 rev += (*end - '0') * mult;
12532 /* diag_listed_as: Integer overflow in %s number */
12533 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12534 "Integer overflow in decimal number");
12538 /* Append native character for the rev point */
12539 tmpend = uvchr_to_utf8(tmpbuf, rev);
12540 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12541 if (!UVCHR_IS_INVARIANT(rev))
12543 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12549 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12553 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12560 Perl_keyword_plugin_standard(pTHX_
12561 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12563 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12564 PERL_UNUSED_CONTEXT;
12565 PERL_UNUSED_ARG(keyword_ptr);
12566 PERL_UNUSED_ARG(keyword_len);
12567 PERL_UNUSED_ARG(op_ptr);
12568 return KEYWORD_PLUGIN_DECLINE;
12572 =for apidoc wrap_keyword_plugin
12574 Puts a C function into the chain of keyword plugins. This is the
12575 preferred way to manipulate the L</PL_keyword_plugin> variable.
12576 C<new_plugin> is a pointer to the C function that is to be added to the
12577 keyword plugin chain, and C<old_plugin_p> points to the storage location
12578 where a pointer to the next function in the chain will be stored. The
12579 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12580 while the value previously stored there is written to C<*old_plugin_p>.
12582 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12583 to hook keyword parsing may find itself invoked more than once per
12584 process, typically in different threads. To handle that situation, this
12585 function is idempotent. The location C<*old_plugin_p> must initially
12586 (once per process) contain a null pointer. A C variable of static
12587 duration (declared at file scope, typically also marked C<static> to give
12588 it internal linkage) will be implicitly initialised appropriately, if it
12589 does not have an explicit initialiser. This function will only actually
12590 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12591 function is also thread safe on the small scale. It uses appropriate
12592 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12594 When this function is called, the function referenced by C<new_plugin>
12595 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12596 In a threading situation, C<new_plugin> may be called immediately, even
12597 before this function has returned. C<*old_plugin_p> will always be
12598 appropriately set before C<new_plugin> is called. If C<new_plugin>
12599 decides not to do anything special with the identifier that it is given
12600 (which is the usual case for most calls to a keyword plugin), it must
12601 chain the plugin function referenced by C<*old_plugin_p>.
12603 Taken all together, XS code to install a keyword plugin should typically
12604 look something like this:
12606 static Perl_keyword_plugin_t next_keyword_plugin;
12607 static OP *my_keyword_plugin(pTHX_
12608 char *keyword_plugin, STRLEN keyword_len, OP **op_ptr)
12610 if (memEQs(keyword_ptr, keyword_len,
12611 "my_new_keyword")) {
12614 return next_keyword_plugin(aTHX_
12615 keyword_ptr, keyword_len, op_ptr);
12619 wrap_keyword_plugin(my_keyword_plugin,
12620 &next_keyword_plugin);
12622 Direct access to L</PL_keyword_plugin> should be avoided.
12628 Perl_wrap_keyword_plugin(pTHX_
12629 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12633 PERL_UNUSED_CONTEXT;
12634 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12635 if (*old_plugin_p) return;
12636 KEYWORD_PLUGIN_MUTEX_LOCK;
12637 if (!*old_plugin_p) {
12638 *old_plugin_p = PL_keyword_plugin;
12639 PL_keyword_plugin = new_plugin;
12641 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12644 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12646 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12648 SAVEI32(PL_lex_brackets);
12649 if (PL_lex_brackets > 100)
12650 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12651 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12652 SAVEI32(PL_lex_allbrackets);
12653 PL_lex_allbrackets = 0;
12654 SAVEI8(PL_lex_fakeeof);
12655 PL_lex_fakeeof = (U8)fakeeof;
12656 if(yyparse(gramtype) && !PL_parser->error_count)
12657 qerror(Perl_mess(aTHX_ "Parse error"));
12660 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12662 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12666 SAVEVPTR(PL_eval_root);
12667 PL_eval_root = NULL;
12668 parse_recdescent(gramtype, fakeeof);
12674 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12676 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12679 if (flags & ~PARSE_OPTIONAL)
12680 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12681 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12682 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12683 if (!PL_parser->error_count)
12684 qerror(Perl_mess(aTHX_ "Parse error"));
12685 exprop = newOP(OP_NULL, 0);
12691 =for apidoc parse_arithexpr
12693 Parse a Perl arithmetic expression. This may contain operators of precedence
12694 down to the bit shift operators. The expression must be followed (and thus
12695 terminated) either by a comparison or lower-precedence operator or by
12696 something that would normally terminate an expression such as semicolon.
12697 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12698 otherwise it is mandatory. It is up to the caller to ensure that the
12699 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12700 the source of the code to be parsed and the lexical context for the
12703 The op tree representing the expression is returned. If an optional
12704 expression is absent, a null pointer is returned, otherwise the pointer
12707 If an error occurs in parsing or compilation, in most cases a valid op
12708 tree is returned anyway. The error is reflected in the parser state,
12709 normally resulting in a single exception at the top level of parsing
12710 which covers all the compilation errors that occurred. Some compilation
12711 errors, however, will throw an exception immediately.
12717 Perl_parse_arithexpr(pTHX_ U32 flags)
12719 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12723 =for apidoc parse_termexpr
12725 Parse a Perl term expression. This may contain operators of precedence
12726 down to the assignment operators. The expression must be followed (and thus
12727 terminated) either by a comma or lower-precedence operator or by
12728 something that would normally terminate an expression such as semicolon.
12729 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12730 otherwise it is mandatory. It is up to the caller to ensure that the
12731 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12732 the source of the code to be parsed and the lexical context for the
12735 The op tree representing the expression is returned. If an optional
12736 expression is absent, a null pointer is returned, otherwise the pointer
12739 If an error occurs in parsing or compilation, in most cases a valid op
12740 tree is returned anyway. The error is reflected in the parser state,
12741 normally resulting in a single exception at the top level of parsing
12742 which covers all the compilation errors that occurred. Some compilation
12743 errors, however, will throw an exception immediately.
12749 Perl_parse_termexpr(pTHX_ U32 flags)
12751 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12755 =for apidoc parse_listexpr
12757 Parse a Perl list expression. This may contain operators of precedence
12758 down to the comma operator. The expression must be followed (and thus
12759 terminated) either by a low-precedence logic operator such as C<or> or by
12760 something that would normally terminate an expression such as semicolon.
12761 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12762 otherwise it is mandatory. It is up to the caller to ensure that the
12763 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12764 the source of the code to be parsed and the lexical context for the
12767 The op tree representing the expression is returned. If an optional
12768 expression is absent, a null pointer is returned, otherwise the pointer
12771 If an error occurs in parsing or compilation, in most cases a valid op
12772 tree is returned anyway. The error is reflected in the parser state,
12773 normally resulting in a single exception at the top level of parsing
12774 which covers all the compilation errors that occurred. Some compilation
12775 errors, however, will throw an exception immediately.
12781 Perl_parse_listexpr(pTHX_ U32 flags)
12783 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12787 =for apidoc parse_fullexpr
12789 Parse a single complete Perl expression. This allows the full
12790 expression grammar, including the lowest-precedence operators such
12791 as C<or>. The expression must be followed (and thus terminated) by a
12792 token that an expression would normally be terminated by: end-of-file,
12793 closing bracketing punctuation, semicolon, or one of the keywords that
12794 signals a postfix expression-statement modifier. If C<flags> has the
12795 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12796 mandatory. It is up to the caller to ensure that the dynamic parser
12797 state (L</PL_parser> et al) is correctly set to reflect the source of
12798 the code to be parsed and the lexical context for the expression.
12800 The op tree representing the expression is returned. If an optional
12801 expression is absent, a null pointer is returned, otherwise the pointer
12804 If an error occurs in parsing or compilation, in most cases a valid op
12805 tree is returned anyway. The error is reflected in the parser state,
12806 normally resulting in a single exception at the top level of parsing
12807 which covers all the compilation errors that occurred. Some compilation
12808 errors, however, will throw an exception immediately.
12814 Perl_parse_fullexpr(pTHX_ U32 flags)
12816 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12820 =for apidoc parse_block
12822 Parse a single complete Perl code block. This consists of an opening
12823 brace, a sequence of statements, and a closing brace. The block
12824 constitutes a lexical scope, so C<my> variables and various compile-time
12825 effects can be contained within it. It is up to the caller to ensure
12826 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12827 reflect the source of the code to be parsed and the lexical context for
12830 The op tree representing the code block is returned. This is always a
12831 real op, never a null pointer. It will normally be a C<lineseq> list,
12832 including C<nextstate> or equivalent ops. No ops to construct any kind
12833 of runtime scope are included by virtue of it being a block.
12835 If an error occurs in parsing or compilation, in most cases a valid op
12836 tree (most likely null) is returned anyway. The error is reflected in
12837 the parser state, normally resulting in a single exception at the top
12838 level of parsing which covers all the compilation errors that occurred.
12839 Some compilation errors, however, will throw an exception immediately.
12841 The C<flags> parameter is reserved for future use, and must always
12848 Perl_parse_block(pTHX_ U32 flags)
12851 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
12852 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
12856 =for apidoc parse_barestmt
12858 Parse a single unadorned Perl statement. This may be a normal imperative
12859 statement or a declaration that has compile-time effect. It does not
12860 include any label or other affixture. It is up to the caller to ensure
12861 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12862 reflect the source of the code to be parsed and the lexical context for
12865 The op tree representing the statement is returned. This may be a
12866 null pointer if the statement is null, for example if it was actually
12867 a subroutine definition (which has compile-time side effects). If not
12868 null, it will be ops directly implementing the statement, suitable to
12869 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12870 equivalent op (except for those embedded in a scope contained entirely
12871 within the statement).
12873 If an error occurs in parsing or compilation, in most cases a valid op
12874 tree (most likely null) is returned anyway. The error is reflected in
12875 the parser state, normally resulting in a single exception at the top
12876 level of parsing which covers all the compilation errors that occurred.
12877 Some compilation errors, however, will throw an exception immediately.
12879 The C<flags> parameter is reserved for future use, and must always
12886 Perl_parse_barestmt(pTHX_ U32 flags)
12889 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
12890 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
12894 =for apidoc parse_label
12896 Parse a single label, possibly optional, of the type that may prefix a
12897 Perl statement. It is up to the caller to ensure that the dynamic parser
12898 state (L</PL_parser> et al) is correctly set to reflect the source of
12899 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
12900 label is optional, otherwise it is mandatory.
12902 The name of the label is returned in the form of a fresh scalar. If an
12903 optional label is absent, a null pointer is returned.
12905 If an error occurs in parsing, which can only occur if the label is
12906 mandatory, a valid label is returned anyway. The error is reflected in
12907 the parser state, normally resulting in a single exception at the top
12908 level of parsing which covers all the compilation errors that occurred.
12914 Perl_parse_label(pTHX_ U32 flags)
12916 if (flags & ~PARSE_OPTIONAL)
12917 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12919 PL_parser->yychar = yylex();
12920 if (PL_parser->yychar == LABEL) {
12921 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
12922 PL_parser->yychar = YYEMPTY;
12923 cSVOPx(pl_yylval.opval)->op_sv = NULL;
12924 op_free(pl_yylval.opval);
12932 STRLEN wlen, bufptr_pos;
12935 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
12937 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
12938 if (word_takes_any_delimiter(s, wlen))
12940 bufptr_pos = s - SvPVX(PL_linestr);
12942 lex_read_space(LEX_KEEP_PREVIOUS);
12944 s = SvPVX(PL_linestr) + bufptr_pos;
12945 if (t[0] == ':' && t[1] != ':') {
12946 PL_oldoldbufptr = PL_oldbufptr;
12949 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
12953 if (flags & PARSE_OPTIONAL) {
12956 qerror(Perl_mess(aTHX_ "Parse error"));
12957 return newSVpvs("x");
12964 =for apidoc parse_fullstmt
12966 Parse a single complete Perl statement. This may be a normal imperative
12967 statement or a declaration that has compile-time effect, and may include
12968 optional labels. It is up to the caller to ensure that the dynamic
12969 parser state (L</PL_parser> et al) is correctly set to reflect the source
12970 of the code to be parsed and the lexical context for the statement.
12972 The op tree representing the statement is returned. This may be a
12973 null pointer if the statement is null, for example if it was actually
12974 a subroutine definition (which has compile-time side effects). If not
12975 null, it will be the result of a L</newSTATEOP> call, normally including
12976 a C<nextstate> or equivalent op.
12978 If an error occurs in parsing or compilation, in most cases a valid op
12979 tree (most likely null) is returned anyway. The error is reflected in
12980 the parser state, normally resulting in a single exception at the top
12981 level of parsing which covers all the compilation errors that occurred.
12982 Some compilation errors, however, will throw an exception immediately.
12984 The C<flags> parameter is reserved for future use, and must always
12991 Perl_parse_fullstmt(pTHX_ U32 flags)
12994 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
12995 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
12999 =for apidoc parse_stmtseq
13001 Parse a sequence of zero or more Perl statements. These may be normal
13002 imperative statements, including optional labels, or declarations
13003 that have compile-time effect, or any mixture thereof. The statement
13004 sequence ends when a closing brace or end-of-file is encountered in a
13005 place where a new statement could have validly started. It is up to
13006 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13007 is correctly set to reflect the source of the code to be parsed and the
13008 lexical context for the statements.
13010 The op tree representing the statement sequence is returned. This may
13011 be a null pointer if the statements were all null, for example if there
13012 were no statements or if there were only subroutine definitions (which
13013 have compile-time side effects). If not null, it will be a C<lineseq>
13014 list, normally including C<nextstate> or equivalent ops.
13016 If an error occurs in parsing or compilation, in most cases a valid op
13017 tree is returned anyway. The error is reflected in the parser state,
13018 normally resulting in a single exception at the top level of parsing
13019 which covers all the compilation errors that occurred. Some compilation
13020 errors, however, will throw an exception immediately.
13022 The C<flags> parameter is reserved for future use, and must always
13029 Perl_parse_stmtseq(pTHX_ U32 flags)
13034 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13035 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13036 c = lex_peek_unichar(0);
13037 if (c != -1 && c != /*{*/'}')
13038 qerror(Perl_mess(aTHX_ "Parse error"));
13043 =for apidoc parse_subsignature
13045 Parse a subroutine signature declaration. This is the contents of the
13046 parentheses following a named or anonymous subroutine declaration when the
13047 C<signatures> feature is enabled. Note that this function neither expects
13048 nor consumes the opening and closing parentheses around the signature; it
13049 is the caller's job to handle these.
13051 This function must only be called during parsing of a subroutine; after
13052 L</start_subparse> has been called. It might allocate lexical variables on
13053 the pad for the current subroutine.
13055 The op tree to unpack the arguments from the stack at runtime is returned.
13056 This op tree should appear at the beginning of the compiled function. The
13057 caller may wish to use L</op_append_list> to build their function body
13058 after it, or splice it together with the body before calling L</newATTRSUB>.
13060 The C<flags> parameter is reserved for future use, and must always
13067 Perl_parse_subsignature(pTHX_ U32 flags)
13070 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13071 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13075 * ex: set ts=8 sts=4 sw=4 et: