3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
12 * 'It all comes from here, the stench and the peril.' --Frodo
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
18 * This file is the lexer for Perl. It's closely linked to the
21 * The main routine is yylex(), which returns the next token.
25 =head1 Lexer interface
26 This is the lower layer of the Perl parser, managing characters and tokens.
28 =for apidoc AmnU|yy_parser *|PL_parser
30 Pointer to a structure encapsulating the state of the parsing operation
31 currently in progress. The pointer can be locally changed to perform
32 a nested parse without interfering with the state of an outer parse.
33 Individual members of C<PL_parser> have their own documentation.
39 #define PERL_IN_TOKE_C
41 #include "invlist_inline.h"
43 #define new_constant(a,b,c,d,e,f,g, h) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g, h)
46 #define pl_yylval (PL_parser->yylval)
48 /* XXX temporary backwards compatibility */
49 #define PL_lex_brackets (PL_parser->lex_brackets)
50 #define PL_lex_allbrackets (PL_parser->lex_allbrackets)
51 #define PL_lex_fakeeof (PL_parser->lex_fakeeof)
52 #define PL_lex_brackstack (PL_parser->lex_brackstack)
53 #define PL_lex_casemods (PL_parser->lex_casemods)
54 #define PL_lex_casestack (PL_parser->lex_casestack)
55 #define PL_lex_dojoin (PL_parser->lex_dojoin)
56 #define PL_lex_formbrack (PL_parser->lex_formbrack)
57 #define PL_lex_inpat (PL_parser->lex_inpat)
58 #define PL_lex_inwhat (PL_parser->lex_inwhat)
59 #define PL_lex_op (PL_parser->lex_op)
60 #define PL_lex_repl (PL_parser->lex_repl)
61 #define PL_lex_starts (PL_parser->lex_starts)
62 #define PL_lex_stuff (PL_parser->lex_stuff)
63 #define PL_multi_start (PL_parser->multi_start)
64 #define PL_multi_open (PL_parser->multi_open)
65 #define PL_multi_close (PL_parser->multi_close)
66 #define PL_preambled (PL_parser->preambled)
67 #define PL_linestr (PL_parser->linestr)
68 #define PL_expect (PL_parser->expect)
69 #define PL_copline (PL_parser->copline)
70 #define PL_bufptr (PL_parser->bufptr)
71 #define PL_oldbufptr (PL_parser->oldbufptr)
72 #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
73 #define PL_linestart (PL_parser->linestart)
74 #define PL_bufend (PL_parser->bufend)
75 #define PL_last_uni (PL_parser->last_uni)
76 #define PL_last_lop (PL_parser->last_lop)
77 #define PL_last_lop_op (PL_parser->last_lop_op)
78 #define PL_lex_state (PL_parser->lex_state)
79 #define PL_rsfp (PL_parser->rsfp)
80 #define PL_rsfp_filters (PL_parser->rsfp_filters)
81 #define PL_in_my (PL_parser->in_my)
82 #define PL_in_my_stash (PL_parser->in_my_stash)
83 #define PL_tokenbuf (PL_parser->tokenbuf)
84 #define PL_multi_end (PL_parser->multi_end)
85 #define PL_error_count (PL_parser->error_count)
87 # define PL_nexttoke (PL_parser->nexttoke)
88 # define PL_nexttype (PL_parser->nexttype)
89 # define PL_nextval (PL_parser->nextval)
92 #define SvEVALED(sv) \
93 (SvTYPE(sv) >= SVt_PVNV \
94 && ((XPVIV*)SvANY(sv))->xiv_u.xivu_eval_seen)
96 static const char* const ident_too_long = "Identifier too long";
97 static const char* const ident_var_zero_multi_digit = "Numeric variables with more than one digit may not start with '0'";
99 # define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
101 #define XENUMMASK 0x3f
102 #define XFAKEEOF 0x40
103 #define XFAKEBRACK 0x80
105 #ifdef USE_UTF8_SCRIPTS
106 # define UTF cBOOL(!IN_BYTES)
108 # define UTF cBOOL((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
111 /* The maximum number of characters preceding the unrecognized one to display */
112 #define UNRECOGNIZED_PRECEDE_COUNT 10
114 /* In variables named $^X, these are the legal values for X.
115 * 1999-02-27 mjd-perl-patch@plover.com */
116 #define isCONTROLVAR(x) (isUPPER(x) || memCHRs("[\\]^_?", (x)))
118 #define SPACE_OR_TAB(c) isBLANK_A(c)
120 #define HEXFP_PEEK(s) \
122 (isXDIGIT(s[1]) || isALPHA_FOLD_EQ(s[1], 'p'))) || \
123 isALPHA_FOLD_EQ(s[0], 'p'))
125 /* LEX_* are values for PL_lex_state, the state of the lexer.
126 * They are arranged oddly so that the guard on the switch statement
127 * can get by with a single comparison (if the compiler is smart enough).
129 * These values refer to the various states within a sublex parse,
130 * i.e. within a double quotish string
133 /* #define LEX_NOTPARSING 11 is done in perl.h. */
135 #define LEX_NORMAL 10 /* normal code (ie not within "...") */
136 #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
137 #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
138 #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
139 #define LEX_INTERPSTART 6 /* expecting the start of a $var */
141 /* at end of code, eg "$x" followed by: */
142 #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
143 #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
145 #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
146 string or after \E, $foo, etc */
147 #define LEX_INTERPCONST 2 /* NOT USED */
148 #define LEX_FORMLINE 1 /* expecting a format line */
150 /* returned to yyl_try() to request it to retry the parse loop, expected to only
151 be returned directly by yyl_fake_eof(), but functions that call yyl_fake_eof()
154 yylex (aka Perl_yylex) returns 0 on EOF rather than returning -1,
155 other token values are 258 or higher (see perly.h), so -1 should be
158 #define YYL_RETRY (-1)
161 static const char* const lex_state_names[] = {
176 #include "keywords.h"
178 /* CLINE is a macro that ensures PL_copline has a sane value */
180 #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
183 * Convenience functions to return different tokens and prime the
184 * lexer for the next token. They all take an argument.
186 * TOKEN : generic token (used for '(', DOLSHARP, etc)
187 * OPERATOR : generic operator
188 * AOPERATOR : assignment operator
189 * PREBLOCK : beginning the block after an if, while, foreach, ...
190 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
191 * PREREF : *EXPR where EXPR is not a simple identifier
192 * TERM : expression term
193 * POSTDEREF : postfix dereference (->$* ->@[...] etc.)
194 * LOOPX : loop exiting command (goto, last, dump, etc)
195 * FTST : file test operator
196 * FUN0 : zero-argument function
197 * FUN0OP : zero-argument function, with its op created in this file
198 * FUN1 : not used, except for not, which isn't a UNIOP
199 * BOop : bitwise or or xor
201 * BCop : bitwise complement
202 * SHop : shift operator
203 * PWop : power operator
204 * PMop : pattern-matching operator
205 * Aop : addition-level operator
206 * AopNOASSIGN : addition-level operator that is never part of .=
207 * Mop : multiplication-level operator
208 * ChEop : chaining equality-testing operator
209 * NCEop : non-chaining comparison operator at equality precedence
210 * ChRop : chaining relational operator <= != gt
211 * NCRop : non-chaining relational operator isa
213 * Also see LOP and lop() below.
216 #ifdef DEBUGGING /* Serve -DT. */
217 # define REPORT(retval) tokereport((I32)retval, &pl_yylval)
219 # define REPORT(retval) (retval)
222 #define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
223 #define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
224 #define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, retval))
225 #define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
226 #define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
227 #define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
228 #define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
229 #define POSTDEREF(f) return (PL_bufptr = s, S_postderef(aTHX_ REPORT(f),s[1]))
230 #define LOOPX(f) return (PL_bufptr = force_word(s,BAREWORD,TRUE,FALSE), \
232 PL_expect = PL_nexttoke ? XOPERATOR : XTERM, \
234 #define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
235 #define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
236 #define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
237 #define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
238 #define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITOROP))
239 #define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)BITANDOP))
240 #define BCop(f) return pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr = s, \
242 #define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)SHIFTOP))
243 #define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)POWOP))
244 #define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
245 #define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)ADDOP))
246 #define AopNOASSIGN(f) return (pl_yylval.ival=f, PL_bufptr=s, REPORT((int)ADDOP))
247 #define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, (int)MULOP))
248 #define ChEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHEQOP))
249 #define NCEop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCEQOP))
250 #define ChRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)CHRELOP))
251 #define NCRop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)NCRELOP))
253 /* This bit of chicanery makes a unary function followed by
254 * a parenthesis into a function with one argument, highest precedence.
255 * The UNIDOR macro is for unary functions that can be followed by the //
256 * operator (such as C<shift // 0>).
258 #define UNI3(f,x,have_x) { \
259 pl_yylval.ival = f; \
260 if (have_x) PL_expect = x; \
262 PL_last_uni = PL_oldbufptr; \
263 PL_last_lop_op = (f) < 0 ? -(f) : (f); \
265 return REPORT( (int)FUNC1 ); \
267 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
269 #define UNI(f) UNI3(f,XTERM,1)
270 #define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
271 #define UNIPROTO(f,optional) { \
272 if (optional) PL_last_uni = PL_oldbufptr; \
276 #define UNIBRACK(f) UNI3(f,0,0)
278 /* grandfather return to old style */
281 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
282 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
283 pl_yylval.ival = (f); \
289 #define COPLINE_INC_WITH_HERELINES \
291 CopLINE_inc(PL_curcop); \
292 if (PL_parser->herelines) \
293 CopLINE(PL_curcop) += PL_parser->herelines, \
294 PL_parser->herelines = 0; \
296 /* Called after scan_str to update CopLINE(PL_curcop), but only when there
297 * is no sublex_push to follow. */
298 #define COPLINE_SET_FROM_MULTI_END \
300 CopLINE_set(PL_curcop, PL_multi_end); \
301 if (PL_multi_end != PL_multi_start) \
302 PL_parser->herelines = 0; \
306 /* A file-local structure for passing around information about subroutines and
307 * related definable words */
317 static const struct code no_code = { NULL, NULL, NULL, NULL, NULL, 0, FALSE };
321 /* how to interpret the pl_yylval associated with the token */
325 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
330 static struct debug_tokens {
332 enum token_type type;
334 } const debug_tokens[] =
336 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
337 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
338 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
339 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
340 { ANON_SIGSUB, TOKENTYPE_IVAL, "ANON_SIGSUB" },
341 { ARROW, TOKENTYPE_NONE, "ARROW" },
342 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
343 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
344 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
345 { CHEQOP, TOKENTYPE_OPNUM, "CHEQOP" },
346 { CHRELOP, TOKENTYPE_OPNUM, "CHRELOP" },
347 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
348 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
349 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
350 { DO, TOKENTYPE_NONE, "DO" },
351 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
352 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
353 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
354 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
355 { ELSE, TOKENTYPE_NONE, "ELSE" },
356 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
357 { FOR, TOKENTYPE_IVAL, "FOR" },
358 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
359 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
360 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
361 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
362 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
363 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
364 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
365 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
366 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
367 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
368 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
369 { IF, TOKENTYPE_IVAL, "IF" },
370 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
371 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
372 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
373 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
374 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
375 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
376 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
377 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
378 { MY, TOKENTYPE_IVAL, "MY" },
379 { NCEQOP, TOKENTYPE_OPNUM, "NCEQOP" },
380 { NCRELOP, TOKENTYPE_OPNUM, "NCRELOP" },
381 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
382 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
383 { OROP, TOKENTYPE_IVAL, "OROP" },
384 { OROR, TOKENTYPE_NONE, "OROR" },
385 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
386 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
387 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
388 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
389 { POSTJOIN, TOKENTYPE_NONE, "POSTJOIN" },
390 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
391 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
392 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
393 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
394 { PREINC, TOKENTYPE_NONE, "PREINC" },
395 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
396 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
397 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
398 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
399 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
400 { SIGSUB, TOKENTYPE_NONE, "SIGSUB" },
401 { SUB, TOKENTYPE_NONE, "SUB" },
402 { SUBLEXEND, TOKENTYPE_NONE, "SUBLEXEND" },
403 { SUBLEXSTART, TOKENTYPE_NONE, "SUBLEXSTART" },
404 { THING, TOKENTYPE_OPVAL, "THING" },
405 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
406 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
407 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
408 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
409 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
410 { USE, TOKENTYPE_IVAL, "USE" },
411 { WHEN, TOKENTYPE_IVAL, "WHEN" },
412 { WHILE, TOKENTYPE_IVAL, "WHILE" },
413 { BAREWORD, TOKENTYPE_OPVAL, "BAREWORD" },
414 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
415 { 0, TOKENTYPE_NONE, NULL }
418 /* dump the returned token in rv, plus any optional arg in pl_yylval */
421 S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
423 PERL_ARGS_ASSERT_TOKEREPORT;
426 const char *name = NULL;
427 enum token_type type = TOKENTYPE_NONE;
428 const struct debug_tokens *p;
429 SV* const report = newSVpvs("<== ");
431 for (p = debug_tokens; p->token; p++) {
432 if (p->token == (int)rv) {
439 Perl_sv_catpv(aTHX_ report, name);
440 else if (isGRAPH(rv))
442 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
444 sv_catpvs(report, " (pending identifier)");
447 sv_catpvs(report, "EOF");
449 Perl_sv_catpvf(aTHX_ report, "?? %" IVdf, (IV)rv);
454 Perl_sv_catpvf(aTHX_ report, "(ival=%" IVdf ")", (IV)lvalp->ival);
456 case TOKENTYPE_OPNUM:
457 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
458 PL_op_name[lvalp->ival]);
461 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
463 case TOKENTYPE_OPVAL:
465 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
466 PL_op_name[lvalp->opval->op_type]);
467 if (lvalp->opval->op_type == OP_CONST) {
468 Perl_sv_catpvf(aTHX_ report, " %s",
469 SvPEEK(cSVOPx_sv(lvalp->opval)));
474 sv_catpvs(report, "(opval=null)");
477 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
483 /* print the buffer with suitable escapes */
486 S_printbuf(pTHX_ const char *const fmt, const char *const s)
488 SV* const tmp = newSVpvs("");
490 PERL_ARGS_ASSERT_PRINTBUF;
492 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral); /* fmt checked by caller */
493 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
494 GCC_DIAG_RESTORE_STMT;
503 * This subroutine looks for an '=' next to the operator that has just been
504 * parsed and turns it into an ASSIGNOP if it finds one.
508 S_ao(pTHX_ int toketype)
510 if (*PL_bufptr == '=') {
512 if (toketype == ANDAND)
513 pl_yylval.ival = OP_ANDASSIGN;
514 else if (toketype == OROR)
515 pl_yylval.ival = OP_ORASSIGN;
516 else if (toketype == DORDOR)
517 pl_yylval.ival = OP_DORASSIGN;
520 return REPORT(toketype);
525 * When Perl expects an operator and finds something else, no_op
526 * prints the warning. It always prints "<something> found where
527 * operator expected. It prints "Missing semicolon on previous line?"
528 * if the surprise occurs at the start of the line. "do you need to
529 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
530 * where the compiler doesn't know if foo is a method call or a function.
531 * It prints "Missing operator before end of line" if there's nothing
532 * after the missing operator, or "... before <...>" if there is something
533 * after the missing operator.
535 * PL_bufptr is expected to point to the start of the thing that was found,
536 * and s after the next token or partial token.
540 S_no_op(pTHX_ const char *const what, char *s)
542 char * const oldbp = PL_bufptr;
543 const bool is_first = (PL_oldbufptr == PL_linestart);
545 PERL_ARGS_ASSERT_NO_OP;
551 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
552 if (ckWARN_d(WARN_SYNTAX)) {
554 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
555 "\t(Missing semicolon on previous line?)\n");
556 else if (PL_oldoldbufptr && isIDFIRST_lazy_if_safe(PL_oldoldbufptr,
561 for (t = PL_oldoldbufptr;
562 (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF) || *t == ':');
563 t += UTF ? UTF8SKIP(t) : 1)
567 if (t < PL_bufptr && isSPACE(*t))
568 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
569 "\t(Do you need to predeclare %" UTF8f "?)\n",
570 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
574 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
575 "\t(Missing operator before %" UTF8f "?)\n",
576 UTF8fARG(UTF, s - oldbp, oldbp));
584 * Complain about missing quote/regexp/heredoc terminator.
585 * If it's called with NULL then it cauterizes the line buffer.
586 * If we're in a delimited string and the delimiter is a control
587 * character, it's reformatted into a two-char sequence like ^C.
592 S_missingterm(pTHX_ char *s, STRLEN len)
594 char tmpbuf[UTF8_MAXBYTES + 1];
599 char * const nl = (char *) my_memrchr(s, '\n', len);
606 else if (PL_multi_close < 32) {
608 tmpbuf[1] = (char)toCTRL(PL_multi_close);
614 if (LIKELY(PL_multi_close < 256)) {
615 *tmpbuf = (char)PL_multi_close;
620 char *end = (char *)uvchr_to_utf8((U8 *)tmpbuf, PL_multi_close);
627 q = memchr(s, '"', len) ? '\'' : '"';
628 sv = sv_2mortal(newSVpvn(s, len));
631 Perl_croak(aTHX_ "Can't find string terminator %c%" SVf "%c"
632 " anywhere before EOF", q, SVfARG(sv), q);
638 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
639 * utf16-to-utf8-reversed.
642 #ifdef PERL_CR_FILTER
646 const char *s = SvPVX_const(sv);
647 const char * const e = s + SvCUR(sv);
649 PERL_ARGS_ASSERT_STRIP_RETURN;
651 /* outer loop optimized to do nothing if there are no CR-LFs */
653 if (*s++ == '\r' && *s == '\n') {
654 /* hit a CR-LF, need to copy the rest */
658 if (*s == '\r' && s[1] == '\n')
669 S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
671 const I32 count = FILTER_READ(idx+1, sv, maxlen);
672 if (count > 0 && !maxlen)
679 =for apidoc lex_start
681 Creates and initialises a new lexer/parser state object, supplying
682 a context in which to lex and parse from a new source of Perl code.
683 A pointer to the new state object is placed in L</PL_parser>. An entry
684 is made on the save stack so that upon unwinding, the new state object
685 will be destroyed and the former value of L</PL_parser> will be restored.
686 Nothing else need be done to clean up the parsing context.
688 The code to be parsed comes from C<line> and C<rsfp>. C<line>, if
689 non-null, provides a string (in SV form) containing code to be parsed.
690 A copy of the string is made, so subsequent modification of C<line>
691 does not affect parsing. C<rsfp>, if non-null, provides an input stream
692 from which code will be read to be parsed. If both are non-null, the
693 code in C<line> comes first and must consist of complete lines of input,
694 and C<rsfp> supplies the remainder of the source.
696 The C<flags> parameter is reserved for future use. Currently it is only
697 used by perl internally, so extensions should always pass zero.
702 /* LEX_START_SAME_FILTER indicates that this is not a new file, so it
703 can share filters with the current parser.
704 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
705 caller, hence isn't owned by the parser, so shouldn't be closed on parser
706 destruction. This is used to handle the case of defaulting to reading the
707 script from the standard input because no filename was given on the command
708 line (without getting confused by situation where STDIN has been closed, so
709 the script handle is opened on fd 0) */
712 Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
714 const char *s = NULL;
715 yy_parser *parser, *oparser;
717 if (flags && flags & ~LEX_START_FLAGS)
718 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
720 /* create and initialise a parser */
722 Newxz(parser, 1, yy_parser);
723 parser->old_parser = oparser = PL_parser;
726 parser->stack = NULL;
727 parser->stack_max1 = NULL;
730 /* on scope exit, free this parser and restore any outer one */
732 parser->saved_curcop = PL_curcop;
734 /* initialise lexer state */
736 parser->nexttoke = 0;
737 parser->error_count = oparser ? oparser->error_count : 0;
738 parser->copline = parser->preambling = NOLINE;
739 parser->lex_state = LEX_NORMAL;
740 parser->expect = XSTATE;
742 parser->recheck_utf8_validity = TRUE;
743 parser->rsfp_filters =
744 !(flags & LEX_START_SAME_FILTER) || !oparser
746 : MUTABLE_AV(SvREFCNT_inc(
747 oparser->rsfp_filters
748 ? oparser->rsfp_filters
749 : (oparser->rsfp_filters = newAV())
752 Newx(parser->lex_brackstack, 120, char);
753 Newx(parser->lex_casestack, 12, char);
754 *parser->lex_casestack = '\0';
755 Newxz(parser->lex_shared, 1, LEXSHARED);
759 const U8* first_bad_char_loc;
761 s = SvPV_const(line, len);
764 && UNLIKELY(! is_utf8_string_loc((U8 *) s,
766 &first_bad_char_loc)))
768 _force_out_malformed_utf8_message(first_bad_char_loc,
769 (U8 *) s + SvCUR(line),
771 1 /* 1 means die */ );
772 NOT_REACHED; /* NOTREACHED */
775 parser->linestr = flags & LEX_START_COPIED
776 ? SvREFCNT_inc_simple_NN(line)
777 : newSVpvn_flags(s, len, SvUTF8(line));
779 sv_catpvs(parser->linestr, "\n;");
781 parser->linestr = newSVpvn("\n;", rsfp ? 1 : 2);
784 parser->oldoldbufptr =
787 parser->linestart = SvPVX(parser->linestr);
788 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
789 parser->last_lop = parser->last_uni = NULL;
791 STATIC_ASSERT_STMT(FITS_IN_8_BITS(LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
792 |LEX_DONT_CLOSE_RSFP));
793 parser->lex_flags = (U8) (flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
794 |LEX_DONT_CLOSE_RSFP));
796 parser->in_pod = parser->filtered = 0;
800 /* delete a parser object */
803 Perl_parser_free(pTHX_ const yy_parser *parser)
805 PERL_ARGS_ASSERT_PARSER_FREE;
807 PL_curcop = parser->saved_curcop;
808 SvREFCNT_dec(parser->linestr);
810 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
811 PerlIO_clearerr(parser->rsfp);
812 else if (parser->rsfp && (!parser->old_parser
813 || (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
814 PerlIO_close(parser->rsfp);
815 SvREFCNT_dec(parser->rsfp_filters);
816 SvREFCNT_dec(parser->lex_stuff);
817 SvREFCNT_dec(parser->lex_sub_repl);
819 Safefree(parser->lex_brackstack);
820 Safefree(parser->lex_casestack);
821 Safefree(parser->lex_shared);
822 PL_parser = parser->old_parser;
827 Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
829 I32 nexttoke = parser->nexttoke;
830 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
832 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
833 && parser->nextval[nexttoke].opval
834 && parser->nextval[nexttoke].opval->op_slabbed
835 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
836 op_free(parser->nextval[nexttoke].opval);
837 parser->nextval[nexttoke].opval = NULL;
844 =for apidoc AmnxUN|SV *|PL_parser-E<gt>linestr
846 Buffer scalar containing the chunk currently under consideration of the
847 text currently being lexed. This is always a plain string scalar (for
848 which C<SvPOK> is true). It is not intended to be used as a scalar by
849 normal scalar means; instead refer to the buffer directly by the pointer
850 variables described below.
852 The lexer maintains various C<char*> pointers to things in the
853 C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
854 reallocated, all of these pointers must be updated. Don't attempt to
855 do this manually, but rather use L</lex_grow_linestr> if you need to
856 reallocate the buffer.
858 The content of the text chunk in the buffer is commonly exactly one
859 complete line of input, up to and including a newline terminator,
860 but there are situations where it is otherwise. The octets of the
861 buffer may be intended to be interpreted as either UTF-8 or Latin-1.
862 The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
863 flag on this scalar, which may disagree with it.
865 For direct examination of the buffer, the variable
866 L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
867 lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
868 of these pointers is usually preferable to examination of the scalar
869 through normal scalar means.
871 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufend
873 Direct pointer to the end of the chunk of text currently being lexed, the
874 end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
875 + SvCUR(PL_parser-E<gt>linestr)>. A C<NUL> character (zero octet) is
876 always located at the end of the buffer, and does not count as part of
877 the buffer's contents.
879 =for apidoc AmnxUN|char *|PL_parser-E<gt>bufptr
881 Points to the current position of lexing inside the lexer buffer.
882 Characters around this point may be freely examined, within
883 the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
884 L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
885 interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
887 Lexing code (whether in the Perl core or not) moves this pointer past
888 the characters that it consumes. It is also expected to perform some
889 bookkeeping whenever a newline character is consumed. This movement
890 can be more conveniently performed by the function L</lex_read_to>,
891 which handles newlines appropriately.
893 Interpretation of the buffer's octets can be abstracted out by
894 using the slightly higher-level functions L</lex_peek_unichar> and
895 L</lex_read_unichar>.
897 =for apidoc AmnxUN|char *|PL_parser-E<gt>linestart
899 Points to the start of the current line inside the lexer buffer.
900 This is useful for indicating at which column an error occurred, and
901 not much else. This must be updated by any lexing code that consumes
902 a newline; the function L</lex_read_to> handles this detail.
908 =for apidoc lex_bufutf8
910 Indicates whether the octets in the lexer buffer
911 (L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
912 of Unicode characters. If not, they should be interpreted as Latin-1
913 characters. This is analogous to the C<SvUTF8> flag for scalars.
915 In UTF-8 mode, it is not guaranteed that the lexer buffer actually
916 contains valid UTF-8. Lexing code must be robust in the face of invalid
919 The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
920 is significant, but not the whole story regarding the input character
921 encoding. Normally, when a file is being read, the scalar contains octets
922 and its C<SvUTF8> flag is off, but the octets should be interpreted as
923 UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
924 however, the scalar may have the C<SvUTF8> flag on, and in this case its
925 octets should be interpreted as UTF-8 unless the C<use bytes> pragma
926 is in effect. This logic may change in the future; use this function
927 instead of implementing the logic yourself.
933 Perl_lex_bufutf8(pTHX)
939 =for apidoc lex_grow_linestr
941 Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
942 at least C<len> octets (including terminating C<NUL>). Returns a
943 pointer to the reallocated buffer. This is necessary before making
944 any direct modification of the buffer that would increase its length.
945 L</lex_stuff_pvn> provides a more convenient way to insert text into
948 Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
949 this function updates all of the lexer's variables that point directly
956 Perl_lex_grow_linestr(pTHX_ STRLEN len)
960 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
961 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
964 linestr = PL_parser->linestr;
965 buf = SvPVX(linestr);
966 if (len <= SvLEN(linestr))
969 /* Is the lex_shared linestr SV the same as the current linestr SV?
970 * Only in this case does re_eval_start need adjusting, since it
971 * points within lex_shared->ls_linestr's buffer */
972 current = ( !PL_parser->lex_shared->ls_linestr
973 || linestr == PL_parser->lex_shared->ls_linestr);
975 bufend_pos = PL_parser->bufend - buf;
976 bufptr_pos = PL_parser->bufptr - buf;
977 oldbufptr_pos = PL_parser->oldbufptr - buf;
978 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
979 linestart_pos = PL_parser->linestart - buf;
980 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
981 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
982 re_eval_start_pos = (current && PL_parser->lex_shared->re_eval_start) ?
983 PL_parser->lex_shared->re_eval_start - buf : 0;
985 buf = sv_grow(linestr, len);
987 PL_parser->bufend = buf + bufend_pos;
988 PL_parser->bufptr = buf + bufptr_pos;
989 PL_parser->oldbufptr = buf + oldbufptr_pos;
990 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
991 PL_parser->linestart = buf + linestart_pos;
992 if (PL_parser->last_uni)
993 PL_parser->last_uni = buf + last_uni_pos;
994 if (PL_parser->last_lop)
995 PL_parser->last_lop = buf + last_lop_pos;
996 if (current && PL_parser->lex_shared->re_eval_start)
997 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
1002 =for apidoc lex_stuff_pvn
1004 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1005 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1006 reallocating the buffer if necessary. This means that lexing code that
1007 runs later will see the characters as if they had appeared in the input.
1008 It is not recommended to do this as part of normal parsing, and most
1009 uses of this facility run the risk of the inserted characters being
1010 interpreted in an unintended manner.
1012 The string to be inserted is represented by C<len> octets starting
1013 at C<pv>. These octets are interpreted as either UTF-8 or Latin-1,
1014 according to whether the C<LEX_STUFF_UTF8> flag is set in C<flags>.
1015 The characters are recoded for the lexer buffer, according to how the
1016 buffer is currently being interpreted (L</lex_bufutf8>). If a string
1017 to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
1018 function is more convenient.
1020 =for apidoc Amnh||LEX_STUFF_UTF8
1026 Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
1030 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1031 if (flags & ~(LEX_STUFF_UTF8))
1032 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1034 if (flags & LEX_STUFF_UTF8) {
1037 STRLEN highhalf = variant_under_utf8_count((U8 *) pv,
1039 const char *p, *e = pv+len;;
1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1043 bufptr = PL_parser->bufptr;
1044 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
1045 SvCUR_set(PL_parser->linestr,
1046 SvCUR(PL_parser->linestr) + len+highhalf);
1047 PL_parser->bufend += len+highhalf;
1048 for (p = pv; p != e; p++) {
1049 append_utf8_from_native_byte(*p, (U8 **) &bufptr);
1053 if (flags & LEX_STUFF_UTF8) {
1054 STRLEN highhalf = 0;
1055 const char *p, *e = pv+len;
1056 for (p = pv; p != e; p++) {
1058 if (UTF8_IS_ABOVE_LATIN1(c)) {
1059 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1060 "non-Latin-1 character into Latin-1 input");
1061 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
1064 } else assert(UTF8_IS_INVARIANT(c));
1068 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1069 bufptr = PL_parser->bufptr;
1070 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
1071 SvCUR_set(PL_parser->linestr,
1072 SvCUR(PL_parser->linestr) + len-highhalf);
1073 PL_parser->bufend += len-highhalf;
1076 if (UTF8_IS_INVARIANT(*p)) {
1082 *bufptr++ = EIGHT_BIT_UTF8_TO_NATIVE(*p, *(p+1));
1088 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1089 bufptr = PL_parser->bufptr;
1090 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1091 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
1092 PL_parser->bufend += len;
1093 Copy(pv, bufptr, len, char);
1099 =for apidoc lex_stuff_pv
1101 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1102 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1103 reallocating the buffer if necessary. This means that lexing code that
1104 runs later will see the characters as if they had appeared in the input.
1105 It is not recommended to do this as part of normal parsing, and most
1106 uses of this facility run the risk of the inserted characters being
1107 interpreted in an unintended manner.
1109 The string to be inserted is represented by octets starting at C<pv>
1110 and continuing to the first nul. These octets are interpreted as either
1111 UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1112 in C<flags>. The characters are recoded for the lexer buffer, according
1113 to how the buffer is currently being interpreted (L</lex_bufutf8>).
1114 If it is not convenient to nul-terminate a string to be inserted, the
1115 L</lex_stuff_pvn> function is more appropriate.
1121 Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1123 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1124 lex_stuff_pvn(pv, strlen(pv), flags);
1128 =for apidoc lex_stuff_sv
1130 Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1131 immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1132 reallocating the buffer if necessary. This means that lexing code that
1133 runs later will see the characters as if they had appeared in the input.
1134 It is not recommended to do this as part of normal parsing, and most
1135 uses of this facility run the risk of the inserted characters being
1136 interpreted in an unintended manner.
1138 The string to be inserted is the string value of C<sv>. The characters
1139 are recoded for the lexer buffer, according to how the buffer is currently
1140 being interpreted (L</lex_bufutf8>). If a string to be inserted is
1141 not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1142 need to construct a scalar.
1148 Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1152 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1154 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1156 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1160 =for apidoc lex_unstuff
1162 Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1163 C<ptr>. Text following C<ptr> will be moved, and the buffer shortened.
1164 This hides the discarded text from any lexing code that runs later,
1165 as if the text had never appeared.
1167 This is not the normal way to consume lexed text. For that, use
1174 Perl_lex_unstuff(pTHX_ char *ptr)
1178 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1179 buf = PL_parser->bufptr;
1181 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1184 bufend = PL_parser->bufend;
1186 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1187 unstuff_len = ptr - buf;
1188 Move(ptr, buf, bufend+1-ptr, char);
1189 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1190 PL_parser->bufend = bufend - unstuff_len;
1194 =for apidoc lex_read_to
1196 Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1197 to C<ptr>. This advances L</PL_parser-E<gt>bufptr> to match C<ptr>,
1198 performing the correct bookkeeping whenever a newline character is passed.
1199 This is the normal way to consume lexed text.
1201 Interpretation of the buffer's octets can be abstracted out by
1202 using the slightly higher-level functions L</lex_peek_unichar> and
1203 L</lex_read_unichar>.
1209 Perl_lex_read_to(pTHX_ char *ptr)
1212 PERL_ARGS_ASSERT_LEX_READ_TO;
1213 s = PL_parser->bufptr;
1214 if (ptr < s || ptr > PL_parser->bufend)
1215 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1216 for (; s != ptr; s++)
1218 COPLINE_INC_WITH_HERELINES;
1219 PL_parser->linestart = s+1;
1221 PL_parser->bufptr = ptr;
1225 =for apidoc lex_discard_to
1227 Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1228 up to C<ptr>. The remaining content of the buffer will be moved, and
1229 all pointers into the buffer updated appropriately. C<ptr> must not
1230 be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1231 it is not permitted to discard text that has yet to be lexed.
1233 Normally it is not necessarily to do this directly, because it suffices to
1234 use the implicit discarding behaviour of L</lex_next_chunk> and things
1235 based on it. However, if a token stretches across multiple lines,
1236 and the lexing code has kept multiple lines of text in the buffer for
1237 that purpose, then after completion of the token it would be wise to
1238 explicitly discard the now-unneeded earlier lines, to avoid future
1239 multi-line tokens growing the buffer without bound.
1245 Perl_lex_discard_to(pTHX_ char *ptr)
1249 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1250 buf = SvPVX(PL_parser->linestr);
1252 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1255 if (ptr > PL_parser->bufptr)
1256 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1257 discard_len = ptr - buf;
1258 if (PL_parser->oldbufptr < ptr)
1259 PL_parser->oldbufptr = ptr;
1260 if (PL_parser->oldoldbufptr < ptr)
1261 PL_parser->oldoldbufptr = ptr;
1262 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1263 PL_parser->last_uni = NULL;
1264 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1265 PL_parser->last_lop = NULL;
1266 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1267 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1268 PL_parser->bufend -= discard_len;
1269 PL_parser->bufptr -= discard_len;
1270 PL_parser->oldbufptr -= discard_len;
1271 PL_parser->oldoldbufptr -= discard_len;
1272 if (PL_parser->last_uni)
1273 PL_parser->last_uni -= discard_len;
1274 if (PL_parser->last_lop)
1275 PL_parser->last_lop -= discard_len;
1279 Perl_notify_parser_that_changed_to_utf8(pTHX)
1281 /* Called when $^H is changed to indicate that HINT_UTF8 has changed from
1282 * off to on. At compile time, this has the effect of entering a 'use
1283 * utf8' section. This means that any input was not previously checked for
1284 * UTF-8 (because it was off), but now we do need to check it, or our
1285 * assumptions about the input being sane could be wrong, and we could
1286 * segfault. This routine just sets a flag so that the next time we look
1287 * at the input we do the well-formed UTF-8 check. If we aren't in the
1288 * proper phase, there may not be a parser object, but if there is, setting
1289 * the flag is harmless */
1292 PL_parser->recheck_utf8_validity = TRUE;
1297 =for apidoc lex_next_chunk
1299 Reads in the next chunk of text to be lexed, appending it to
1300 L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1301 looked to the end of the current chunk and wants to know more. It is
1302 usual, but not necessary, for lexing to have consumed the entirety of
1303 the current chunk at this time.
1305 If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1306 chunk (i.e., the current chunk has been entirely consumed), normally the
1307 current chunk will be discarded at the same time that the new chunk is
1308 read in. If C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, the current chunk
1309 will not be discarded. If the current chunk has not been entirely
1310 consumed, then it will not be discarded regardless of the flag.
1312 Returns true if some new text was added to the buffer, or false if the
1313 buffer has reached the end of the input text.
1315 =for apidoc Amnh||LEX_KEEP_PREVIOUS
1320 #define LEX_FAKE_EOF 0x80000000
1321 #define LEX_NO_TERM 0x40000000 /* here-doc */
1324 Perl_lex_next_chunk(pTHX_ U32 flags)
1328 STRLEN old_bufend_pos, new_bufend_pos;
1329 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1330 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
1331 bool got_some_for_debugger = 0;
1334 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
1335 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
1336 if (!(flags & LEX_NO_TERM) && PL_lex_inwhat)
1338 linestr = PL_parser->linestr;
1339 buf = SvPVX(linestr);
1340 if (!(flags & LEX_KEEP_PREVIOUS)
1341 && PL_parser->bufptr == PL_parser->bufend)
1343 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1345 if (PL_parser->last_uni != PL_parser->bufend)
1346 PL_parser->last_uni = NULL;
1347 if (PL_parser->last_lop != PL_parser->bufend)
1348 PL_parser->last_lop = NULL;
1349 last_uni_pos = last_lop_pos = 0;
1351 SvCUR_set(linestr, 0);
1353 old_bufend_pos = PL_parser->bufend - buf;
1354 bufptr_pos = PL_parser->bufptr - buf;
1355 oldbufptr_pos = PL_parser->oldbufptr - buf;
1356 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1357 linestart_pos = PL_parser->linestart - buf;
1358 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1359 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1361 if (flags & LEX_FAKE_EOF) {
1363 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
1365 } else if (filter_gets(linestr, old_bufend_pos)) {
1367 got_some_for_debugger = 1;
1368 } else if (flags & LEX_NO_TERM) {
1371 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1374 /* End of real input. Close filehandle (unless it was STDIN),
1375 * then add implicit termination.
1377 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
1378 PerlIO_clearerr(PL_parser->rsfp);
1379 else if (PL_parser->rsfp)
1380 (void)PerlIO_close(PL_parser->rsfp);
1381 PL_parser->rsfp = NULL;
1382 PL_parser->in_pod = PL_parser->filtered = 0;
1383 if (!PL_in_eval && PL_minus_p) {
1385 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1386 PL_minus_n = PL_minus_p = 0;
1387 } else if (!PL_in_eval && PL_minus_n) {
1388 sv_catpvs(linestr, /*{*/";}");
1391 sv_catpvs(linestr, ";");
1394 buf = SvPVX(linestr);
1395 new_bufend_pos = SvCUR(linestr);
1396 PL_parser->bufend = buf + new_bufend_pos;
1397 PL_parser->bufptr = buf + bufptr_pos;
1400 const U8* first_bad_char_loc;
1401 if (UNLIKELY(! is_utf8_string_loc(
1402 (U8 *) PL_parser->bufptr,
1403 PL_parser->bufend - PL_parser->bufptr,
1404 &first_bad_char_loc)))
1406 _force_out_malformed_utf8_message(first_bad_char_loc,
1407 (U8 *) PL_parser->bufend,
1409 1 /* 1 means die */ );
1410 NOT_REACHED; /* NOTREACHED */
1414 PL_parser->oldbufptr = buf + oldbufptr_pos;
1415 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1416 PL_parser->linestart = buf + linestart_pos;
1417 if (PL_parser->last_uni)
1418 PL_parser->last_uni = buf + last_uni_pos;
1419 if (PL_parser->last_lop)
1420 PL_parser->last_lop = buf + last_lop_pos;
1421 if (PL_parser->preambling != NOLINE) {
1422 CopLINE_set(PL_curcop, PL_parser->preambling + 1);
1423 PL_parser->preambling = NOLINE;
1425 if ( got_some_for_debugger
1426 && PERLDB_LINE_OR_SAVESRC
1427 && PL_curstash != PL_debstash)
1429 /* debugger active and we're not compiling the debugger code,
1430 * so store the line into the debugger's array of lines
1432 update_debugger_info(NULL, buf+old_bufend_pos,
1433 new_bufend_pos-old_bufend_pos);
1439 =for apidoc lex_peek_unichar
1441 Looks ahead one (Unicode) character in the text currently being lexed.
1442 Returns the codepoint (unsigned integer value) of the next character,
1443 or -1 if lexing has reached the end of the input text. To consume the
1444 peeked character, use L</lex_read_unichar>.
1446 If the next character is in (or extends into) the next chunk of input
1447 text, the next chunk will be read in. Normally the current chunk will be
1448 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1449 bit set, then the current chunk will not be discarded.
1451 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1452 is encountered, an exception is generated.
1458 Perl_lex_peek_unichar(pTHX_ U32 flags)
1462 if (flags & ~(LEX_KEEP_PREVIOUS))
1463 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1464 s = PL_parser->bufptr;
1465 bufend = PL_parser->bufend;
1471 if (!lex_next_chunk(flags))
1473 s = PL_parser->bufptr;
1474 bufend = PL_parser->bufend;
1477 if (UTF8_IS_INVARIANT(head))
1479 if (UTF8_IS_START(head)) {
1480 len = UTF8SKIP(&head);
1481 while ((STRLEN)(bufend-s) < len) {
1482 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1484 s = PL_parser->bufptr;
1485 bufend = PL_parser->bufend;
1488 unichar = utf8n_to_uvchr((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1489 if (retlen == (STRLEN)-1) {
1490 _force_out_malformed_utf8_message((U8 *) s,
1493 1 /* 1 means die */ );
1494 NOT_REACHED; /* NOTREACHED */
1499 if (!lex_next_chunk(flags))
1501 s = PL_parser->bufptr;
1508 =for apidoc lex_read_unichar
1510 Reads the next (Unicode) character in the text currently being lexed.
1511 Returns the codepoint (unsigned integer value) of the character read,
1512 and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1513 if lexing has reached the end of the input text. To non-destructively
1514 examine the next character, use L</lex_peek_unichar> instead.
1516 If the next character is in (or extends into) the next chunk of input
1517 text, the next chunk will be read in. Normally the current chunk will be
1518 discarded at the same time, but if C<flags> has the C<LEX_KEEP_PREVIOUS>
1519 bit set, then the current chunk will not be discarded.
1521 If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1522 is encountered, an exception is generated.
1528 Perl_lex_read_unichar(pTHX_ U32 flags)
1531 if (flags & ~(LEX_KEEP_PREVIOUS))
1532 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1533 c = lex_peek_unichar(flags);
1536 COPLINE_INC_WITH_HERELINES;
1538 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1540 ++(PL_parser->bufptr);
1546 =for apidoc lex_read_space
1548 Reads optional spaces, in Perl style, in the text currently being
1549 lexed. The spaces may include ordinary whitespace characters and
1550 Perl-style comments. C<#line> directives are processed if encountered.
1551 L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1552 at a non-space character (or the end of the input text).
1554 If spaces extend into the next chunk of input text, the next chunk will
1555 be read in. Normally the current chunk will be discarded at the same
1556 time, but if C<flags> has the C<LEX_KEEP_PREVIOUS> bit set, then the current
1557 chunk will not be discarded.
1562 #define LEX_NO_INCLINE 0x40000000
1563 #define LEX_NO_NEXT_CHUNK 0x80000000
1566 Perl_lex_read_space(pTHX_ U32 flags)
1569 const bool can_incline = !(flags & LEX_NO_INCLINE);
1570 bool need_incline = 0;
1571 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
1572 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1573 s = PL_parser->bufptr;
1574 bufend = PL_parser->bufend;
1580 } while (!(c == '\n' || (c == 0 && s == bufend)));
1581 } else if (c == '\n') {
1584 PL_parser->linestart = s;
1590 } else if (isSPACE(c)) {
1592 } else if (c == 0 && s == bufend) {
1595 if (flags & LEX_NO_NEXT_CHUNK)
1597 PL_parser->bufptr = s;
1598 l = CopLINE(PL_curcop);
1599 CopLINE(PL_curcop) += PL_parser->herelines + 1;
1600 got_more = lex_next_chunk(flags);
1601 CopLINE_set(PL_curcop, l);
1602 s = PL_parser->bufptr;
1603 bufend = PL_parser->bufend;
1606 if (can_incline && need_incline && PL_parser->rsfp) {
1616 PL_parser->bufptr = s;
1621 =for apidoc validate_proto
1623 This function performs syntax checking on a prototype, C<proto>.
1624 If C<warn> is true, any illegal characters or mismatched brackets
1625 will trigger illegalproto warnings, declaring that they were
1626 detected in the prototype for C<name>.
1628 The return value is C<true> if this is a valid prototype, and
1629 C<false> if it is not, regardless of whether C<warn> was C<true> or
1632 Note that C<NULL> is a valid C<proto> and will always return C<true>.
1639 Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
1641 STRLEN len, origlen;
1643 bool bad_proto = FALSE;
1644 bool in_brackets = FALSE;
1645 bool after_slash = FALSE;
1646 char greedy_proto = ' ';
1647 bool proto_after_greedy_proto = FALSE;
1648 bool must_be_last = FALSE;
1649 bool underscore = FALSE;
1650 bool bad_proto_after_underscore = FALSE;
1652 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1657 p = SvPV(proto, len);
1659 for (; len--; p++) {
1662 proto_after_greedy_proto = TRUE;
1664 if (!memCHRs(";@%", *p))
1665 bad_proto_after_underscore = TRUE;
1668 if (!memCHRs("$@%*;[]&\\_+", *p) || *p == '\0') {
1675 in_brackets = FALSE;
1676 else if ((*p == '@' || *p == '%')
1680 must_be_last = TRUE;
1689 after_slash = FALSE;
1694 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
1697 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1698 origlen, UNI_DISPLAY_ISPRINT)
1699 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1701 if (curstash && !memchr(SvPVX(name), ':', SvCUR(name))) {
1702 SV *name2 = sv_2mortal(newSVsv(PL_curstname));
1703 sv_catpvs(name2, "::");
1704 sv_catsv(name2, (SV *)name);
1708 if (proto_after_greedy_proto)
1709 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1710 "Prototype after '%c' for %" SVf " : %s",
1711 greedy_proto, SVfARG(name), p);
1713 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1714 "Missing ']' in prototype for %" SVf " : %s",
1717 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1718 "Illegal character in prototype for %" SVf " : %s",
1720 if (bad_proto_after_underscore)
1721 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1722 "Illegal character after '_' in prototype for %" SVf " : %s",
1726 return (! (proto_after_greedy_proto || bad_proto) );
1731 * This subroutine has nothing to do with tilting, whether at windmills
1732 * or pinball tables. Its name is short for "increment line". It
1733 * increments the current line number in CopLINE(PL_curcop) and checks
1734 * to see whether the line starts with a comment of the form
1735 * # line 500 "foo.pm"
1736 * If so, it sets the current line number and file to the values in the comment.
1740 S_incline(pTHX_ const char *s, const char *end)
1748 PERL_ARGS_ASSERT_INCLINE;
1752 COPLINE_INC_WITH_HERELINES;
1753 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1754 && s+1 == PL_bufend && *s == ';') {
1755 /* fake newline in string eval */
1756 CopLINE_dec(PL_curcop);
1761 while (SPACE_OR_TAB(*s))
1763 if (memBEGINs(s, (STRLEN) (end - s), "line"))
1764 s += sizeof("line") - 1;
1767 if (SPACE_OR_TAB(*s))
1771 while (SPACE_OR_TAB(*s))
1779 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
1781 while (SPACE_OR_TAB(*s))
1783 if (*s == '"' && (t = (char *) memchr(s+1, '"', end - s))) {
1789 while (*t && !isSPACE(*t))
1793 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
1795 if (*e != '\n' && *e != '\0')
1796 return; /* false alarm */
1798 if (!grok_atoUV(n, &uv, &e))
1800 line_num = ((line_t)uv) - 1;
1803 const STRLEN len = t - s;
1805 if (!PL_rsfp && !PL_parser->filtered) {
1806 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1807 * to *{"::_<newfilename"} */
1808 /* However, the long form of evals is only turned on by the
1809 debugger - usually they're "(eval %lu)" */
1810 GV * const cfgv = CopFILEGV(PL_curcop);
1813 STRLEN tmplen2 = len;
1817 if (tmplen2 + 2 <= sizeof smallbuf)
1820 Newx(tmpbuf2, tmplen2 + 2, char);
1825 memcpy(tmpbuf2 + 2, s, tmplen2);
1828 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
1830 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
1831 /* adjust ${"::_<newfilename"} to store the new file name */
1832 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
1833 /* The line number may differ. If that is the case,
1834 alias the saved lines that are in the array.
1835 Otherwise alias the whole array. */
1836 if (CopLINE(PL_curcop) == line_num) {
1837 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1838 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
1840 else if (GvAV(cfgv)) {
1841 AV * const av = GvAV(cfgv);
1842 const line_t start = CopLINE(PL_curcop)+1;
1843 SSize_t items = AvFILLp(av) - start;
1845 AV * const av2 = GvAVn(gv2);
1846 SV **svp = AvARRAY(av) + start;
1847 Size_t l = line_num+1;
1848 while (items-- && l < SSize_t_MAX && l == (line_t)l)
1849 av_store(av2, (SSize_t)l++, SvREFCNT_inc(*svp++));
1854 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
1857 CopFILE_free(PL_curcop);
1858 CopFILE_setn(PL_curcop, s, len);
1860 CopLINE_set(PL_curcop, line_num);
1864 S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
1866 AV *av = CopFILEAVx(PL_curcop);
1869 if (PL_parser->preambling == NOLINE) sv = newSV_type(SVt_PVMG);
1871 sv = *av_fetch(av, 0, 1);
1872 SvUPGRADE(sv, SVt_PVMG);
1874 if (!SvPOK(sv)) SvPVCLEAR(sv);
1876 sv_catsv(sv, orig_sv);
1878 sv_catpvn(sv, buf, len);
1883 if (PL_parser->preambling == NOLINE)
1884 av_store(av, CopLINE(PL_curcop), sv);
1890 * Called to gobble the appropriate amount and type of whitespace.
1891 * Skips comments as well.
1892 * Returns the next character after the whitespace that is skipped.
1895 * Same thing, but look ahead without incrementing line numbers or
1896 * adjusting PL_linestart.
1899 #define skipspace(s) skipspace_flags(s, 0)
1900 #define peekspace(s) skipspace_flags(s, LEX_NO_INCLINE)
1903 Perl_skipspace_flags(pTHX_ char *s, U32 flags)
1905 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
1906 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
1907 while (s < PL_bufend && (SPACE_OR_TAB(*s) || !*s))
1910 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1912 lex_read_space(flags | LEX_KEEP_PREVIOUS |
1913 (PL_lex_inwhat || PL_lex_state == LEX_FORMLINE ?
1914 LEX_NO_NEXT_CHUNK : 0));
1916 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1917 if (PL_linestart > PL_bufptr)
1918 PL_bufptr = PL_linestart;
1926 * Check the unary operators to ensure there's no ambiguity in how they're
1927 * used. An ambiguous piece of code would be:
1929 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1930 * the +5 is its argument.
1938 if (PL_oldoldbufptr != PL_last_uni)
1940 while (isSPACE(*PL_last_uni))
1943 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF) || *s == '-')
1944 s += UTF ? UTF8SKIP(s) : 1;
1945 if (s < PL_bufptr && memchr(s, '(', PL_bufptr - s))
1948 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1949 "Warning: Use of \"%" UTF8f "\" without parentheses is ambiguous",
1950 UTF8fARG(UTF, (int)(s - PL_last_uni), PL_last_uni));
1954 * LOP : macro to build a list operator. Its behaviour has been replaced
1955 * with a subroutine, S_lop() for which LOP is just another name.
1958 #define LOP(f,x) return lop(f,x,s)
1962 * Build a list operator (or something that might be one). The rules:
1963 * - if we have a next token, then it's a list operator (no parens) for
1964 * which the next token has already been parsed; e.g.,
1967 * - if the next thing is an opening paren, then it's a function
1968 * - else it's a list operator
1972 S_lop(pTHX_ I32 f, U8 x, char *s)
1974 PERL_ARGS_ASSERT_LOP;
1979 PL_last_lop = PL_oldbufptr;
1980 PL_last_lop_op = (OPCODE)f;
1985 return REPORT(FUNC);
1988 return REPORT(FUNC);
1991 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1992 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
1993 return REPORT(LSTOP);
1999 * When the lexer realizes it knows the next token (for instance,
2000 * it is reordering tokens for the parser) then it can call S_force_next
2001 * to know what token to return the next time the lexer is called. Caller
2002 * will need to set PL_nextval[] and possibly PL_expect to ensure
2003 * the lexer handles the token correctly.
2007 S_force_next(pTHX_ I32 type)
2011 PerlIO_printf(Perl_debug_log, "### forced token:\n");
2012 tokereport(type, &NEXTVAL_NEXTTOKE);
2015 assert(PL_nexttoke < C_ARRAY_LENGTH(PL_nexttype));
2016 PL_nexttype[PL_nexttoke] = type;
2023 * This subroutine handles postfix deref syntax after the arrow has already
2024 * been emitted. @* $* etc. are emitted as two separate tokens right here.
2025 * @[ @{ %[ %{ *{ are emitted also as two tokens, but this function emits
2026 * only the first, leaving yylex to find the next.
2030 S_postderef(pTHX_ int const funny, char const next)
2032 assert(funny == DOLSHARP || memCHRs("$@%&*", funny));
2034 PL_expect = XOPERATOR;
2035 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
2036 assert('@' == funny || '$' == funny || DOLSHARP == funny);
2037 PL_lex_state = LEX_INTERPEND;
2039 force_next(POSTJOIN);
2045 if ('@' == funny && PL_lex_state == LEX_INTERPNORMAL
2046 && !PL_lex_brackets)
2048 PL_expect = XOPERATOR;
2057 int yyc = PL_parser->yychar;
2058 if (yyc != YYEMPTY) {
2060 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2061 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
2062 PL_lex_allbrackets--;
2064 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2065 } else if (yyc == '('/*)*/) {
2066 PL_lex_allbrackets--;
2071 PL_parser->yychar = YYEMPTY;
2076 S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
2078 SV * const sv = newSVpvn_utf8(start, len,
2082 && is_utf8_non_invariant_string((const U8*)start, len));
2088 * When the lexer knows the next thing is a word (for instance, it has
2089 * just seen -> and it knows that the next char is a word char, then
2090 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2094 * char *start : buffer position (must be within PL_linestr)
2095 * int token : PL_next* will be this type of bare word
2096 * (e.g., METHOD,BAREWORD)
2097 * int check_keyword : if true, Perl checks to make sure the word isn't
2098 * a keyword (do this if the word is a label, e.g. goto FOO)
2099 * int allow_pack : if true, : characters will also be allowed (require,
2100 * use, etc. do this)
2104 S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
2109 PERL_ARGS_ASSERT_FORCE_WORD;
2111 start = skipspace(start);
2113 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
2114 || (allow_pack && *s == ':' && s[1] == ':') )
2116 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
2117 if (check_keyword) {
2118 char *s2 = PL_tokenbuf;
2120 if (allow_pack && memBEGINPs(s2, len, "CORE::")) {
2121 s2 += sizeof("CORE::") - 1;
2122 len2 -= sizeof("CORE::") - 1;
2124 if (keyword(s2, len2, 0))
2127 if (token == METHOD) {
2132 PL_expect = XOPERATOR;
2135 NEXTVAL_NEXTTOKE.opval
2136 = newSVOP(OP_CONST,0,
2137 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
2138 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
2146 * Called when the lexer wants $foo *foo &foo etc, but the program
2147 * text only contains the "foo" portion. The first argument is a pointer
2148 * to the "foo", and the second argument is the type symbol to prefix.
2149 * Forces the next token to be a "BAREWORD".
2150 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
2154 S_force_ident(pTHX_ const char *s, int kind)
2156 PERL_ARGS_ASSERT_FORCE_IDENT;
2159 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
2160 OP* const o = newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2161 UTF ? SVf_UTF8 : 0));
2162 NEXTVAL_NEXTTOKE.opval = o;
2163 force_next(BAREWORD);
2165 o->op_private = OPpCONST_ENTERED;
2166 /* XXX see note in pp_entereval() for why we forgo typo
2167 warnings if the symbol must be introduced in an eval.
2169 gv_fetchpvn_flags(s, len,
2170 (PL_in_eval ? GV_ADDMULTI
2171 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
2172 kind == '$' ? SVt_PV :
2173 kind == '@' ? SVt_PVAV :
2174 kind == '%' ? SVt_PVHV :
2182 S_force_ident_maybe_lex(pTHX_ char pit)
2184 NEXTVAL_NEXTTOKE.ival = pit;
2189 Perl_str_to_version(pTHX_ SV *sv)
2194 const char *start = SvPV_const(sv,len);
2195 const char * const end = start + len;
2196 const bool utf = cBOOL(SvUTF8(sv));
2198 PERL_ARGS_ASSERT_STR_TO_VERSION;
2200 while (start < end) {
2204 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
2209 retval += ((NV)n)/nshift;
2218 * Forces the next token to be a version number.
2219 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2220 * and if "guessing" is TRUE, then no new token is created (and the caller
2221 * must use an alternative parsing method).
2225 S_force_version(pTHX_ char *s, int guessing)
2230 PERL_ARGS_ASSERT_FORCE_VERSION;
2238 while (isDIGIT(*d) || *d == '_' || *d == '.')
2240 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
2242 s = scan_num(s, &pl_yylval);
2243 version = pl_yylval.opval;
2244 ver = cSVOPx(version)->op_sv;
2245 if (SvPOK(ver) && !SvNIOK(ver)) {
2246 SvUPGRADE(ver, SVt_PVNV);
2247 SvNV_set(ver, str_to_version(ver));
2248 SvNOK_on(ver); /* hint that it is a version */
2251 else if (guessing) {
2256 /* NOTE: The parser sees the package name and the VERSION swapped */
2257 NEXTVAL_NEXTTOKE.opval = version;
2258 force_next(BAREWORD);
2264 * S_force_strict_version
2265 * Forces the next token to be a version number using strict syntax rules.
2269 S_force_strict_version(pTHX_ char *s)
2272 const char *errstr = NULL;
2274 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2276 while (isSPACE(*s)) /* leading whitespace */
2279 if (is_STRICT_VERSION(s,&errstr)) {
2281 s = (char *)scan_version(s, ver, 0);
2282 version = newSVOP(OP_CONST, 0, ver);
2284 else if ((*s != ';' && *s != '{' && *s != '}' )
2285 && (s = skipspace(s), (*s != ';' && *s != '{' && *s != '}' )))
2289 yyerror(errstr); /* version required */
2293 /* NOTE: The parser sees the package name and the VERSION swapped */
2294 NEXTVAL_NEXTTOKE.opval = version;
2295 force_next(BAREWORD);
2302 * Turns any \\ into \ in a quoted string passed in in 'sv', returning 'sv',
2303 * modified as necessary. However, if HINT_NEW_STRING is on, 'sv' is
2304 * unchanged, and a new SV containing the modified input is returned.
2308 S_tokeq(pTHX_ SV *sv)
2315 PERL_ARGS_ASSERT_TOKEQ;
2319 assert (!SvIsCOW(sv));
2320 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1) /* <<'heredoc' */
2324 /* This is relying on the SV being "well formed" with a trailing '\0' */
2325 while (s < send && !(*s == '\\' && s[1] == '\\'))
2330 if ( PL_hints & HINT_NEW_STRING ) {
2331 pv = newSVpvn_flags(SvPVX_const(pv), SvCUR(sv),
2332 SVs_TEMP | SvUTF8(sv));
2336 if (s + 1 < send && (s[1] == '\\'))
2337 s++; /* all that, just for this */
2342 SvCUR_set(sv, d - SvPVX_const(sv));
2344 if ( PL_hints & HINT_NEW_STRING )
2345 return new_constant(NULL, 0, "q", sv, pv, "q", 1, NULL);
2350 * Now come three functions related to double-quote context,
2351 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2352 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2353 * interact with PL_lex_state, and create fake ( ... ) argument lists
2354 * to handle functions and concatenation.
2358 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
2363 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
2365 * Pattern matching will set PL_lex_op to the pattern-matching op to
2366 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
2368 * OP_CONST is easy--just make the new op and return.
2370 * Everything else becomes a FUNC.
2372 * Sets PL_lex_state to LEX_INTERPPUSH unless ival was OP_NULL or we
2373 * had an OP_CONST. This just sets us up for a
2374 * call to S_sublex_push().
2378 S_sublex_start(pTHX)
2380 const I32 op_type = pl_yylval.ival;
2382 if (op_type == OP_NULL) {
2383 pl_yylval.opval = PL_lex_op;
2387 if (op_type == OP_CONST) {
2388 SV *sv = PL_lex_stuff;
2389 PL_lex_stuff = NULL;
2392 if (SvTYPE(sv) == SVt_PVIV) {
2393 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2395 const char * const p = SvPV_const(sv, len);
2396 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
2400 pl_yylval.opval = newSVOP(op_type, 0, sv);
2404 PL_parser->lex_super_state = PL_lex_state;
2405 PL_parser->lex_sub_inwhat = (U16)op_type;
2406 PL_parser->lex_sub_op = PL_lex_op;
2407 PL_parser->sub_no_recover = FALSE;
2408 PL_parser->sub_error_count = PL_error_count;
2409 PL_lex_state = LEX_INTERPPUSH;
2413 pl_yylval.opval = PL_lex_op;
2423 * Create a new scope to save the lexing state. The scope will be
2424 * ended in S_sublex_done. Returns a '(', starting the function arguments
2425 * to the uc, lc, etc. found before.
2426 * Sets PL_lex_state to LEX_INTERPCONCAT.
2433 const bool is_heredoc = PL_multi_close == '<';
2436 PL_lex_state = PL_parser->lex_super_state;
2437 SAVEI8(PL_lex_dojoin);
2438 SAVEI32(PL_lex_brackets);
2439 SAVEI32(PL_lex_allbrackets);
2440 SAVEI32(PL_lex_formbrack);
2441 SAVEI8(PL_lex_fakeeof);
2442 SAVEI32(PL_lex_casemods);
2443 SAVEI32(PL_lex_starts);
2444 SAVEI8(PL_lex_state);
2445 SAVESPTR(PL_lex_repl);
2446 SAVEVPTR(PL_lex_inpat);
2447 SAVEI16(PL_lex_inwhat);
2450 SAVECOPLINE(PL_curcop);
2451 SAVEI32(PL_multi_end);
2452 SAVEI32(PL_parser->herelines);
2453 PL_parser->herelines = 0;
2455 SAVEIV(PL_multi_close);
2456 SAVEPPTR(PL_bufptr);
2457 SAVEPPTR(PL_bufend);
2458 SAVEPPTR(PL_oldbufptr);
2459 SAVEPPTR(PL_oldoldbufptr);
2460 SAVEPPTR(PL_last_lop);
2461 SAVEPPTR(PL_last_uni);
2462 SAVEPPTR(PL_linestart);
2463 SAVESPTR(PL_linestr);
2464 SAVEGENERICPV(PL_lex_brackstack);
2465 SAVEGENERICPV(PL_lex_casestack);
2466 SAVEGENERICPV(PL_parser->lex_shared);
2467 SAVEBOOL(PL_parser->lex_re_reparsing);
2468 SAVEI32(PL_copline);
2470 /* The here-doc parser needs to be able to peek into outer lexing
2471 scopes to find the body of the here-doc. So we put PL_linestr and
2472 PL_bufptr into lex_shared, to ‘share’ those values.
2474 PL_parser->lex_shared->ls_linestr = PL_linestr;
2475 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
2477 PL_linestr = PL_lex_stuff;
2478 PL_lex_repl = PL_parser->lex_sub_repl;
2479 PL_lex_stuff = NULL;
2480 PL_parser->lex_sub_repl = NULL;
2482 /* Arrange for PL_lex_stuff to be freed on scope exit, in case it gets
2483 set for an inner quote-like operator and then an error causes scope-
2484 popping. We must not have a PL_lex_stuff value left dangling, as
2485 that breaks assumptions elsewhere. See bug #123617. */
2486 SAVEGENERICSV(PL_lex_stuff);
2487 SAVEGENERICSV(PL_parser->lex_sub_repl);
2489 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2490 = SvPVX(PL_linestr);
2491 PL_bufend += SvCUR(PL_linestr);
2492 PL_last_lop = PL_last_uni = NULL;
2493 SAVEFREESV(PL_linestr);
2494 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
2496 PL_lex_dojoin = FALSE;
2497 PL_lex_brackets = PL_lex_formbrack = 0;
2498 PL_lex_allbrackets = 0;
2499 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2500 Newx(PL_lex_brackstack, 120, char);
2501 Newx(PL_lex_casestack, 12, char);
2502 PL_lex_casemods = 0;
2503 *PL_lex_casestack = '\0';
2505 PL_lex_state = LEX_INTERPCONCAT;
2507 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
2508 PL_copline = NOLINE;
2510 Newxz(shared, 1, LEXSHARED);
2511 shared->ls_prev = PL_parser->lex_shared;
2512 PL_parser->lex_shared = shared;
2514 PL_lex_inwhat = PL_parser->lex_sub_inwhat;
2515 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
2516 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2517 PL_lex_inpat = PL_parser->lex_sub_op;
2519 PL_lex_inpat = NULL;
2521 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2522 PL_in_eval &= ~EVAL_RE_REPARSING;
2529 * Restores lexer state after a S_sublex_push.
2535 if (!PL_lex_starts++) {
2536 SV * const sv = newSVpvs("");
2537 if (SvUTF8(PL_linestr))
2539 PL_expect = XOPERATOR;
2540 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
2544 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2545 PL_lex_state = LEX_INTERPCASEMOD;
2549 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
2550 assert(PL_lex_inwhat != OP_TRANSR);
2552 assert (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS);
2553 PL_linestr = PL_lex_repl;
2555 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2556 PL_bufend += SvCUR(PL_linestr);
2557 PL_last_lop = PL_last_uni = NULL;
2558 PL_lex_dojoin = FALSE;
2559 PL_lex_brackets = 0;
2560 PL_lex_allbrackets = 0;
2561 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
2562 PL_lex_casemods = 0;
2563 *PL_lex_casestack = '\0';
2565 if (SvEVALED(PL_lex_repl)) {
2566 PL_lex_state = LEX_INTERPNORMAL;
2568 /* we don't clear PL_lex_repl here, so that we can check later
2569 whether this is an evalled subst; that means we rely on the
2570 logic to ensure sublex_done() is called again only via the
2571 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
2574 PL_lex_state = LEX_INTERPCONCAT;
2577 if (SvTYPE(PL_linestr) >= SVt_PVNV) {
2578 CopLINE(PL_curcop) +=
2579 ((XPVNV*)SvANY(PL_linestr))->xnv_u.xnv_lines
2580 + PL_parser->herelines;
2581 PL_parser->herelines = 0;
2586 const line_t l = CopLINE(PL_curcop);
2588 if (PL_parser->sub_error_count != PL_error_count) {
2589 if (PL_parser->sub_no_recover) {
2594 if (PL_multi_close == '<')
2595 PL_parser->herelines += l - PL_multi_end;
2596 PL_bufend = SvPVX(PL_linestr);
2597 PL_bufend += SvCUR(PL_linestr);
2598 PL_expect = XOPERATOR;
2604 Perl_load_charnames(pTHX_ SV * char_name, const char * context,
2605 const STRLEN context_len, const char ** error_msg)
2607 /* Load the official _charnames module if not already there. The
2608 * parameters are just to give info for any error messages generated:
2609 * char_name a name to look up which is the reason for loading this
2610 * context 'char_name' in the context in the input in which it appears
2611 * context_len how many bytes 'context' occupies
2612 * error_msg *error_msg will be set to any error
2614 * Returns the ^H table if success; otherwise NULL */
2621 PERL_ARGS_ASSERT_LOAD_CHARNAMES;
2623 /* This loop is executed 1 1/2 times. On the first time through, if it
2624 * isn't already loaded, try loading it, and iterate just once to see if it
2626 for (i = 0; i < 2; i++) {
2627 table = GvHV(PL_hintgv); /* ^H */
2630 && (PL_hints & HINT_LOCALIZE_HH)
2631 && (cvp = hv_fetchs(table, "charnames", FALSE))
2634 return table; /* Quit if already loaded */
2638 Perl_load_module(aTHX_
2640 newSVpvs("_charnames"),
2642 /* version parameter; no need to specify it, as if we get too early
2643 * a version, will fail anyway, not being able to find 'charnames'
2652 /* Here, it failed; new_constant will give appropriate error messages */
2654 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2655 context, context_len, error_msg);
2662 S_get_and_check_backslash_N_name_wrapper(pTHX_ const char* s, const char* const e)
2664 /* This justs wraps get_and_check_backslash_N_name() to output any error
2665 * message it returns. */
2667 const char * error_msg = NULL;
2670 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER;
2672 /* charnames doesn't work well if there have been errors found */
2673 if (PL_error_count > 0) {
2677 result = get_and_check_backslash_N_name(s, e, cBOOL(UTF), &error_msg);
2680 yyerror_pv(error_msg, UTF ? SVf_UTF8 : 0);
2687 Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
2688 const char* const e,
2690 const char ** error_msg)
2692 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2693 * interior, hence to the "}". Finds what the name resolves to, returning
2694 * an SV* containing it; NULL if no valid one found.
2696 * 'is_utf8' is TRUE if we know we want the result to be UTF-8 even if it
2697 * doesn't have to be. */
2707 /* Points to the beginning of the \N{... so that any messages include the
2708 * context of what's failing*/
2709 const char* context = s - 3;
2710 STRLEN context_len = e - context + 1; /* include all of \N{...} */
2714 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2717 assert(s > (char *) 3);
2719 char_name = newSVpvn_flags(s, e - s, (is_utf8) ? SVf_UTF8 : 0);
2721 if (!SvCUR(char_name)) {
2722 SvREFCNT_dec_NN(char_name);
2723 /* diag_listed_as: Unknown charname '%s' */
2724 *error_msg = Perl_form(aTHX_ "Unknown charname ''");
2728 /* Autoload the charnames module */
2730 table = load_charnames(char_name, context, context_len, error_msg);
2731 if (table == NULL) {
2736 res = new_constant( NULL, 0, "charnames", char_name, NULL,
2737 context, context_len, error_msg);
2739 *error_msg = Perl_form(aTHX_ "Unknown charname '%s'", SvPVX(char_name));
2745 /* See if the charnames handler is the Perl core's, and if so, we can skip
2746 * the validation needed for a user-supplied one, as Perl's does its own
2748 cvp = hv_fetchs(table, "charnames", FALSE);
2749 if (cvp && (cv = *cvp) && SvROK(cv) && (rv = SvRV(cv),
2750 SvTYPE(rv) == SVt_PVCV) && ((stash = CvSTASH(rv)) != NULL))
2752 const char * const name = HvNAME(stash);
2753 if (memEQs(name, HvNAMELEN(stash), "_charnames")) {
2758 /* Here, it isn't Perl's charname handler. We can't rely on a
2759 * user-supplied handler to validate the input name. For non-ut8 input,
2760 * look to see that the first character is legal. Then loop through the
2761 * rest checking that each is a continuation */
2763 /* This code makes the reasonable assumption that the only Latin1-range
2764 * characters that begin a character name alias are alphabetic, otherwise
2765 * would have to create a isCHARNAME_BEGIN macro */
2768 if (! isALPHAU(*s)) {
2773 if (! isCHARNAME_CONT(*s)) {
2776 if (*s == ' ' && *(s-1) == ' ') {
2783 /* Similarly for utf8. For invariants can check directly; for other
2784 * Latin1, can calculate their code point and check; otherwise use an
2786 if (UTF8_IS_INVARIANT(*s)) {
2787 if (! isALPHAU(*s)) {
2791 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2792 if (! isALPHAU(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)))) {
2798 if (! _invlist_contains_cp(PL_utf8_charname_begin,
2799 utf8_to_uvchr_buf((U8 *) s,
2809 if (UTF8_IS_INVARIANT(*s)) {
2810 if (! isCHARNAME_CONT(*s)) {
2813 if (*s == ' ' && *(s-1) == ' ') {
2818 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2819 if (! isCHARNAME_CONT(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1))))
2826 if (! _invlist_contains_cp(PL_utf8_charname_continue,
2827 utf8_to_uvchr_buf((U8 *) s,
2837 if (*(s-1) == ' ') {
2838 /* diag_listed_as: charnames alias definitions may not contain
2839 trailing white-space; marked by <-- HERE in %s
2841 *error_msg = Perl_form(aTHX_
2842 "charnames alias definitions may not contain trailing "
2843 "white-space; marked by <-- HERE in %.*s<-- HERE %.*s",
2844 (int)(s - context + 1), context,
2845 (int)(e - s + 1), s + 1);
2849 if (SvUTF8(res)) { /* Don't accept malformed charname value */
2850 const U8* first_bad_char_loc;
2852 const char* const str = SvPV_const(res, len);
2853 if (UNLIKELY(! is_utf8_string_loc((U8 *) str, len,
2854 &first_bad_char_loc)))
2856 _force_out_malformed_utf8_message(first_bad_char_loc,
2857 (U8 *) PL_parser->bufend,
2859 0 /* 0 means don't die */ );
2860 /* diag_listed_as: Malformed UTF-8 returned by \N{%s}
2861 immediately after '%s' */
2862 *error_msg = Perl_form(aTHX_
2863 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2864 (int) context_len, context,
2865 (int) ((char *) first_bad_char_loc - str), str);
2874 /* The final %.*s makes sure that should the trailing NUL be missing
2875 * that this print won't run off the end of the string */
2876 /* diag_listed_as: Invalid character in \N{...}; marked by <-- HERE
2878 *error_msg = Perl_form(aTHX_
2879 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2880 (int)(s - context + 1), context,
2881 (int)(e - s + 1), s + 1);
2886 /* diag_listed_as: charnames alias definitions may not contain a
2887 sequence of multiple spaces; marked by <-- HERE
2889 *error_msg = Perl_form(aTHX_
2890 "charnames alias definitions may not contain a sequence of "
2891 "multiple spaces; marked by <-- HERE in %.*s<-- HERE %.*s",
2892 (int)(s - context + 1), context,
2893 (int)(e - s + 1), s + 1);
2900 Extracts the next constant part of a pattern, double-quoted string,
2901 or transliteration. This is terrifying code.
2903 For example, in parsing the double-quoted string "ab\x63$d", it would
2904 stop at the '$' and return an OP_CONST containing 'abc'.
2906 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
2907 processing a pattern (PL_lex_inpat is true), a transliteration
2908 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
2910 Returns a pointer to the character scanned up to. If this is
2911 advanced from the start pointer supplied (i.e. if anything was
2912 successfully parsed), will leave an OP_CONST for the substring scanned
2913 in pl_yylval. Caller must intuit reason for not parsing further
2914 by looking at the next characters herself.
2918 \N{FOO} => \N{U+hex_for_character_FOO}
2919 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
2922 all other \-char, including \N and \N{ apart from \N{ABC}
2925 @ and $ where it appears to be a var, but not for $ as tail anchor
2929 In transliterations:
2930 characters are VERY literal, except for - not at the start or end
2931 of the string, which indicates a range. However some backslash sequences
2932 are recognized: \r, \n, and the like
2933 \007 \o{}, \x{}, \N{}
2934 If all elements in the transliteration are below 256,
2935 scan_const expands the range to the full set of intermediate
2936 characters. If the range is in utf8, the hyphen is replaced with
2937 a certain range mark which will be handled by pmtrans() in op.c.
2939 In double-quoted strings:
2941 all those recognized in transliterations
2942 deprecated backrefs: \1 (in substitution replacements)
2943 case and quoting: \U \Q \E
2946 scan_const does *not* construct ops to handle interpolated strings.
2947 It stops processing as soon as it finds an embedded $ or @ variable
2948 and leaves it to the caller to work out what's going on.
2950 embedded arrays (whether in pattern or not) could be:
2951 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2953 $ in double-quoted strings must be the symbol of an embedded scalar.
2955 $ in pattern could be $foo or could be tail anchor. Assumption:
2956 it's a tail anchor if $ is the last thing in the string, or if it's
2957 followed by one of "()| \r\n\t"
2959 \1 (backreferences) are turned into $1 in substitutions
2961 The structure of the code is
2962 while (there's a character to process) {
2963 handle transliteration ranges
2964 skip regexp comments /(?#comment)/ and codes /(?{code})/
2965 skip #-initiated comments in //x patterns
2966 check for embedded arrays
2967 check for embedded scalars
2969 deprecate \1 in substitution replacements
2970 handle string-changing backslashes \l \U \Q \E, etc.
2971 switch (what was escaped) {
2972 handle \- in a transliteration (becomes a literal -)
2973 if a pattern and not \N{, go treat as regular character
2974 handle \132 (octal characters)
2975 handle \x15 and \x{1234} (hex characters)
2976 handle \N{name} (named characters, also \N{3,5} in a pattern)
2977 handle \cV (control characters)
2978 handle printf-style backslashes (\f, \r, \n, etc)
2981 } (end if backslash)
2982 handle regular character
2983 } (end while character to read)
2988 S_scan_const(pTHX_ char *start)
2990 char *send = PL_bufend; /* end of the constant */
2991 SV *sv = newSV(send - start); /* sv for the constant. See note below
2993 char *s = start; /* start of the constant */
2994 char *d = SvPVX(sv); /* destination for copies */
2995 bool dorange = FALSE; /* are we in a translit range? */
2996 bool didrange = FALSE; /* did we just finish a range? */
2997 bool in_charclass = FALSE; /* within /[...]/ */
2998 bool s_is_utf8 = cBOOL(UTF); /* Is the source string assumed to be
2999 UTF8? But, this can show as true
3000 when the source isn't utf8, as for
3001 example when it is entirely composed
3003 bool d_is_utf8 = FALSE; /* Output constant is UTF8 */
3004 STRLEN utf8_variant_count = 0; /* When not in UTF-8, this counts the
3005 number of characters found so far
3006 that will expand (into 2 bytes)
3007 should we have to convert to
3009 SV *res; /* result from charnames */
3010 STRLEN offset_to_max = 0; /* The offset in the output to where the range
3011 high-end character is temporarily placed */
3013 /* Does something require special handling in tr/// ? This avoids extra
3014 * work in a less likely case. As such, khw didn't feel it was worth
3015 * adding any branches to the more mainline code to handle this, which
3016 * means that this doesn't get set in some circumstances when things like
3017 * \x{100} get expanded out. As a result there needs to be extra testing
3018 * done in the tr code */
3019 bool has_above_latin1 = FALSE;
3021 /* Note on sizing: The scanned constant is placed into sv, which is
3022 * initialized by newSV() assuming one byte of output for every byte of
3023 * input. This routine expects newSV() to allocate an extra byte for a
3024 * trailing NUL, which this routine will append if it gets to the end of
3025 * the input. There may be more bytes of input than output (eg., \N{LATIN
3026 * CAPITAL LETTER A}), or more output than input if the constant ends up
3027 * recoded to utf8, but each time a construct is found that might increase
3028 * the needed size, SvGROW() is called. Its size parameter each time is
3029 * based on the best guess estimate at the time, namely the length used so
3030 * far, plus the length the current construct will occupy, plus room for
3031 * the trailing NUL, plus one byte for every input byte still unscanned */
3033 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3036 int backslash_N = 0; /* ? was the character from \N{} */
3037 int non_portable_endpoint = 0; /* ? In a range is an endpoint
3038 platform-specific like \x65 */
3041 PERL_ARGS_ASSERT_SCAN_CONST;
3043 assert(PL_lex_inwhat != OP_TRANSR);
3045 /* Protect sv from errors and fatal warnings. */
3046 ENTER_with_name("scan_const");
3049 /* A bunch of code in the loop below assumes that if s[n] exists and is not
3050 * NUL, then s[n+1] exists. This assertion makes sure that assumption is
3052 assert(*send == '\0');
3055 || dorange /* Handle tr/// range at right edge of input */
3058 /* get transliterations out of the way (they're most literal) */
3059 if (PL_lex_inwhat == OP_TRANS) {
3061 /* But there isn't any special handling necessary unless there is a
3062 * range, so for most cases we just drop down and handle the value
3063 * as any other. There are two exceptions.
3065 * 1. A hyphen indicates that we are actually going to have a
3066 * range. In this case, skip the '-', set a flag, then drop
3067 * down to handle what should be the end range value.
3068 * 2. After we've handled that value, the next time through, that
3069 * flag is set and we fix up the range.
3071 * Ranges entirely within Latin1 are expanded out entirely, in
3072 * order to make the transliteration a simple table look-up.
3073 * Ranges that extend above Latin1 have to be done differently, so
3074 * there is no advantage to expanding them here, so they are
3075 * stored here as Min, RANGE_INDICATOR, Max. 'RANGE_INDICATOR' is
3076 * a byte that can't occur in legal UTF-8, and hence can signify a
3077 * hyphen without any possible ambiguity. On EBCDIC machines, if
3078 * the range is expressed as Unicode, the Latin1 portion is
3079 * expanded out even if the range extends above Latin1. This is
3080 * because each code point in it has to be processed here
3081 * individually to get its native translation */
3085 /* Here, we don't think we're in a range. If the new character
3086 * is not a hyphen; or if it is a hyphen, but it's too close to
3087 * either edge to indicate a range, or if we haven't output any
3088 * characters yet then it's a regular character. */
3089 if (*s != '-' || s >= send - 1 || s == start || d == SvPVX(sv))
3092 /* A regular character. Process like any other, but first
3093 * clear any flags */
3097 non_portable_endpoint = 0;
3100 /* The tests here for being above Latin1 and similar ones
3101 * in the following 'else' suffice to find all such
3102 * occurences in the constant, except those added by a
3103 * backslash escape sequence, like \x{100}. Mostly, those
3104 * set 'has_above_latin1' as appropriate */
3105 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3106 has_above_latin1 = TRUE;
3109 /* Drops down to generic code to process current byte */
3111 else { /* Is a '-' in the context where it means a range */
3112 if (didrange) { /* Something like y/A-C-Z// */
3113 Perl_croak(aTHX_ "Ambiguous range in transliteration"
3119 s++; /* Skip past the hyphen */
3121 /* d now points to where the end-range character will be
3122 * placed. Drop down to get that character. We'll finish
3123 * processing the range the next time through the loop */
3125 if (s_is_utf8 && UTF8_IS_ABOVE_LATIN1(*s)) {
3126 has_above_latin1 = TRUE;
3129 /* Drops down to generic code to process current byte */
3131 } /* End of not a range */
3133 /* Here we have parsed a range. Now must handle it. At this
3135 * 'sv' is a SV* that contains the output string we are
3136 * constructing. The final two characters in that string
3137 * are the range start and range end, in order.
3138 * 'd' points to just beyond the range end in the 'sv' string,
3139 * where we would next place something
3144 IV range_max; /* last character in range */
3146 Size_t offset_to_min = 0;
3149 bool convert_unicode;
3150 IV real_range_max = 0;
3152 /* Get the code point values of the range ends. */
3153 max_ptr = (d_is_utf8) ? (char *) utf8_hop( (U8*) d, -1) : d - 1;
3154 offset_to_max = max_ptr - SvPVX_const(sv);
3156 /* We know the utf8 is valid, because we just constructed
3157 * it ourselves in previous loop iterations */
3158 min_ptr = (char*) utf8_hop( (U8*) max_ptr, -1);
3159 range_min = valid_utf8_to_uvchr( (U8*) min_ptr, NULL);
3160 range_max = valid_utf8_to_uvchr( (U8*) max_ptr, NULL);
3162 /* This compensates for not all code setting
3163 * 'has_above_latin1', so that we don't skip stuff that
3164 * should be executed */
3165 if (range_max > 255) {
3166 has_above_latin1 = TRUE;
3170 min_ptr = max_ptr - 1;
3171 range_min = * (U8*) min_ptr;
3172 range_max = * (U8*) max_ptr;
3175 /* If the range is just a single code point, like tr/a-a/.../,
3176 * that code point is already in the output, twice. We can
3177 * just back up over the second instance and avoid all the rest
3178 * of the work. But if it is a variant character, it's been
3179 * counted twice, so decrement. (This unlikely scenario is
3180 * special cased, like the one for a range of 2 code points
3181 * below, only because the main-line code below needs a range
3182 * of 3 or more to work without special casing. Might as well
3183 * get it out of the way now.) */
3184 if (UNLIKELY(range_max == range_min)) {
3186 if (! d_is_utf8 && ! UVCHR_IS_INVARIANT(range_max)) {
3187 utf8_variant_count--;
3193 /* On EBCDIC platforms, we may have to deal with portable
3194 * ranges. These happen if at least one range endpoint is a
3195 * Unicode value (\N{...}), or if the range is a subset of
3196 * [A-Z] or [a-z], and both ends are literal characters,
3197 * like 'A', and not like \x{C1} */
3199 cBOOL(backslash_N) /* \N{} forces Unicode,
3200 hence portable range */
3201 || ( ! non_portable_endpoint
3202 && (( isLOWER_A(range_min) && isLOWER_A(range_max))
3203 || (isUPPER_A(range_min) && isUPPER_A(range_max))));
3204 if (convert_unicode) {
3206 /* Special handling is needed for these portable ranges.
3207 * They are defined to be in Unicode terms, which includes
3208 * all the Unicode code points between the end points.
3209 * Convert to Unicode to get the Unicode range. Later we
3210 * will convert each code point in the range back to
3212 range_min = NATIVE_TO_UNI(range_min);
3213 range_max = NATIVE_TO_UNI(range_max);
3217 if (range_min > range_max) {
3219 if (convert_unicode) {
3220 /* Need to convert back to native for meaningful
3221 * messages for this platform */
3222 range_min = UNI_TO_NATIVE(range_min);
3223 range_max = UNI_TO_NATIVE(range_max);
3226 /* Use the characters themselves for the error message if
3227 * ASCII printables; otherwise some visible representation
3229 if (isPRINT_A(range_min) && isPRINT_A(range_max)) {
3231 "Invalid range \"%c-%c\" in transliteration operator",
3232 (char)range_min, (char)range_max);
3235 else if (convert_unicode) {
3236 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3238 "Invalid range \"\\N{U+%04" UVXf "}-\\N{U+%04"
3239 UVXf "}\" in transliteration operator",
3240 range_min, range_max);
3244 /* diag_listed_as: Invalid range "%s" in transliteration operator */
3246 "Invalid range \"\\x{%04" UVXf "}-\\x{%04" UVXf "}\""
3247 " in transliteration operator",
3248 range_min, range_max);
3252 /* If the range is exactly two code points long, they are
3253 * already both in the output */
3254 if (UNLIKELY(range_min + 1 == range_max)) {
3258 /* Here the range contains at least 3 code points */
3262 /* If everything in the transliteration is below 256, we
3263 * can avoid special handling later. A translation table
3264 * for each of those bytes is created by op.c. So we
3265 * expand out all ranges to their constituent code points.
3266 * But if we've encountered something above 255, the
3267 * expanding won't help, so skip doing that. But if it's
3268 * EBCDIC, we may have to look at each character below 256
3269 * if we have to convert to/from Unicode values */
3270 if ( has_above_latin1
3272 && (range_min > 255 || ! convert_unicode)
3275 const STRLEN off = d - SvPVX(sv);
3276 const STRLEN extra = 1 + (send - s) + 1;
3279 /* Move the high character one byte to the right; then
3280 * insert between it and the range begin, an illegal
3281 * byte which serves to indicate this is a range (using
3282 * a '-' would be ambiguous). */
3284 if (off + extra > SvLEN(sv)) {
3285 d = off + SvGROW(sv, off + extra);
3286 max_ptr = d - off + offset_to_max;
3290 while (e-- > max_ptr) {
3293 *(e + 1) = (char) RANGE_INDICATOR;
3297 /* Here, we're going to expand out the range. For EBCDIC
3298 * the range can extend above 255 (not so in ASCII), so
3299 * for EBCDIC, split it into the parts above and below
3302 if (range_max > 255) {
3303 real_range_max = range_max;
3309 /* Here we need to expand out the string to contain each
3310 * character in the range. Grow the output to handle this.
3311 * For non-UTF8, we need a byte for each code point in the
3312 * range, minus the three that we've already allocated for: the
3313 * hyphen, the min, and the max. For UTF-8, we need this
3314 * plus an extra byte for each code point that occupies two
3315 * bytes (is variant) when in UTF-8 (except we've already
3316 * allocated for the end points, including if they are
3317 * variants). For ASCII platforms and Unicode ranges on EBCDIC
3318 * platforms, it's easy to calculate a precise number. To
3319 * start, we count the variants in the range, which we need
3320 * elsewhere in this function anyway. (For the case where it
3321 * isn't easy to calculate, 'extras' has been initialized to 0,
3322 * and the calculation is done in a loop further down.) */
3324 if (convert_unicode)
3327 /* This is executed unconditionally on ASCII, and for
3328 * Unicode ranges on EBCDIC. Under these conditions, all
3329 * code points above a certain value are variant; and none
3330 * under that value are. We just need to find out how much
3331 * of the range is above that value. We don't count the
3332 * end points here, as they will already have been counted
3333 * as they were parsed. */
3334 if (range_min >= UTF_CONTINUATION_MARK) {
3336 /* The whole range is made up of variants */
3337 extras = (range_max - 1) - (range_min + 1) + 1;
3339 else if (range_max >= UTF_CONTINUATION_MARK) {
3341 /* Only the higher portion of the range is variants */
3342 extras = (range_max - 1) - UTF_CONTINUATION_MARK + 1;
3345 utf8_variant_count += extras;
3348 /* The base growth is the number of code points in the range,
3349 * not including the endpoints, which have already been sized
3350 * for (and output). We don't subtract for the hyphen, as it
3351 * has been parsed but not output, and the SvGROW below is
3352 * based only on what's been output plus what's left to parse.
3354 grow = (range_max - 1) - (range_min + 1) + 1;
3358 /* In some cases in EBCDIC, we haven't yet calculated a
3359 * precise amount needed for the UTF-8 variants. Just
3360 * assume the worst case, that everything will expand by a
3362 if (! convert_unicode) {
3368 /* Otherwise we know exactly how many variants there
3369 * are in the range. */
3374 /* Grow, but position the output to overwrite the range min end
3375 * point, because in some cases we overwrite that */
3376 SvCUR_set(sv, d - SvPVX_const(sv));
3377 offset_to_min = min_ptr - SvPVX_const(sv);
3379 /* See Note on sizing above. */
3380 d = offset_to_min + SvGROW(sv, SvCUR(sv)
3383 + 1 /* Trailing NUL */ );
3385 /* Now, we can expand out the range. */
3387 if (convert_unicode) {
3390 /* Recall that the min and max are now in Unicode terms, so
3391 * we have to convert each character to its native
3394 for (i = range_min; i <= range_max; i++) {
3395 append_utf8_from_native_byte(
3396 LATIN1_TO_NATIVE((U8) i),
3401 for (i = range_min; i <= range_max; i++) {
3402 *d++ = (char)LATIN1_TO_NATIVE((U8) i);
3408 /* Always gets run for ASCII, and sometimes for EBCDIC. */
3410 /* Here, no conversions are necessary, which means that the
3411 * first character in the range is already in 'd' and
3412 * valid, so we can skip overwriting it */
3416 for (i = range_min + 1; i <= range_max; i++) {
3417 append_utf8_from_native_byte((U8) i, (U8 **) &d);
3423 assert(range_min + 1 <= range_max);
3424 for (i = range_min + 1; i < range_max; i++) {
3426 /* In this case on EBCDIC, we haven't calculated
3427 * the variants. Do it here, as we go along */
3428 if (! UVCHR_IS_INVARIANT(i)) {
3429 utf8_variant_count++;
3435 /* The range_max is done outside the loop so as to
3436 * avoid having to special case not incrementing
3437 * 'utf8_variant_count' on EBCDIC (it's already been
3438 * counted when originally parsed) */
3439 *d++ = (char) range_max;
3444 /* If the original range extended above 255, add in that
3446 if (real_range_max) {
3447 *d++ = (char) UTF8_TWO_BYTE_HI(0x100);
3448 *d++ = (char) UTF8_TWO_BYTE_LO(0x100);
3449 if (real_range_max > 0x100) {
3450 if (real_range_max > 0x101) {
3451 *d++ = (char) RANGE_INDICATOR;
3453 d = (char*)uvchr_to_utf8((U8*)d, real_range_max);
3459 /* mark the range as done, and continue */
3463 non_portable_endpoint = 0;
3467 } /* End of is a range */
3468 } /* End of transliteration. Joins main code after these else's */
3469 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3472 while (s1 >= start && *s1-- == '\\')
3475 in_charclass = TRUE;
3477 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3480 while (s1 >= start && *s1-- == '\\')
3483 in_charclass = FALSE;
3485 /* skip for regexp comments /(?#comment)/, except for the last
3486 * char, which will be done separately. Stop on (?{..}) and
3488 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
3491 PERL_UINT_FAST8_T len = UTF8SKIP(s);
3493 while (s + len < send && *s != ')') {
3494 Copy(s, d, len, U8);
3497 len = UTF8_SAFE_SKIP(s, send);
3500 else while (s+1 < send && *s != ')') {
3504 else if (!PL_lex_casemods
3505 && ( s[2] == '{' /* This should match regcomp.c */
3506 || (s[2] == '?' && s[3] == '{')))
3511 /* likewise skip #-initiated comments in //x patterns */
3515 && ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED)
3517 while (s < send && *s != '\n')
3520 /* no further processing of single-quoted regex */
3521 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3522 goto default_action;
3524 /* check for embedded arrays
3525 * (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
3527 else if (*s == '@' && s[1]) {
3529 ? isIDFIRST_utf8_safe(s+1, send)
3530 : isWORDCHAR_A(s[1]))
3534 if (memCHRs(":'{$", s[1]))
3536 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3537 break; /* in regexp, neither @+ nor @- are interpolated */
3539 /* check for embedded scalars. only stop if we're sure it's a
3541 else if (*s == '$') {
3542 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
3544 if (s + 1 < send && !memCHRs("()| \r\n\t", s[1])) {
3546 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3547 "Possible unintended interpolation of $\\ in regex");
3549 break; /* in regexp, $ might be tail anchor */
3553 /* End of else if chain - OP_TRANS rejoin rest */
3555 if (UNLIKELY(s >= send)) {
3561 if (*s == '\\' && s+1 < send) {
3562 char* e; /* Can be used for ending '}', etc. */
3566 /* warn on \1 - \9 in substitution replacements, but note that \11
3567 * is an octal; and \19 is \1 followed by '9' */
3568 if (PL_lex_inwhat == OP_SUBST
3574 /* diag_listed_as: \%d better written as $%d */
3575 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
3580 /* string-change backslash escapes */
3581 if (PL_lex_inwhat != OP_TRANS && *s && memCHRs("lLuUEQF", *s)) {
3585 /* In a pattern, process \N, but skip any other backslash escapes.
3586 * This is because we don't want to translate an escape sequence
3587 * into a meta symbol and have the regex compiler use the meta
3588 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3589 * in spite of this, we do have to process \N here while the proper
3590 * charnames handler is in scope. See bugs #56444 and #62056.
3592 * There is a complication because \N in a pattern may also stand
3593 * for 'match a non-nl', and not mean a charname, in which case its
3594 * processing should be deferred to the regex compiler. To be a
3595 * charname it must be followed immediately by a '{', and not look
3596 * like \N followed by a curly quantifier, i.e., not something like
3597 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3599 else if (PL_lex_inpat
3602 || regcurly(s + 1)))
3605 goto default_action;
3611 if ((isALPHANUMERIC(*s)))
3612 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3613 "Unrecognized escape \\%c passed through",
3615 /* default action is to copy the quoted character */
3616 goto default_action;
3619 /* eg. \132 indicates the octal constant 0132 */
3620 case '0': case '1': case '2': case '3':
3621 case '4': case '5': case '6': case '7':
3623 I32 flags = PERL_SCAN_SILENT_ILLDIGIT
3624 | PERL_SCAN_NOTIFY_ILLDIGIT;
3626 uv = grok_oct(s, &len, &flags, NULL);
3628 if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
3630 && isDIGIT(*s) /* like \08, \178 */
3631 && ckWARN(WARN_MISC))
3633 Perl_warner(aTHX_ packWARN(WARN_MISC), "%s",
3634 form_alien_digit_msg(8, len, s, send, UTF, FALSE));
3637 goto NUM_ESCAPE_INSERT;
3639 /* eg. \o{24} indicates the octal constant \024 */
3644 if (! grok_bslash_o(&s, send,
3647 FALSE, /* Not strict */
3648 FALSE, /* No illegal cp's */
3652 uv = 0; /* drop through to ensure range ends are set */
3654 goto NUM_ESCAPE_INSERT;
3657 /* eg. \x24 indicates the hex constant 0x24 */
3662 if (! grok_bslash_x(&s, send,
3665 FALSE, /* Not strict */
3666 FALSE, /* No illegal cp's */
3670 uv = 0; /* drop through to ensure range ends are set */
3675 /* Insert oct or hex escaped character. */
3677 /* Here uv is the ordinal of the next character being added */
3678 if (UVCHR_IS_INVARIANT(uv)) {
3682 if (!d_is_utf8 && uv > 255) {
3684 /* Here, 'uv' won't fit unless we convert to UTF-8.
3685 * If we've only seen invariants so far, all we have to
3686 * do is turn on the flag */
3687 if (utf8_variant_count == 0) {
3691 SvCUR_set(sv, d - SvPVX_const(sv));
3695 sv_utf8_upgrade_flags_grow(
3697 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3699 /* Since we're having to grow here,
3700 * make sure we have enough room for
3701 * this escape and a NUL, so the
3702 * code immediately below won't have
3703 * to actually grow again */
3705 + (STRLEN)(send - s) + 1);
3706 d = SvPVX(sv) + SvCUR(sv);
3709 has_above_latin1 = TRUE;
3715 utf8_variant_count++;
3718 /* Usually, there will already be enough room in 'sv'
3719 * since such escapes are likely longer than any UTF-8
3720 * sequence they can end up as. This isn't the case on
3721 * EBCDIC where \x{40000000} contains 12 bytes, and the
3722 * UTF-8 for it contains 14. And, we have to allow for
3723 * a trailing NUL. It probably can't happen on ASCII
3724 * platforms, but be safe. See Note on sizing above. */
3725 const STRLEN needed = d - SvPVX(sv)
3729 if (UNLIKELY(needed > SvLEN(sv))) {
3730 SvCUR_set(sv, d - SvPVX_const(sv));
3731 d = SvCUR(sv) + SvGROW(sv, needed);
3734 d = (char*) uvchr_to_utf8_flags((U8*)d, uv,
3735 (ckWARN(WARN_PORTABLE))
3736 ? UNICODE_WARN_PERL_EXTENDED
3741 non_portable_endpoint++;
3746 /* In a non-pattern \N must be like \N{U+0041}, or it can be a
3747 * named character, like \N{LATIN SMALL LETTER A}, or a named
3748 * sequence, like \N{LATIN CAPITAL LETTER A WITH MACRON AND
3749 * GRAVE} (except y/// can't handle the latter, croaking). For
3750 * convenience all three forms are referred to as "named
3751 * characters" below.
3753 * For patterns, \N also can mean to match a non-newline. Code
3754 * before this 'switch' statement should already have handled
3755 * this situation, and hence this code only has to deal with
3756 * the named character cases.
3758 * For non-patterns, the named characters are converted to
3759 * their string equivalents. In patterns, named characters are
3760 * not converted to their ultimate forms for the same reasons
3761 * that other escapes aren't (mainly that the ultimate
3762 * character could be considered a meta-symbol by the regex
3763 * compiler). Instead, they are converted to the \N{U+...}
3764 * form to get the value from the charnames that is in effect
3765 * right now, while preserving the fact that it was a named
3766 * character, so that the regex compiler knows this.
3768 * The structure of this section of code (besides checking for
3769 * errors and upgrading to utf8) is:
3770 * If the named character is of the form \N{U+...}, pass it
3771 * through if a pattern; otherwise convert the code point
3773 * Otherwise must be some \N{NAME}: convert to
3774 * \N{U+c1.c2...} if a pattern; otherwise convert to utf8
3776 * Transliteration is an exception. The conversion to utf8 is
3777 * only done if the code point requires it to be representable.
3779 * Here, 's' points to the 'N'; the test below is guaranteed to
3780 * succeed if we are being called on a pattern, as we already
3781 * know from a test above that the next character is a '{'. A
3782 * non-pattern \N must mean 'named character', which requires
3786 yyerror("Missing braces on \\N{}");
3792 /* If there is no matching '}', it is an error. */
3793 if (! (e = (char *) memchr(s, '}', send - s))) {
3794 if (! PL_lex_inpat) {
3795 yyerror("Missing right brace on \\N{}");
3797 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N");
3799 yyquit(); /* Have exhausted the input. */
3802 /* Here it looks like a named character */
3804 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3805 s += 2; /* Skip to next char after the 'U+' */
3808 /* In patterns, we can have \N{U+xxxx.yyyy.zzzz...} */
3809 /* Check the syntax. */
3812 if (!isXDIGIT(*s)) {
3815 "Invalid hexadecimal number in \\N{U+...}"
3824 else if ((*s == '.' || *s == '_')
3830 /* Pass everything through unchanged.
3831 * +1 is for the '}' */
3832 Copy(orig_s, d, e - orig_s + 1, char);
3833 d += e - orig_s + 1;
3835 else { /* Not a pattern: convert the hex to string */
3836 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3837 | PERL_SCAN_SILENT_ILLDIGIT
3838 | PERL_SCAN_SILENT_OVERFLOW
3839 | PERL_SCAN_DISALLOW_PREFIX;
3842 uv = grok_hex(s, &len, &flags, NULL);
3843 if (len == 0 || (len != (STRLEN)(e - s)))
3846 if ( uv > MAX_LEGAL_CP
3847 || (flags & PERL_SCAN_GREATER_THAN_UV_MAX))
3849 yyerror(form_cp_too_large_msg(16, s, len, 0));
3850 uv = 0; /* drop through to ensure range ends are
3854 /* For non-tr///, if the destination is not in utf8,
3855 * unconditionally recode it to be so. This is
3856 * because \N{} implies Unicode semantics, and scalars
3857 * have to be in utf8 to guarantee those semantics.
3858 * tr/// doesn't care about Unicode rules, so no need
3859 * there to upgrade to UTF-8 for small enough code
3861 if (! d_is_utf8 && ( uv > 0xFF
3862 || PL_lex_inwhat != OP_TRANS))
3864 /* See Note on sizing above. */
3865 const STRLEN extra = OFFUNISKIP(uv) + (send - e) + 1;
3867 SvCUR_set(sv, d - SvPVX_const(sv));
3871 if (utf8_variant_count == 0) {
3873 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
3876 sv_utf8_upgrade_flags_grow(
3878 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3880 d = SvPVX(sv) + SvCUR(sv);
3884 has_above_latin1 = TRUE;
3887 /* Add the (Unicode) code point to the output. */
3888 if (! d_is_utf8 || OFFUNI_IS_INVARIANT(uv)) {
3889 *d++ = (char) LATIN1_TO_NATIVE(uv);
3892 d = (char*) uvoffuni_to_utf8_flags((U8*)d, uv,
3893 (ckWARN(WARN_PORTABLE))
3894 ? UNICODE_WARN_PERL_EXTENDED
3899 else /* Here is \N{NAME} but not \N{U+...}. */
3900 if (! (res = get_and_check_backslash_N_name_wrapper(s, e)))
3901 { /* Failed. We should die eventually, but for now use a NUL
3905 else { /* Successfully evaluated the name */
3907 const char *str = SvPV_const(res, len);
3910 if (! len) { /* The name resolved to an empty string */
3911 const char empty_N[] = "\\N{_}";
3912 Copy(empty_N, d, sizeof(empty_N) - 1, char);
3913 d += sizeof(empty_N) - 1;
3916 /* In order to not lose information for the regex
3917 * compiler, pass the result in the specially made
3918 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3919 * the code points in hex of each character
3920 * returned by charnames */
3922 const char *str_end = str + len;
3923 const STRLEN off = d - SvPVX_const(sv);
3925 if (! SvUTF8(res)) {
3926 /* For the non-UTF-8 case, we can determine the
3927 * exact length needed without having to parse
3928 * through the string. Each character takes up
3929 * 2 hex digits plus either a trailing dot or
3931 const char initial_text[] = "\\N{U+";
3932 const STRLEN initial_len = sizeof(initial_text)
3934 d = off + SvGROW(sv, off
3937 /* +1 for trailing NUL */
3940 + (STRLEN)(send - e));
3941 Copy(initial_text, d, initial_len, char);
3943 while (str < str_end) {
3946 my_snprintf(hex_string,
3950 /* The regex compiler is
3951 * expecting Unicode, not
3953 NATIVE_TO_LATIN1(*str));
3954 PERL_MY_SNPRINTF_POST_GUARD(len,
3955 sizeof(hex_string));
3956 Copy(hex_string, d, 3, char);
3960 d--; /* Below, we will overwrite the final
3961 dot with a right brace */
3964 STRLEN char_length; /* cur char's byte length */
3966 /* and the number of bytes after this is
3967 * translated into hex digits */
3968 STRLEN output_length;
3970 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3971 * for max('U+', '.'); and 1 for NUL */
3972 char hex_string[2 * UTF8_MAXBYTES + 5];
3974 /* Get the first character of the result. */
3975 U32 uv = utf8n_to_uvchr((U8 *) str,
3979 /* Convert first code point to Unicode hex,
3980 * including the boiler plate before it. */
3982 my_snprintf(hex_string, sizeof(hex_string),
3984 (unsigned int) NATIVE_TO_UNI(uv));
3986 /* Make sure there is enough space to hold it */
3987 d = off + SvGROW(sv, off
3989 + (STRLEN)(send - e)
3990 + 2); /* '}' + NUL */
3992 Copy(hex_string, d, output_length, char);
3995 /* For each subsequent character, append dot and
3996 * its Unicode code point in hex */
3997 while ((str += char_length) < str_end) {
3998 const STRLEN off = d - SvPVX_const(sv);
3999 U32 uv = utf8n_to_uvchr((U8 *) str,
4004 my_snprintf(hex_string,
4007 (unsigned int) NATIVE_TO_UNI(uv));
4009 d = off + SvGROW(sv, off
4011 + (STRLEN)(send - e)
4012 + 2); /* '}' + NUL */
4013 Copy(hex_string, d, output_length, char);
4018 *d++ = '}'; /* Done. Add the trailing brace */
4021 else { /* Here, not in a pattern. Convert the name to a
4024 if (PL_lex_inwhat == OP_TRANS) {
4025 str = SvPV_const(res, len);
4026 if (len > ((SvUTF8(res))
4030 yyerror(Perl_form(aTHX_
4031 "%.*s must not be a named sequence"
4032 " in transliteration operator",
4033 /* +1 to include the "}" */
4034 (int) (e + 1 - start), start));
4036 goto end_backslash_N;
4039 if (SvUTF8(res) && UTF8_IS_ABOVE_LATIN1(*str)) {
4040 has_above_latin1 = TRUE;
4044 else if (! SvUTF8(res)) {
4045 /* Make sure \N{} return is UTF-8. This is because
4046 * \N{} implies Unicode semantics, and scalars have
4047 * to be in utf8 to guarantee those semantics; but
4048 * not needed in tr/// */
4049 sv_utf8_upgrade_flags(res, 0);
4050 str = SvPV_const(res, len);
4053 /* Upgrade destination to be utf8 if this new
4055 if (! d_is_utf8 && SvUTF8(res)) {
4056 /* See Note on sizing above. */
4057 const STRLEN extra = len + (send - s) + 1;
4059 SvCUR_set(sv, d - SvPVX_const(sv));
4063 if (utf8_variant_count == 0) {
4065 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + extra);
4068 sv_utf8_upgrade_flags_grow(sv,
4069 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4071 d = SvPVX(sv) + SvCUR(sv);
4074 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
4076 /* See Note on sizing above. (NOTE: SvCUR() is not
4077 * set correctly here). */
4078 const STRLEN extra = len + (send - e) + 1;
4079 const STRLEN off = d - SvPVX_const(sv);
4080 d = off + SvGROW(sv, off + extra);
4082 Copy(str, d, len, char);
4088 } /* End \N{NAME} */
4092 backslash_N++; /* \N{} is defined to be Unicode */
4094 s = e + 1; /* Point to just after the '}' */
4097 /* \c is a control character */
4101 const char * message;
4103 if (! grok_bslash_c(*s, (U8 *) d, &message, NULL)) {
4105 yyquit(); /* Have always immediately croaked on
4111 yyerror("Missing control char name in \\c");
4112 yyquit(); /* Are at end of input, no sense continuing */
4115 non_portable_endpoint++;
4119 /* printf-style backslashes, formfeeds, newlines, etc */
4145 } /* end if (backslash) */
4148 /* Just copy the input to the output, though we may have to convert
4151 * If the input has the same representation in UTF-8 as not, it will be
4152 * a single byte, and we don't care about UTF8ness; just copy the byte */
4153 if (NATIVE_BYTE_IS_INVARIANT((U8)(*s))) {
4156 else if (! s_is_utf8 && ! d_is_utf8) {
4157 /* If neither source nor output is UTF-8, is also a single byte,
4158 * just copy it; but this byte counts should we later have to
4159 * convert to UTF-8 */
4161 utf8_variant_count++;
4163 else if (s_is_utf8 && d_is_utf8) { /* Both UTF-8, can just copy */
4164 const STRLEN len = UTF8SKIP(s);
4166 /* We expect the source to have already been checked for
4168 assert(isUTF8_CHAR((U8 *) s, (U8 *) send));
4170 Copy(s, d, len, U8);
4174 else if (s_is_utf8) { /* UTF8ness matters: convert output to utf8 */
4175 STRLEN need = send - s + 1; /* See Note on sizing above. */
4177 SvCUR_set(sv, d - SvPVX_const(sv));
4181 if (utf8_variant_count == 0) {
4183 d = SvCUR(sv) + SvGROW(sv, SvCUR(sv) + need);
4186 sv_utf8_upgrade_flags_grow(sv,
4187 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4189 d = SvPVX(sv) + SvCUR(sv);
4192 goto default_action; /* Redo, having upgraded so both are UTF-8 */
4194 else { /* UTF8ness matters: convert this non-UTF8 source char to
4195 UTF-8 for output. It will occupy 2 bytes, but don't include
4196 the input byte since we haven't incremented 's' yet. See
4197 Note on sizing above. */
4198 const STRLEN off = d - SvPVX(sv);
4199 const STRLEN extra = 2 + (send - s - 1) + 1;
4200 if (off + extra > SvLEN(sv)) {
4201 d = off + SvGROW(sv, off + extra);
4203 *d++ = UTF8_EIGHT_BIT_HI(*s);
4204 *d++ = UTF8_EIGHT_BIT_LO(*s);
4207 } /* while loop to process each character */
4210 const STRLEN off = d - SvPVX(sv);
4212 /* See if room for the terminating NUL */
4213 if (UNLIKELY(off >= SvLEN(sv))) {
4217 if (off > SvLEN(sv))
4219 Perl_croak(aTHX_ "panic: constant overflowed allocated space,"
4220 " %" UVuf " >= %" UVuf, (UV)off, (UV)SvLEN(sv));
4222 /* Whew! Here we don't have room for the terminating NUL, but
4223 * everything else so far has fit. It's not too late to grow
4224 * to fit the NUL and continue on. But it is a bug, as the code
4225 * above was supposed to have made room for this, so under
4226 * DEBUGGING builds, we panic anyway. */
4227 d = off + SvGROW(sv, off + 1);
4231 /* terminate the string and set up the sv */
4233 SvCUR_set(sv, d - SvPVX_const(sv));
4240 /* shrink the sv if we allocated more than we used */
4241 if (SvCUR(sv) + 5 < SvLEN(sv)) {
4242 SvPV_shrink_to_cur(sv);
4245 /* return the substring (via pl_yylval) only if we parsed anything */
4248 for (; s2 < s; s2++) {
4250 COPLINE_INC_WITH_HERELINES;
4252 SvREFCNT_inc_simple_void_NN(sv);
4253 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
4254 && ! PL_parser->lex_re_reparsing)
4256 const char *const key = PL_lex_inpat ? "qr" : "q";
4257 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
4261 if (PL_lex_inwhat == OP_TRANS) {
4264 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
4267 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
4275 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
4276 type, typelen, NULL);
4278 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
4280 LEAVE_with_name("scan_const");
4285 * Returns TRUE if there's more to the expression (e.g., a subscript),
4288 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
4290 * ->[ and ->{ return TRUE
4291 * ->$* ->$#* ->@* ->@[ ->@{ return TRUE if postderef_qq is enabled
4292 * { and [ outside a pattern are always subscripts, so return TRUE
4293 * if we're outside a pattern and it's not { or [, then return FALSE
4294 * if we're in a pattern and the first char is a {
4295 * {4,5} (any digits around the comma) returns FALSE
4296 * if we're in a pattern and the first char is a [
4298 * [SOMETHING] has a funky algorithm to decide whether it's a
4299 * character class or not. It has to deal with things like
4300 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
4301 * anything else returns TRUE
4304 /* This is the one truly awful dwimmer necessary to conflate C and sed. */
4307 S_intuit_more(pTHX_ char *s, char *e)
4309 PERL_ARGS_ASSERT_INTUIT_MORE;
4311 if (PL_lex_brackets)
4313 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
4315 if (*s == '-' && s[1] == '>'
4316 && FEATURE_POSTDEREF_QQ_IS_ENABLED
4317 && ( (s[2] == '$' && (s[3] == '*' || (s[3] == '#' && s[4] == '*')))
4318 ||(s[2] == '@' && memCHRs("*[{",s[3])) ))
4320 if (*s != '{' && *s != '[')
4322 PL_parser->sub_no_recover = TRUE;
4326 /* In a pattern, so maybe we have {n,m}. */
4334 /* On the other hand, maybe we have a character class */
4337 if (*s == ']' || *s == '^')
4340 /* this is terrifying, and it works */
4343 const char * const send = (char *) memchr(s, ']', e - s);
4344 unsigned char un_char, last_un_char;
4345 char tmpbuf[sizeof PL_tokenbuf * 4];
4347 if (!send) /* has to be an expression */
4349 weight = 2; /* let's weigh the evidence */
4353 else if (isDIGIT(*s)) {
4355 if (isDIGIT(s[1]) && s[2] == ']')
4361 Zero(seen,256,char);
4363 for (; s < send; s++) {
4364 last_un_char = un_char;
4365 un_char = (unsigned char)*s;
4370 weight -= seen[un_char] * 10;
4371 if (isWORDCHAR_lazy_if_safe(s+1, PL_bufend, UTF)) {
4373 scan_ident(s, tmpbuf, sizeof tmpbuf, FALSE);
4374 len = (int)strlen(tmpbuf);
4375 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
4376 UTF ? SVf_UTF8 : 0, SVt_PV))
4383 && memCHRs("[#!%*<>()-=",s[1]))
4385 if (/*{*/ memCHRs("])} =",s[2]))
4394 if (memCHRs("wds]",s[1]))
4396 else if (seen[(U8)'\''] || seen[(U8)'"'])
4398 else if (memCHRs("rnftbxcav",s[1]))
4400 else if (isDIGIT(s[1])) {
4402 while (s[1] && isDIGIT(s[1]))
4412 if (memCHRs("aA01! ",last_un_char))
4414 if (memCHRs("zZ79~",s[1]))
4416 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4417 weight -= 5; /* cope with negative subscript */
4420 if (!isWORDCHAR(last_un_char)
4421 && !(last_un_char == '$' || last_un_char == '@'
4422 || last_un_char == '&')
4423 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
4427 if (keyword(d, s - d, 0))
4430 if (un_char == last_un_char + 1)
4432 weight -= seen[un_char];
4437 if (weight >= 0) /* probably a character class */
4447 * Does all the checking to disambiguate
4449 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4450 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4452 * First argument is the stuff after the first token, e.g. "bar".
4454 * Not a method if foo is a filehandle.
4455 * Not a method if foo is a subroutine prototyped to take a filehandle.
4456 * Not a method if it's really "Foo $bar"
4457 * Method if it's "foo $bar"
4458 * Not a method if it's really "print foo $bar"
4459 * Method if it's really "foo package::" (interpreted as package->foo)
4460 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
4461 * Not a method if bar is a filehandle or package, but is quoted with
4466 S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv)
4468 char *s = start + (*start == '$');
4469 char tmpbuf[sizeof PL_tokenbuf];
4472 /* Mustn't actually add anything to a symbol table.
4473 But also don't want to "initialise" any placeholder
4474 constants that might already be there into full
4475 blown PVGVs with attached PVCV. */
4477 ioname ? gv_fetchsv(ioname, GV_NOADD_NOINIT, SVt_PVCV) : NULL;
4479 PERL_ARGS_ASSERT_INTUIT_METHOD;
4481 if (!FEATURE_INDIRECT_IS_ENABLED)
4484 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
4486 if (cv && SvPOK(cv)) {
4487 const char *proto = CvPROTO(cv);
4489 while (*proto && (isSPACE(*proto) || *proto == ';'))
4496 if (*start == '$') {
4497 SSize_t start_off = start - SvPVX(PL_linestr);
4498 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY
4499 || isUPPER(*PL_tokenbuf))
4501 /* this could be $# */
4504 PL_bufptr = SvPVX(PL_linestr) + start_off;
4506 return *s == '(' ? FUNCMETH : METHOD;
4509 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4510 /* start is the beginning of the possible filehandle/object,
4511 * and s is the end of it
4512 * tmpbuf is a copy of it (but with single quotes as double colons)
4515 if (!keyword(tmpbuf, len, 0)) {
4516 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4521 indirgv = gv_fetchpvn_flags(tmpbuf, len,
4522 GV_NOADD_NOINIT|( UTF ? SVf_UTF8 : 0 ),
4524 if (indirgv && SvTYPE(indirgv) != SVt_NULL
4525 && (!isGV(indirgv) || GvCVu(indirgv)))
4527 /* filehandle or package name makes it a method */
4528 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
4530 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
4531 return 0; /* no assumptions -- "=>" quotes bareword */
4533 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0,
4534 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
4535 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
4537 force_next(BAREWORD);
4539 return *s == '(' ? FUNCMETH : METHOD;
4545 /* Encoded script support. filter_add() effectively inserts a
4546 * 'pre-processing' function into the current source input stream.
4547 * Note that the filter function only applies to the current source file
4548 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4550 * The datasv parameter (which may be NULL) can be used to pass
4551 * private data to this instance of the filter. The filter function
4552 * can recover the SV using the FILTER_DATA macro and use it to
4553 * store private buffers and state information.
4555 * The supplied datasv parameter is upgraded to a PVIO type
4556 * and the IoDIRP/IoANY field is used to store the function pointer,
4557 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
4558 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4559 * private use must be set using malloc'd pointers.
4563 Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
4571 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4572 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4574 if (!PL_rsfp_filters)
4575 PL_rsfp_filters = newAV();
4578 SvUPGRADE(datasv, SVt_PVIO);
4579 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
4580 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
4581 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
4582 FPTR2DPTR(void *, IoANY(datasv)),
4583 SvPV_nolen(datasv)));
4584 av_unshift(PL_rsfp_filters, 1);
4585 av_store(PL_rsfp_filters, 0, datasv) ;
4587 !PL_parser->filtered
4588 && PL_parser->lex_flags & LEX_EVALBYTES
4589 && PL_bufptr < PL_bufend
4591 const char *s = PL_bufptr;
4592 while (s < PL_bufend) {
4594 SV *linestr = PL_parser->linestr;
4595 char *buf = SvPVX(linestr);
4596 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4597 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4598 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4599 STRLEN const linestart_pos = PL_parser->linestart - buf;
4600 STRLEN const last_uni_pos =
4601 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4602 STRLEN const last_lop_pos =
4603 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4604 av_push(PL_rsfp_filters, linestr);
4605 PL_parser->linestr =
4606 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4607 buf = SvPVX(PL_parser->linestr);
4608 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4609 PL_parser->bufptr = buf + bufptr_pos;
4610 PL_parser->oldbufptr = buf + oldbufptr_pos;
4611 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4612 PL_parser->linestart = buf + linestart_pos;
4613 if (PL_parser->last_uni)
4614 PL_parser->last_uni = buf + last_uni_pos;
4615 if (PL_parser->last_lop)
4616 PL_parser->last_lop = buf + last_lop_pos;
4617 SvLEN_set(linestr, SvCUR(linestr));
4618 SvCUR_set(linestr, s - SvPVX(linestr));
4619 PL_parser->filtered = 1;
4629 /* Delete most recently added instance of this filter function. */
4631 Perl_filter_del(pTHX_ filter_t funcp)
4635 PERL_ARGS_ASSERT_FILTER_DEL;
4638 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4639 FPTR2DPTR(void*, funcp)));
4641 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
4643 /* if filter is on top of stack (usual case) just pop it off */
4644 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
4645 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
4646 sv_free(av_pop(PL_rsfp_filters));
4650 /* we need to search for the correct entry and clear it */
4651 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
4655 /* Invoke the idxth filter function for the current rsfp. */
4656 /* maxlen 0 = read one text line */
4658 Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
4663 /* This API is bad. It should have been using unsigned int for maxlen.
4664 Not sure if we want to change the API, but if not we should sanity
4665 check the value here. */
4666 unsigned int correct_length = maxlen < 0 ? PERL_INT_MAX : maxlen;
4668 PERL_ARGS_ASSERT_FILTER_READ;
4670 if (!PL_parser || !PL_rsfp_filters)
4672 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
4673 /* Provide a default input filter to make life easy. */
4674 /* Note that we append to the line. This is handy. */
4675 DEBUG_P(PerlIO_printf(Perl_debug_log,
4676 "filter_read %d: from rsfp\n", idx));
4677 if (correct_length) {
4680 const int old_len = SvCUR(buf_sv);
4682 /* ensure buf_sv is large enough */
4683 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
4684 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4685 correct_length)) <= 0) {
4686 if (PerlIO_error(PL_rsfp))
4687 return -1; /* error */
4689 return 0 ; /* end of file */
4691 SvCUR_set(buf_sv, old_len + len) ;
4692 SvPVX(buf_sv)[old_len + len] = '\0';
4695 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4696 if (PerlIO_error(PL_rsfp))
4697 return -1; /* error */
4699 return 0 ; /* end of file */
4702 return SvCUR(buf_sv);
4704 /* Skip this filter slot if filter has been deleted */
4705 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
4706 DEBUG_P(PerlIO_printf(Perl_debug_log,
4707 "filter_read %d: skipped (filter deleted)\n",
4709 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
4711 if (SvTYPE(datasv) != SVt_PVIO) {
4712 if (correct_length) {
4714 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4715 if (!remainder) return 0; /* eof */
4716 if (correct_length > remainder) correct_length = remainder;
4717 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4718 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4721 const char *s = SvEND(datasv);
4722 const char *send = SvPVX(datasv) + SvLEN(datasv);
4730 if (s == send) return 0; /* eof */
4731 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4732 SvCUR_set(datasv, s-SvPVX(datasv));
4734 return SvCUR(buf_sv);
4736 /* Get function pointer hidden within datasv */
4737 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
4738 DEBUG_P(PerlIO_printf(Perl_debug_log,
4739 "filter_read %d: via function %p (%s)\n",
4740 idx, (void*)datasv, SvPV_nolen_const(datasv)));
4741 /* Call function. The function is expected to */
4742 /* call "FILTER_READ(idx+1, buf_sv)" first. */
4743 /* Return: <0:error, =0:eof, >0:not eof */
4745 save_scalar(PL_errgv);
4746 ret = (*funcp)(aTHX_ idx, buf_sv, correct_length);
4752 S_filter_gets(pTHX_ SV *sv, STRLEN append)
4754 PERL_ARGS_ASSERT_FILTER_GETS;
4756 #ifdef PERL_CR_FILTER
4757 if (!PL_rsfp_filters) {
4758 filter_add(S_cr_textfilter,NULL);
4761 if (PL_rsfp_filters) {
4763 SvCUR_set(sv, 0); /* start with empty line */
4764 if (FILTER_READ(0, sv, 0) > 0)
4765 return ( SvPVX(sv) ) ;
4770 return (sv_gets(sv, PL_rsfp, append));
4774 S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
4778 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4780 if (memEQs(pkgname, len, "__PACKAGE__"))
4784 && (pkgname[len - 2] == ':' && pkgname[len - 1] == ':')
4785 && (gv = gv_fetchpvn_flags(pkgname,
4787 ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
4789 return GvHV(gv); /* Foo:: */
4792 /* use constant CLASS => 'MyClass' */
4793 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
4794 if (gv && GvCV(gv)) {
4795 SV * const sv = cv_const_sv(GvCV(gv));
4797 return gv_stashsv(sv, 0);
4800 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
4805 S_tokenize_use(pTHX_ int is_use, char *s) {
4806 PERL_ARGS_ASSERT_TOKENIZE_USE;
4808 if (PL_expect != XSTATE)
4809 /* diag_listed_as: "use" not allowed in expression */
4810 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4811 is_use ? "use" : "no"));
4814 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4815 s = force_version(s, TRUE);
4816 if (*s == ';' || *s == '}'
4817 || (s = skipspace(s), (*s == ';' || *s == '}'))) {
4818 NEXTVAL_NEXTTOKE.opval = NULL;
4819 force_next(BAREWORD);
4821 else if (*s == 'v') {
4822 s = force_word(s,BAREWORD,FALSE,TRUE);
4823 s = force_version(s, FALSE);
4827 s = force_word(s,BAREWORD,FALSE,TRUE);
4828 s = force_version(s, FALSE);
4830 pl_yylval.ival = is_use;
4834 static const char* const exp_name[] =
4835 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
4836 "ATTRTERM", "TERMBLOCK", "XBLOCKTERM", "POSTDEREF",
4837 "SIGVAR", "TERMORDORDOR"
4841 #define word_takes_any_delimiter(p,l) S_word_takes_any_delimiter(p,l)
4843 S_word_takes_any_delimiter(char *p, STRLEN len)
4845 return (len == 1 && memCHRs("msyq", p[0]))
4847 && ((p[0] == 't' && p[1] == 'r')
4848 || (p[0] == 'q' && memCHRs("qwxr", p[1]))));
4852 S_check_scalar_slice(pTHX_ char *s)
4855 while (SPACE_OR_TAB(*s)) s++;
4856 if (*s == 'q' && s[1] == 'w' && !isWORDCHAR_lazy_if_safe(s+2,
4862 while ( isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF)
4863 || (*s && memCHRs(" \t$#+-'\"", *s)))
4865 s += UTF ? UTF8SKIP(s) : 1;
4867 if (*s == '}' || *s == ']')
4868 pl_yylval.ival = OPpSLICEWARNING;
4871 #define lex_token_boundary() S_lex_token_boundary(aTHX)
4873 S_lex_token_boundary(pTHX)
4875 PL_oldoldbufptr = PL_oldbufptr;
4876 PL_oldbufptr = PL_bufptr;
4879 #define vcs_conflict_marker(s) S_vcs_conflict_marker(aTHX_ s)
4881 S_vcs_conflict_marker(pTHX_ char *s)
4883 lex_token_boundary();
4885 yyerror("Version control conflict marker");
4886 while (s < PL_bufend && *s != '\n')
4892 yyl_sigvar(pTHX_ char *s)
4894 /* we expect the sigil and optional var name part of a
4895 * signature element here. Since a '$' is not necessarily
4896 * followed by a var name, handle it specially here; the general
4897 * yylex code would otherwise try to interpret whatever follows
4898 * as a var; e.g. ($, ...) would be seen as the var '$,'
4905 PL_bufptr = s; /* for error reporting */
4910 /* spot stuff that looks like an prototype */
4911 if (memCHRs("$:@%&*;\\[]", *s)) {
4912 yyerror("Illegal character following sigil in a subroutine signature");
4915 /* '$#' is banned, while '$ # comment' isn't */
4917 yyerror("'#' not allowed immediately following a sigil in a subroutine signature");
4921 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
4922 char *dest = PL_tokenbuf + 1;
4923 /* read var name, including sigil, into PL_tokenbuf */
4924 PL_tokenbuf[0] = sigil;
4925 parse_ident(&s, &dest, dest + sizeof(PL_tokenbuf) - 1,
4926 0, cBOOL(UTF), FALSE, FALSE);
4928 assert(PL_tokenbuf[1]); /* we have a variable name */
4936 /* parse the = for the default ourselves to avoid '+=' etc being accepted here
4937 * as the ASSIGNOP, and exclude other tokens that start with =
4939 if (*s == '=' && (!s[1] || memCHRs("=~>", s[1]) == 0)) {
4940 /* save now to report with the same context as we did when
4941 * all ASSIGNOPS were accepted */
4945 NEXTVAL_NEXTTOKE.ival = 0;
4946 force_next(ASSIGNOP);
4949 else if (*s == ',' || *s == ')') {
4950 PL_expect = XOPERATOR;
4953 /* make sure the context shows the unexpected character and
4954 * hopefully a bit more */
4956 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4958 PL_bufptr = s; /* for error reporting */
4959 yyerror("Illegal operator following parameter in a subroutine signature");
4963 NEXTVAL_NEXTTOKE.ival = sigil;
4964 force_next('p'); /* force a signature pending identifier */
4971 case ',': /* handle ($a,,$b) */
4976 yyerror("A signature parameter must start with '$', '@' or '%'");
4977 /* very crude error recovery: skip to likely next signature
4979 while (*s && *s != '$' && *s != '@' && *s != '%' && *s != ')')
4988 yyl_dollar(pTHX_ char *s)
4992 if (PL_expect == XPOSTDEREF) {
4995 POSTDEREF(DOLSHARP);
5001 && ( isIDFIRST_lazy_if_safe(s+2, PL_bufend, UTF)
5002 || memCHRs("{$:+-@", s[2])))
5004 PL_tokenbuf[0] = '@';
5005 s = scan_ident(s + 1, PL_tokenbuf + 1,
5006 sizeof PL_tokenbuf - 1, FALSE);
5007 if (PL_expect == XOPERATOR) {
5009 if (PL_bufptr > s) {
5011 PL_bufptr = PL_oldbufptr;
5013 no_op("Array length", d);
5015 if (!PL_tokenbuf[1])
5017 PL_expect = XOPERATOR;
5018 force_ident_maybe_lex('#');
5022 PL_tokenbuf[0] = '$';
5023 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5024 if (PL_expect == XOPERATOR) {
5026 if (PL_bufptr > s) {
5028 PL_bufptr = PL_oldbufptr;
5032 if (!PL_tokenbuf[1]) {
5034 yyerror("Final $ should be \\$ or $name");
5039 const char tmp = *s;
5040 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
5043 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5044 && intuit_more(s, PL_bufend)) {
5046 PL_tokenbuf[0] = '@';
5047 if (ckWARN(WARN_SYNTAX)) {
5050 while ( t < PL_bufend ) {
5052 do { t += UTF ? UTF8SKIP(t) : 1; } while (t < PL_bufend && isSPACE(*t));
5053 /* consumed one or more space chars */
5054 } else if (*t == '$' || *t == '@') {
5055 /* could be more than one '$' like $$ref or @$ref */
5056 do { t++; } while (t < PL_bufend && *t == '$');
5058 /* could be an abigail style identifier like $ foo */
5059 while (t < PL_bufend && *t == ' ') t++;
5061 /* strip off the name of the var */
5062 while (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
5063 t += UTF ? UTF8SKIP(t) : 1;
5064 /* consumed a varname */
5065 } else if (isDIGIT(*t)) {
5066 /* deal with hex constants like 0x11 */
5067 if (t[0] == '0' && t[1] == 'x') {
5069 while (t < PL_bufend && isXDIGIT(*t)) t++;
5071 /* deal with decimal/octal constants like 1 and 0123 */
5072 do { t++; } while (isDIGIT(*t));
5073 if (t<PL_bufend && *t == '.') {
5074 do { t++; } while (isDIGIT(*t));
5077 /* consumed a number */
5079 /* not a var nor a space nor a number */
5083 if (t < PL_bufend && *t++ == ',') {
5084 PL_bufptr = skipspace(PL_bufptr); /* XXX can realloc */
5085 while (t < PL_bufend && *t != ']')
5087 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5088 "Multidimensional syntax %" UTF8f " not supported",
5089 UTF8fARG(UTF,(int)((t - PL_bufptr) + 1), PL_bufptr));
5093 else if (*s == '{') {
5095 PL_tokenbuf[0] = '%';
5096 if ( strEQ(PL_tokenbuf+1, "SIG")
5097 && ckWARN(WARN_SYNTAX)
5098 && (t = (char *) memchr(s, '}', PL_bufend - s))
5099 && (t = (char *) memchr(t, '=', PL_bufend - t)))
5101 char tmpbuf[sizeof PL_tokenbuf];
5104 } while (isSPACE(*t));
5105 if (isIDFIRST_lazy_if_safe(t, PL_bufend, UTF)) {
5107 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
5112 && get_cvn_flags(tmpbuf, len, UTF
5116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5117 "You need to quote \"%" UTF8f "\"",
5118 UTF8fARG(UTF, len, tmpbuf));
5125 PL_expect = XOPERATOR;
5126 if ((PL_lex_state == LEX_NORMAL || PL_lex_brackets) && isSPACE((char)tmp)) {
5127 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5128 if (!islop || PL_last_lop_op == OP_GREPSTART)
5129 PL_expect = XOPERATOR;
5130 else if (memCHRs("$@\"'`q", *s))
5131 PL_expect = XTERM; /* e.g. print $fh "foo" */
5132 else if ( memCHRs("&*<%", *s)
5133 && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
5135 PL_expect = XTERM; /* e.g. print $fh &sub */
5137 else if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5138 char tmpbuf[sizeof PL_tokenbuf];
5141 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5142 if ((t2 = keyword(tmpbuf, len, 0))) {
5143 /* binary operators exclude handle interpretations */
5155 PL_expect = XTERM; /* e.g. print $fh length() */
5160 PL_expect = XTERM; /* e.g. print $fh subr() */
5163 else if (isDIGIT(*s))
5164 PL_expect = XTERM; /* e.g. print $fh 3 */
5165 else if (*s == '.' && isDIGIT(s[1]))
5166 PL_expect = XTERM; /* e.g. print $fh .3 */
5167 else if ((*s == '?' || *s == '-' || *s == '+')
5168 && !isSPACE(s[1]) && s[1] != '=')
5169 PL_expect = XTERM; /* e.g. print $fh -1 */
5170 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5172 PL_expect = XTERM; /* e.g. print $fh /.../
5173 XXX except DORDOR operator
5175 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5177 PL_expect = XTERM; /* print $fh <<"EOF" */
5180 force_ident_maybe_lex('$');
5185 yyl_sub(pTHX_ char *s, const int key)
5187 char * const tmpbuf = PL_tokenbuf + 1;
5188 bool have_name, have_proto;
5190 SV *format_name = NULL;
5191 bool is_sigsub = FEATURE_SIGNATURES_IS_ENABLED;
5193 SSize_t off = s-SvPVX(PL_linestr);
5196 s = skipspace(s); /* can move PL_linestr */
5198 d = SvPVX(PL_linestr)+off;
5200 SAVEBOOL(PL_parser->sig_seen);
5201 PL_parser->sig_seen = FALSE;
5203 if ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
5205 || (*s == ':' && s[1] == ':'))
5208 PL_expect = XATTRBLOCK;
5209 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
5211 if (key == KEY_format)
5212 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
5214 if (memchr(tmpbuf, ':', len) || key != KEY_sub
5216 PL_tokenbuf, len + 1, 0
5218 sv_setpvn(PL_subname, tmpbuf, len);
5220 sv_setsv(PL_subname,PL_curstname);
5221 sv_catpvs(PL_subname,"::");
5222 sv_catpvn(PL_subname,tmpbuf,len);
5224 if (SvUTF8(PL_linestr))
5225 SvUTF8_on(PL_subname);
5231 if (key == KEY_my || key == KEY_our || key==KEY_state) {
5233 /* diag_listed_as: Missing name in "%s sub" */
5235 "Missing name in \"%s\"", PL_bufptr);
5237 PL_expect = XATTRTERM;
5238 sv_setpvs(PL_subname,"?");
5242 if (key == KEY_format) {
5244 NEXTVAL_NEXTTOKE.opval
5245 = newSVOP(OP_CONST,0, format_name);
5246 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
5247 force_next(BAREWORD);
5252 /* Look for a prototype */
5253 if (*s == '(' && !is_sigsub) {
5254 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5256 Perl_croak(aTHX_ "Prototype not terminated");
5257 COPLINE_SET_FROM_MULTI_END;
5258 (void)validate_proto(PL_subname, PL_lex_stuff,
5259 ckWARN(WARN_ILLEGALPROTO), 0);
5267 if ( !(*s == ':' && s[1] != ':')
5268 && (*s != '{' && *s != '(') && key != KEY_format)
5270 assert(key == KEY_sub || key == KEY_AUTOLOAD ||
5271 key == KEY_DESTROY || key == KEY_BEGIN ||
5272 key == KEY_UNITCHECK || key == KEY_CHECK ||
5273 key == KEY_INIT || key == KEY_END ||
5274 key == KEY_my || key == KEY_state ||
5277 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
5278 else if (*s != ';' && *s != '}')
5279 Perl_croak(aTHX_ "Illegal declaration of subroutine %" SVf, SVfARG(PL_subname));
5283 NEXTVAL_NEXTTOKE.opval =
5284 newSVOP(OP_CONST, 0, PL_lex_stuff);
5285 PL_lex_stuff = NULL;
5290 sv_setpvs(PL_subname, "__ANON__");
5292 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5298 force_ident_maybe_lex('&');
5306 yyl_interpcasemod(pTHX_ char *s)
5309 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5311 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
5312 PL_bufptr, PL_bufend, *PL_bufptr);
5315 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
5317 if (PL_lex_casemods) {
5318 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
5319 PL_lex_casestack[PL_lex_casemods] = '\0';
5321 if (PL_bufptr != PL_bufend
5322 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
5323 || oldmod == 'F')) {
5325 PL_lex_state = LEX_INTERPCONCAT;
5327 PL_lex_allbrackets--;
5330 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
5331 /* Got an unpaired \E */
5332 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5333 "Useless use of \\E");
5335 if (PL_bufptr != PL_bufend)
5337 PL_lex_state = LEX_INTERPCONCAT;
5342 PerlIO_printf(Perl_debug_log, "### Saw case modifier\n");
5345 if (s[1] == '\\' && s[2] == 'E') {
5347 PL_lex_state = LEX_INTERPCONCAT;
5352 if ( memBEGINs(s, (STRLEN) (PL_bufend - s), "L\\u")
5353 || memBEGINs(s, (STRLEN) (PL_bufend - s), "U\\l"))
5355 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
5357 if ((*s == 'L' || *s == 'U' || *s == 'F')
5358 && (strpbrk(PL_lex_casestack, "LUF")))
5360 PL_lex_casestack[--PL_lex_casemods] = '\0';
5361 PL_lex_allbrackets--;
5364 if (PL_lex_casemods > 10)
5365 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
5366 PL_lex_casestack[PL_lex_casemods++] = *s;
5367 PL_lex_casestack[PL_lex_casemods] = '\0';
5368 PL_lex_state = LEX_INTERPCONCAT;
5369 NEXTVAL_NEXTTOKE.ival = 0;
5370 force_next((2<<24)|'(');
5372 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
5374 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
5376 NEXTVAL_NEXTTOKE.ival = OP_LC;
5378 NEXTVAL_NEXTTOKE.ival = OP_UC;
5380 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
5382 NEXTVAL_NEXTTOKE.ival = OP_FC;
5384 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5388 if (PL_lex_starts) {
5391 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5392 if (PL_lex_casemods == 1 && PL_lex_inpat)
5395 AopNOASSIGN(OP_CONCAT);
5403 yyl_secondclass_keyword(pTHX_ char *s, STRLEN len, int key, I32 *orig_keyword,
5404 GV **pgv, GV ***pgvp)
5406 GV *ogv = NULL; /* override (winner) */
5407 GV *hgv = NULL; /* hidden (loser) */
5410 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
5412 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5413 (UTF ? SVf_UTF8 : 0)|GV_NOTQUAL,
5415 && (cv = GvCVu(gv)))
5417 if (GvIMPORTED_CV(gv))
5419 else if (! CvMETHOD(cv))
5423 && (*pgvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf, len, FALSE))
5425 && (isGV_with_GP(gv)
5426 ? GvCVu(gv) && GvIMPORTED_CV(gv)
5427 : SvPCS_IMPORTED(gv)
5428 && (gv_init(gv, PL_globalstash, PL_tokenbuf,
5438 *orig_keyword = key;
5439 return 0; /* overridden by import or by GLOBAL */
5441 else if (gv && !*pgvp
5442 && -key==KEY_lock /* XXX generalizable kludge */
5445 return 0; /* any sub overrides "weak" keyword */
5447 else { /* no override */
5449 if (key == KEY_dump) {
5450 Perl_croak(aTHX_ "dump() must be written as CORE::dump() as of Perl 5.30");
5454 if (hgv && key != KEY_x) /* never ambiguous */
5455 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5456 "Ambiguous call resolved as CORE::%s(), "
5457 "qualify as such or use &",
5464 yyl_qw(pTHX_ char *s, STRLEN len)
5468 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
5470 missingterm(NULL, 0);
5472 COPLINE_SET_FROM_MULTI_END;
5473 PL_expect = XOPERATOR;
5474 if (SvCUR(PL_lex_stuff)) {
5475 int warned_comma = !ckWARN(WARN_QW);
5476 int warned_comment = warned_comma;
5477 char *d = SvPV_force(PL_lex_stuff, len);
5479 for (; isSPACE(*d) && len; --len, ++d)
5484 if (!warned_comma || !warned_comment) {
5485 for (; !isSPACE(*d) && len; --len, ++d) {
5486 if (!warned_comma && *d == ',') {
5487 Perl_warner(aTHX_ packWARN(WARN_QW),
5488 "Possible attempt to separate words with commas");
5491 else if (!warned_comment && *d == '#') {
5492 Perl_warner(aTHX_ packWARN(WARN_QW),
5493 "Possible attempt to put comments in qw() list");
5499 for (; !isSPACE(*d) && len; --len, ++d)
5502 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
5503 words = op_append_elem(OP_LIST, words,
5504 newSVOP(OP_CONST, 0, tokeq(sv)));
5509 words = newNULLLIST();
5510 SvREFCNT_dec_NN(PL_lex_stuff);
5511 PL_lex_stuff = NULL;
5512 PL_expect = XOPERATOR;
5513 pl_yylval.opval = sawparens(words);
5518 yyl_hyphen(pTHX_ char *s)
5520 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
5528 while (s < PL_bufend && SPACE_OR_TAB(*s))
5531 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=>")) {
5532 s = force_word(PL_bufptr,BAREWORD,FALSE,FALSE);
5533 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
5534 OPERATOR('-'); /* unary minus */
5537 case 'r': ftst = OP_FTEREAD; break;
5538 case 'w': ftst = OP_FTEWRITE; break;
5539 case 'x': ftst = OP_FTEEXEC; break;
5540 case 'o': ftst = OP_FTEOWNED; break;
5541 case 'R': ftst = OP_FTRREAD; break;
5542 case 'W': ftst = OP_FTRWRITE; break;
5543 case 'X': ftst = OP_FTREXEC; break;
5544 case 'O': ftst = OP_FTROWNED; break;
5545 case 'e': ftst = OP_FTIS; break;
5546 case 'z': ftst = OP_FTZERO; break;
5547 case 's': ftst = OP_FTSIZE; break;
5548 case 'f': ftst = OP_FTFILE; break;
5549 case 'd': ftst = OP_FTDIR; break;
5550 case 'l': ftst = OP_FTLINK; break;
5551 case 'p': ftst = OP_FTPIPE; break;
5552 case 'S': ftst = OP_FTSOCK; break;
5553 case 'u': ftst = OP_FTSUID; break;
5554 case 'g': ftst = OP_FTSGID; break;
5555 case 'k': ftst = OP_FTSVTX; break;
5556 case 'b': ftst = OP_FTBLK; break;
5557 case 'c': ftst = OP_FTCHR; break;
5558 case 't': ftst = OP_FTTTY; break;
5559 case 'T': ftst = OP_FTTEXT; break;
5560 case 'B': ftst = OP_FTBINARY; break;
5561 case 'M': case 'A': case 'C':
5562 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
5564 case 'M': ftst = OP_FTMTIME; break;
5565 case 'A': ftst = OP_FTATIME; break;
5566 case 'C': ftst = OP_FTCTIME; break;
5574 PL_last_uni = PL_oldbufptr;
5575 PL_last_lop_op = (OPCODE)ftst;
5577 PerlIO_printf(Perl_debug_log, "### Saw file test %c\n", (int)tmp);
5582 /* Assume it was a minus followed by a one-letter named
5583 * subroutine call (or a -bareword), then. */
5585 PerlIO_printf(Perl_debug_log,
5586 "### '-%c' looked like a file test but was not\n",
5593 const char tmp = *s++;
5596 if (PL_expect == XOPERATOR)
5601 else if (*s == '>') {
5604 if (((*s == '$' || *s == '&') && s[1] == '*')
5605 ||(*s == '$' && s[1] == '#' && s[2] == '*')
5606 ||((*s == '@' || *s == '%') && memCHRs("*[{", s[1]))
5607 ||(*s == '*' && (s[1] == '*' || s[1] == '{'))
5610 PL_expect = XPOSTDEREF;
5613 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5614 s = force_word(s,METHOD,FALSE,TRUE);
5622 if (PL_expect == XOPERATOR) {
5624 && !PL_lex_allbrackets
5625 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5633 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5635 OPERATOR('-'); /* unary minus */
5641 yyl_plus(pTHX_ char *s)
5643 const char tmp = *s++;
5646 if (PL_expect == XOPERATOR)
5651 if (PL_expect == XOPERATOR) {
5653 && !PL_lex_allbrackets
5654 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5662 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5669 yyl_star(pTHX_ char *s)
5671 if (PL_expect == XPOSTDEREF)
5674 if (PL_expect != XOPERATOR) {
5675 s = scan_ident(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5676 PL_expect = XOPERATOR;
5677 force_ident(PL_tokenbuf, '*');
5686 if (*s == '=' && !PL_lex_allbrackets
5687 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5696 && !PL_lex_allbrackets
5697 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5707 yyl_percent(pTHX_ char *s)
5709 if (PL_expect == XOPERATOR) {
5711 && !PL_lex_allbrackets
5712 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5719 else if (PL_expect == XPOSTDEREF)
5722 PL_tokenbuf[0] = '%';
5723 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5725 if (!PL_tokenbuf[1]) {
5728 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5729 && intuit_more(s, PL_bufend)) {
5731 PL_tokenbuf[0] = '@';
5733 PL_expect = XOPERATOR;
5734 force_ident_maybe_lex('%');
5739 yyl_caret(pTHX_ char *s)
5742 const bool bof = cBOOL(FEATURE_BITWISE_IS_ENABLED);
5743 if (bof && s[1] == '.')
5745 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5746 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5752 BOop(bof ? d == s-2 ? OP_SBIT_XOR : OP_NBIT_XOR : OP_BIT_XOR);
5756 yyl_colon(pTHX_ char *s)
5760 switch (PL_expect) {
5762 if (!PL_in_my || (PL_lex_state != LEX_NORMAL && !PL_lex_brackets))
5764 PL_bufptr = s; /* update in case we back off */
5767 "Use of := for an empty attribute list is not allowed");
5774 PL_expect = XTERMBLOCK;
5776 /* NB: as well as parsing normal attributes, we also end up
5777 * here if there is something looking like attributes
5778 * following a signature (which is illegal, but used to be
5779 * legal in 5.20..5.26). If the latter, we still parse the
5780 * attributes so that error messages(s) are less confusing,
5781 * but ignore them (parser->sig_seen).
5785 while (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
5786 bool sig = PL_parser->sig_seen;
5790 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5791 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
5792 if (tmp < 0) tmp = -tmp;
5807 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
5809 d = scan_str(d,TRUE,TRUE,FALSE,NULL);
5814 Perl_croak(aTHX_ "Unterminated attribute parameter in attribute list");
5816 COPLINE_SET_FROM_MULTI_END;
5819 sv_catsv(sv, PL_lex_stuff);
5820 attrs = op_append_elem(OP_LIST, attrs,
5821 newSVOP(OP_CONST, 0, sv));
5822 SvREFCNT_dec_NN(PL_lex_stuff);
5823 PL_lex_stuff = NULL;
5826 /* NOTE: any CV attrs applied here need to be part of
5827 the CVf_BUILTIN_ATTRS define in cv.h! */
5828 if (!PL_in_my && memEQs(SvPVX(sv), len, "lvalue")) {
5831 CvLVALUE_on(PL_compcv);
5833 else if (!PL_in_my && memEQs(SvPVX(sv), len, "method")) {
5836 CvMETHOD_on(PL_compcv);
5838 else if (!PL_in_my && memEQs(SvPVX(sv), len, "const")) {
5841 Perl_ck_warner_d(aTHX_
5842 packWARN(WARN_EXPERIMENTAL__CONST_ATTR),
5843 ":const is experimental"
5845 CvANONCONST_on(PL_compcv);
5846 if (!CvANON(PL_compcv))
5847 yyerror(":const is not permitted on named "
5851 /* After we've set the flags, it could be argued that
5852 we don't need to do the attributes.pm-based setting
5853 process, and shouldn't bother appending recognized
5854 flags. To experiment with that, uncomment the
5855 following "else". (Note that's already been
5856 uncommented. That keeps the above-applied built-in
5857 attributes from being intercepted (and possibly
5858 rejected) by a package's attribute routines, but is
5859 justified by the performance win for the common case
5860 of applying only built-in attributes.) */
5862 attrs = op_append_elem(OP_LIST, attrs,
5863 newSVOP(OP_CONST, 0,
5867 if (*s == ':' && s[1] != ':')
5870 break; /* require real whitespace or :'s */
5871 /* XXX losing whitespace on sequential attributes here */
5876 && !(PL_expect == XOPERATOR
5877 ? (*s == '=' || *s == ')')
5878 : (*s == '{' || *s == '(')))
5880 const char q = ((*s == '\'') ? '"' : '\'');
5881 /* If here for an expression, and parsed no attrs, back off. */
5882 if (PL_expect == XOPERATOR && !attrs) {
5886 /* MUST advance bufptr here to avoid bogus "at end of line"
5887 context messages from yyerror().
5890 yyerror( (const char *)
5892 ? Perl_form(aTHX_ "Invalid separator character "
5893 "%c%c%c in attribute list", q, *s, q)
5894 : "Unterminated attribute list" ) );
5901 if (PL_parser->sig_seen) {
5902 /* see comment about about sig_seen and parser error
5906 Perl_croak(aTHX_ "Subroutine attributes must come "
5907 "before the signature");
5910 NEXTVAL_NEXTTOKE.opval = attrs;
5916 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5921 PL_lex_allbrackets--;
5926 yyl_subproto(pTHX_ char *s, CV *cv)
5928 STRLEN protolen = CvPROTOLEN(cv);
5929 const char *proto = CvPROTO(cv);
5932 proto = S_strip_spaces(aTHX_ proto, &protolen);
5935 if ((optional = *proto == ';')) {
5938 } while (*proto == ';');
5944 *proto == '$' || *proto == '_'
5945 || *proto == '*' || *proto == '+'
5950 *proto == '\\' && proto[1] && proto[2] == '\0'
5953 UNIPROTO(UNIOPSUB,optional);
5956 if (*proto == '\\' && proto[1] == '[') {
5957 const char *p = proto + 2;
5958 while(*p && *p != ']')
5960 if(*p == ']' && !p[1])
5961 UNIPROTO(UNIOPSUB,optional);
5964 if (*proto == '&' && *s == '{') {
5966 sv_setpvs(PL_subname, "__ANON__");
5968 sv_setpvs(PL_subname, "__ANON__::__ANON__");
5969 if (!PL_lex_allbrackets
5970 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
5972 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5981 yyl_leftcurly(pTHX_ char *s, const U8 formbrack)
5984 if (PL_lex_brackets > 100) {
5985 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5988 switch (PL_expect) {
5991 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5992 PL_lex_allbrackets++;
5993 OPERATOR(HASHBRACK);
5995 while (s < PL_bufend && SPACE_OR_TAB(*s))
5998 PL_tokenbuf[0] = '\0';
5999 if (d < PL_bufend && *d == '-') {
6000 PL_tokenbuf[0] = '-';
6002 while (d < PL_bufend && SPACE_OR_TAB(*d))
6005 if (d < PL_bufend && isIDFIRST_lazy_if_safe(d, PL_bufend, UTF)) {
6007 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6009 while (d < PL_bufend && SPACE_OR_TAB(*d))
6012 const char minus = (PL_tokenbuf[0] == '-');
6013 s = force_word(s + minus, BAREWORD, FALSE, TRUE);
6021 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6022 PL_lex_allbrackets++;
6027 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
6028 PL_lex_allbrackets++;
6032 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6033 PL_lex_allbrackets++;
6038 if (PL_oldoldbufptr == PL_last_lop)
6039 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
6041 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
6042 PL_lex_allbrackets++;
6045 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6047 /* This hack is to get the ${} in the message. */
6049 yyerror("syntax error");
6052 OPERATOR(HASHBRACK);
6054 if (PL_expect == XREF && PL_oldoldbufptr != PL_last_lop) {
6055 /* ${...} or @{...} etc., but not print {...}
6056 * Skip the disambiguation and treat this as a block.
6058 goto block_expectation;
6060 /* This hack serves to disambiguate a pair of curlies
6061 * as being a block or an anon hash. Normally, expectation
6062 * determines that, but in cases where we're not in a
6063 * position to expect anything in particular (like inside
6064 * eval"") we have to resolve the ambiguity. This code
6065 * covers the case where the first term in the curlies is a
6066 * quoted string. Most other cases need to be explicitly
6067 * disambiguated by prepending a "+" before the opening
6068 * curly in order to force resolution as an anon hash.
6070 * XXX should probably propagate the outer expectation
6071 * into eval"" to rely less on this hack, but that could
6072 * potentially break current behavior of eval"".
6076 if (*s == '\'' || *s == '"' || *s == '`') {
6077 /* common case: get past first string, handling escapes */
6078 for (t++; t < PL_bufend && *t != *s;)
6083 else if (*s == 'q') {
6086 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
6087 && !isWORDCHAR(*t))))
6089 /* skip q//-like construct */
6091 char open, close, term;
6094 while (t < PL_bufend && isSPACE(*t))
6096 /* check for q => */
6097 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6098 OPERATOR(HASHBRACK);
6102 if (term && (tmps = memCHRs("([{< )]}> )]}>",term)))
6106 for (t++; t < PL_bufend; t++) {
6107 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
6109 else if (*t == open)
6113 for (t++; t < PL_bufend; t++) {
6114 if (*t == '\\' && t+1 < PL_bufend)
6116 else if (*t == close && --brackets <= 0)
6118 else if (*t == open)
6125 /* skip plain q word */
6126 while ( t < PL_bufend
6127 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6129 t += UTF ? UTF8SKIP(t) : 1;
6132 else if (isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF)) {
6133 t += UTF ? UTF8SKIP(t) : 1;
6134 while ( t < PL_bufend
6135 && isWORDCHAR_lazy_if_safe(t, PL_bufend, UTF))
6137 t += UTF ? UTF8SKIP(t) : 1;
6140 while (t < PL_bufend && isSPACE(*t))
6142 /* if comma follows first term, call it an anon hash */
6143 /* XXX it could be a comma expression with loop modifiers */
6144 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
6145 || (*t == '=' && t[1] == '>')))
6146 OPERATOR(HASHBRACK);
6147 if (PL_expect == XREF) {
6149 /* If there is an opening brace or 'sub:', treat it
6150 as a term to make ${{...}}{k} and &{sub:attr...}
6151 dwim. Otherwise, treat it as a statement, so
6152 map {no strict; ...} works.
6159 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "sub")) {
6172 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6179 pl_yylval.ival = CopLINE(PL_curcop);
6180 PL_copline = NOLINE; /* invalidate current command line number */
6181 TOKEN(formbrack ? '=' : '{');
6185 yyl_rightcurly(pTHX_ char *s, const U8 formbrack)
6187 assert(s != PL_bufend);
6190 if (PL_lex_brackets <= 0)
6191 /* diag_listed_as: Unmatched right %s bracket */
6192 yyerror("Unmatched right curly bracket");
6194 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
6196 PL_lex_allbrackets--;
6198 if (PL_lex_state == LEX_INTERPNORMAL) {
6199 if (PL_lex_brackets == 0) {
6200 if (PL_expect & XFAKEBRACK) {
6201 PL_expect &= XENUMMASK;
6202 PL_lex_state = LEX_INTERPEND;
6204 return yylex(); /* ignore fake brackets */
6206 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6207 && SvEVALED(PL_lex_repl))
6208 PL_lex_state = LEX_INTERPEND;
6209 else if (*s == '-' && s[1] == '>')
6210 PL_lex_state = LEX_INTERPENDMAYBE;
6211 else if (*s != '[' && *s != '{')
6212 PL_lex_state = LEX_INTERPEND;
6216 if (PL_expect & XFAKEBRACK) {
6217 PL_expect &= XENUMMASK;
6219 return yylex(); /* ignore fake brackets */
6222 force_next(formbrack ? '.' : '}');
6223 if (formbrack) LEAVE_with_name("lex_format");
6224 if (formbrack == 2) { /* means . where arguments were expected */
6233 yyl_ampersand(pTHX_ char *s)
6235 if (PL_expect == XPOSTDEREF)
6240 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6241 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6249 if (PL_expect == XOPERATOR) {
6252 if ( PL_bufptr == PL_linestart
6253 && ckWARN(WARN_SEMICOLON)
6254 && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
6256 CopLINE_dec(PL_curcop);
6257 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
6258 CopLINE_inc(PL_curcop);
6261 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6263 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6264 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6270 BAop(bof ? OP_NBIT_AND : OP_BIT_AND);
6275 PL_tokenbuf[0] = '&';
6276 s = scan_ident(s - 1, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, TRUE);
6277 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
6280 force_ident_maybe_lex('&');
6288 yyl_verticalbar(pTHX_ char *s)
6295 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6296 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6305 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.')
6308 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6309 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6314 BOop(bof ? s == d ? OP_NBIT_OR : OP_SBIT_OR : OP_BIT_OR);
6318 yyl_bang(pTHX_ char *s)
6320 const char tmp = *s++;
6322 /* was this !=~ where !~ was meant?
6323 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6325 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6326 const char *t = s+1;
6328 while (t < PL_bufend && isSPACE(*t))
6331 if (*t == '/' || *t == '?'
6332 || ((*t == 'm' || *t == 's' || *t == 'y')
6333 && !isWORDCHAR(t[1]))
6334 || (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
6335 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6336 "!=~ should be !~");
6339 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6355 yyl_snail(pTHX_ char *s)
6357 if (PL_expect == XPOSTDEREF)
6359 PL_tokenbuf[0] = '@';
6360 s = scan_ident(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6361 if (PL_expect == XOPERATOR) {
6363 if (PL_bufptr > s) {
6365 PL_bufptr = PL_oldbufptr;
6370 if (!PL_tokenbuf[1]) {
6373 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
6375 if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6376 && intuit_more(s, PL_bufend))
6379 PL_tokenbuf[0] = '%';
6381 /* Warn about @ where they meant $. */
6382 if (*s == '[' || *s == '{') {
6383 if (ckWARN(WARN_SYNTAX)) {
6384 S_check_scalar_slice(aTHX_ s);
6388 PL_expect = XOPERATOR;
6389 force_ident_maybe_lex('@');
6394 yyl_slash(pTHX_ char *s)
6396 if ((PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR) && s[1] == '/') {
6397 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6398 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6403 else if (PL_expect == XOPERATOR) {
6405 if (*s == '=' && !PL_lex_allbrackets
6406 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
6414 /* Disable warning on "study /blah/" */
6415 if ( PL_oldoldbufptr == PL_last_uni
6416 && ( *PL_last_uni != 's' || s - PL_last_uni < 5
6417 || memNE(PL_last_uni, "study", 5)
6418 || isWORDCHAR_lazy_if_safe(PL_last_uni+5, PL_bufend, UTF)
6421 s = scan_pat(s,OP_MATCH);
6422 TERM(sublex_start());
6427 yyl_leftsquare(pTHX_ char *s)
6431 if (PL_lex_brackets > 100)
6432 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
6433 PL_lex_brackstack[PL_lex_brackets++] = 0;
6434 PL_lex_allbrackets++;
6440 yyl_rightsquare(pTHX_ char *s)
6442 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6445 if (PL_lex_brackets <= 0)
6446 /* diag_listed_as: Unmatched right %s bracket */
6447 yyerror("Unmatched right square bracket");
6450 PL_lex_allbrackets--;
6451 if (PL_lex_state == LEX_INTERPNORMAL) {
6452 if (PL_lex_brackets == 0) {
6453 if (*s == '-' && s[1] == '>')
6454 PL_lex_state = LEX_INTERPENDMAYBE;
6455 else if (*s != '[' && *s != '{')
6456 PL_lex_state = LEX_INTERPEND;
6463 yyl_tilde(pTHX_ char *s)
6466 if (s[1] == '~' && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR)) {
6467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
6470 Perl_ck_warner_d(aTHX_
6471 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
6472 "Smartmatch is experimental");
6473 NCEop(OP_SMARTMATCH);
6476 if ((bof = FEATURE_BITWISE_IS_ENABLED) && *s == '.') {
6478 BCop(OP_SCOMPLEMENT);
6480 BCop(bof ? OP_NCOMPLEMENT : OP_COMPLEMENT);
6484 yyl_leftparen(pTHX_ char *s)
6486 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6487 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
6491 PL_lex_allbrackets++;
6496 yyl_rightparen(pTHX_ char *s)
6498 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6501 PL_lex_allbrackets--;
6509 yyl_leftpointy(pTHX_ char *s)
6513 if (PL_expect != XOPERATOR) {
6514 if (s[1] != '<' && !memchr(s,'>', PL_bufend - s))
6516 if (s[1] == '<' && s[2] != '>')
6517 s = scan_heredoc(s);
6519 s = scan_inputsymbol(s);
6520 PL_expect = XOPERATOR;
6521 TOKEN(sublex_start());
6528 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6532 SHop(OP_LEFT_SHIFT);
6537 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6544 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6552 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6561 yyl_rightpointy(pTHX_ char *s)
6563 const char tmp = *s++;
6566 if (*s == '=' && !PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6570 SHop(OP_RIGHT_SHIFT);
6572 else if (tmp == '=') {
6573 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6581 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6590 yyl_sglquote(pTHX_ char *s)
6592 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6594 missingterm(NULL, 0);
6595 COPLINE_SET_FROM_MULTI_END;
6596 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
6597 if (PL_expect == XOPERATOR) {
6600 pl_yylval.ival = OP_CONST;
6601 TERM(sublex_start());
6605 yyl_dblquote(pTHX_ char *s)
6609 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6612 printbuf("### Saw string before %s\n", s);
6614 PerlIO_printf(Perl_debug_log,
6615 "### Saw unterminated string\n");
6617 if (PL_expect == XOPERATOR) {
6621 missingterm(NULL, 0);
6622 pl_yylval.ival = OP_CONST;
6623 /* FIXME. I think that this can be const if char *d is replaced by
6624 more localised variables. */
6625 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
6626 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6627 pl_yylval.ival = OP_STRINGIFY;
6631 if (pl_yylval.ival == OP_CONST)
6632 COPLINE_SET_FROM_MULTI_END;
6633 TERM(sublex_start());
6637 yyl_backtick(pTHX_ char *s)
6639 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
6642 printbuf("### Saw backtick string before %s\n", s);
6644 PerlIO_printf(Perl_debug_log,
6645 "### Saw unterminated backtick string\n");
6647 if (PL_expect == XOPERATOR)
6648 no_op("Backticks",s);
6650 missingterm(NULL, 0);
6651 pl_yylval.ival = OP_BACKTICK;
6652 TERM(sublex_start());
6656 yyl_backslash(pTHX_ char *s)
6658 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr && isDIGIT(*s))
6659 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6661 if (PL_expect == XOPERATOR)
6662 no_op("Backslash",s);
6667 yyl_data_handle(pTHX)
6669 HV * const stash = PL_tokenbuf[2] == 'D' && PL_curstash
6672 GV *gv = (GV *)*hv_fetchs(stash, "DATA", 1);
6675 gv_init(gv,stash,"DATA",4,0);
6679 GvIOp(gv) = newIO();
6680 IoIFP(GvIOp(gv)) = PL_rsfp;
6682 /* Mark this internal pseudo-handle as clean */
6683 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
6684 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
6685 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
6687 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
6689 #if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6690 /* if the script was opened in binmode, we need to revert
6691 * it to text mode for compatibility; but only iff it has CRs
6692 * XXX this is a questionable hack at best. */
6693 if (PL_bufend-PL_bufptr > 2
6694 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
6697 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
6698 loc = PerlIO_tell(PL_rsfp);
6699 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6701 if (PerlLIO_setmode(RSFP_FILENO, O_TEXT) != -1) {
6703 PerlIO_seek(PL_rsfp, loc, 0);
6708 #ifdef PERLIO_LAYERS
6711 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6718 PERL_STATIC_NO_RET void yyl_croak_unrecognised(pTHX_ char*)
6719 __attribute__noreturn__;
6721 PERL_STATIC_NO_RET void
6722 yyl_croak_unrecognised(pTHX_ char *s)
6724 SV *dsv = newSVpvs_flags("", SVs_TEMP);
6730 STRLEN skiplen = UTF8SKIP(s);
6731 STRLEN stravail = PL_bufend - s;
6732 c = sv_uni_display(dsv, newSVpvn_flags(s,
6733 skiplen > stravail ? stravail : skiplen,
6734 SVs_TEMP | SVf_UTF8),
6735 10, UNI_DISPLAY_ISPRINT);
6738 c = Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
6741 if (s >= PL_linestart) {
6745 /* somehow (probably due to a parse failure), PL_linestart has advanced
6746 * pass PL_bufptr, get a reasonable beginning of line
6749 while (d > SvPVX(PL_linestr) && d[-1] && d[-1] != '\n')
6752 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) d, (U8 *) s) : (STRLEN) (s - d);
6753 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
6754 d = UTF ? (char *) utf8_hop_back((U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT, (U8 *)d) : s - UNRECOGNIZED_PRECEDE_COUNT;
6757 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %" UTF8f "<-- HERE near column %d", c,
6758 UTF8fARG(UTF, (s - d), d),
6763 yyl_require(pTHX_ char *s, I32 orig_keyword)
6767 s = force_version(s, FALSE);
6769 else if (*s != 'v' || !isDIGIT(s[1])
6770 || (s = force_version(s, TRUE), *s == 'v'))
6772 *PL_tokenbuf = '\0';
6773 s = force_word(s,BAREWORD,TRUE,TRUE);
6774 if (isIDFIRST_lazy_if_safe(PL_tokenbuf,
6775 PL_tokenbuf + sizeof(PL_tokenbuf),
6778 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
6779 GV_ADD | (UTF ? SVf_UTF8 : 0));
6782 yyerror("<> at require-statement should be quotes");
6785 if (orig_keyword == KEY_require)
6790 PL_expect = PL_nexttoke ? XOPERATOR : XTERM;
6792 PL_last_uni = PL_oldbufptr;
6793 PL_last_lop_op = OP_REQUIRE;
6795 return REPORT( (int)REQUIRE );
6799 yyl_foreach(pTHX_ char *s)
6801 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6803 pl_yylval.ival = CopLINE(PL_curcop);
6805 if (PL_expect == XSTATE && isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6807 SSize_t s_off = s - SvPVX(PL_linestr);
6810 if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "my") && isSPACE(p[2])) {
6813 else if (memBEGINPs(p, (STRLEN) (PL_bufend - p), "our") && isSPACE(p[3])) {
6818 /* skip optional package name, as in "for my abc $x (..)" */
6819 if (isIDFIRST_lazy_if_safe(p, PL_bufend, UTF)) {
6820 p = scan_word(p, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6823 if (*p != '$' && *p != '\\')
6824 Perl_croak(aTHX_ "Missing $ on loop variable");
6826 /* The buffer may have been reallocated, update s */
6827 s = SvPVX(PL_linestr) + s_off;
6833 yyl_do(pTHX_ char *s, I32 orig_keyword)
6842 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
6844 if (len && memNEs(PL_tokenbuf+1, len, "CORE")
6845 && !keyword(PL_tokenbuf + 1, len, 0)) {
6846 SSize_t off = s-SvPVX(PL_linestr);
6848 s = SvPVX(PL_linestr)+off;
6850 force_ident_maybe_lex('&');
6855 if (orig_keyword == KEY_do)
6863 yyl_my(pTHX_ char *s, I32 my)
6867 yyerror(Perl_form(aTHX_
6868 "Can't redeclare \"%s\" in \"%s\"",
6869 my == KEY_my ? "my" :
6870 my == KEY_state ? "state" : "our",
6871 PL_in_my == KEY_my ? "my" :
6872 PL_in_my == KEY_state ? "state" : "our"));
6876 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
6878 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
6879 if (memEQs(PL_tokenbuf, len, "sub"))
6880 return yyl_sub(aTHX_ s, my);
6881 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
6882 if (!PL_in_my_stash) {
6886 i = my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
6887 PERL_MY_SNPRINTF_POST_GUARD(i, sizeof(tmpbuf));
6888 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
6891 else if (*s == '\\') {
6892 if (!FEATURE_MYREF_IS_ENABLED)
6893 Perl_croak(aTHX_ "The experimental declared_refs "
6894 "feature is not enabled");
6895 Perl_ck_warner_d(aTHX_
6896 packWARN(WARN_EXPERIMENTAL__DECLARED_REFS),
6897 "Declaring references is experimental");
6902 static int yyl_try(pTHX_ char*);
6905 yyl_eol_needs_semicolon(pTHX_ char **ps)
6908 if (PL_lex_state != LEX_NORMAL
6909 || (PL_in_eval && !PL_rsfp && !PL_parser->filtered))
6911 const bool in_comment = *s == '#';
6913 if (*s == '#' && s == PL_linestart && PL_in_eval
6914 && !PL_rsfp && !PL_parser->filtered) {
6915 /* handle eval qq[#line 1 "foo"\n ...] */
6916 CopLINE_dec(PL_curcop);
6917 incline(s, PL_bufend);
6920 while (d < PL_bufend && *d != '\n')
6925 if (in_comment && d == PL_bufend
6926 && PL_lex_state == LEX_INTERPNORMAL
6927 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6928 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
6930 incline(s, PL_bufend);
6931 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
6932 PL_lex_state = LEX_FORMLINE;
6933 force_next(FORMRBRACK);
6939 while (s < PL_bufend && *s != '\n')
6941 if (s < PL_bufend) {
6944 incline(s, PL_bufend);
6952 yyl_fake_eof(pTHX_ U32 fake_eof, bool bof, char *s)
6960 bof = cBOOL(PL_rsfp);
6963 PL_bufptr = PL_bufend;
6964 COPLINE_INC_WITH_HERELINES;
6965 if (!lex_next_chunk(fake_eof)) {
6966 CopLINE_dec(PL_curcop);
6968 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
6970 CopLINE_dec(PL_curcop);
6972 /* If it looks like the start of a BOM or raw UTF-16,
6973 * check if it in fact is. */
6976 || *(U8*)s == BOM_UTF8_FIRST_BYTE
6980 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
6981 bof = (offset == (Off_t)SvCUR(PL_linestr));
6982 #if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
6983 /* offset may include swallowed CR */
6985 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6988 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
6989 s = swallow_bom((U8*)s);
6992 if (PL_parser->in_pod) {
6993 /* Incest with pod. */
6994 if ( memBEGINPs(s, (STRLEN) (PL_bufend - s), "=cut")
6997 SvPVCLEAR(PL_linestr);
6998 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
6999 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7000 PL_last_lop = PL_last_uni = NULL;
7001 PL_parser->in_pod = 0;
7004 if (PL_rsfp || PL_parser->filtered)
7005 incline(s, PL_bufend);
7006 } while (PL_parser->in_pod);
7008 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
7009 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7010 PL_last_lop = PL_last_uni = NULL;
7011 if (CopLINE(PL_curcop) == 1) {
7012 while (s < PL_bufend && isSPACE(*s))
7014 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
7018 if (*s == '#' && *(s+1) == '!')
7020 #ifdef ALTERNATE_SHEBANG
7022 static char const as[] = ALTERNATE_SHEBANG;
7023 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
7024 d = s + (sizeof(as) - 1);
7026 #endif /* ALTERNATE_SHEBANG */
7035 while (*d && !isSPACE(*d))
7039 #ifdef ARG_ZERO_IS_SCRIPT
7040 if (ipathend > ipath) {
7042 * HP-UX (at least) sets argv[0] to the script name,
7043 * which makes $^X incorrect. And Digital UNIX and Linux,
7044 * at least, set argv[0] to the basename of the Perl
7045 * interpreter. So, having found "#!", we'll set it right.
7047 SV* copfilesv = CopFILESV(PL_curcop);
7050 GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
7052 assert(SvPOK(x) || SvGMAGICAL(x));
7053 if (sv_eq(x, copfilesv)) {
7054 sv_setpvn(x, ipath, ipathend - ipath);
7060 const char *bstart = SvPV_const(copfilesv, blen);
7061 const char * const lstart = SvPV_const(x, llen);
7063 bstart += blen - llen;
7064 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
7065 sv_setpvn(x, ipath, ipathend - ipath);
7072 /* Anything to do if no copfilesv? */
7074 TAINT_NOT; /* $^X is always tainted, but that's OK */
7076 #endif /* ARG_ZERO_IS_SCRIPT */
7081 d = instr(s,"perl -");
7083 d = instr(s,"perl");
7085 /* avoid getting into infinite loops when shebang
7086 * line contains "Perl" rather than "perl" */
7088 for (d = ipathend-4; d >= ipath; --d) {
7089 if (isALPHA_FOLD_EQ(*d, 'p')
7090 && !ibcmp(d, "perl", 4))
7100 #ifdef ALTERNATE_SHEBANG
7102 * If the ALTERNATE_SHEBANG on this system starts with a
7103 * character that can be part of a Perl expression, then if
7104 * we see it but not "perl", we're probably looking at the
7105 * start of Perl code, not a request to hand off to some
7106 * other interpreter. Similarly, if "perl" is there, but
7107 * not in the first 'word' of the line, we assume the line
7108 * contains the start of the Perl program.
7110 if (d && *s != '#') {
7111 const char *c = ipath;
7112 while (*c && !memCHRs("; \t\r\n\f\v#", *c))
7115 d = NULL; /* "perl" not in first word; ignore */
7117 *s = '#'; /* Don't try to parse shebang line */
7119 #endif /* ALTERNATE_SHEBANG */
7124 && !instr(s,"indir")
7125 && instr(PL_origargv[0],"perl"))
7132 while (s < PL_bufend && isSPACE(*s))
7134 if (s < PL_bufend) {
7135 Newx(newargv,PL_origargc+3,char*);
7137 while (s < PL_bufend && !isSPACE(*s))
7140 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
7143 newargv = PL_origargv;
7146 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
7148 Perl_croak(aTHX_ "Can't exec %s", ipath);
7151 while (*d && !isSPACE(*d))
7153 while (SPACE_OR_TAB(*d))
7157 const bool switches_done = PL_doswitches;
7158 const U32 oldpdb = PL_perldb;
7159 const bool oldn = PL_minus_n;
7160 const bool oldp = PL_minus_p;
7164 bool baduni = FALSE;
7166 const char *d2 = d1 + 1;
7167 if (parse_unicode_opts((const char **)&d2)
7171 if (baduni || isALPHA_FOLD_EQ(*d1, 'M')) {
7172 const char * const m = d1;
7173 while (*d1 && !isSPACE(*d1))
7175 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
7178 d1 = moreswitches(d1);
7180 if (PL_doswitches && !switches_done) {
7181 int argc = PL_origargc;
7182 char **argv = PL_origargv;
7185 } while (argc && argv[0][0] == '-' && argv[0][1]);
7186 init_argv_symbols(argc,argv);
7188 if ( (PERLDB_LINE_OR_SAVESRC && !oldpdb)
7189 || ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
7190 /* if we have already added "LINE: while (<>) {",
7191 we must not do it again */
7193 SvPVCLEAR(PL_linestr);
7194 PL_bufptr = PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
7195 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7196 PL_last_lop = PL_last_uni = NULL;
7197 PL_preambled = FALSE;
7198 if (PERLDB_LINE_OR_SAVESRC)
7199 (void)gv_fetchfile(PL_origfilename);
7207 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
7208 PL_lex_state = LEX_FORMLINE;
7209 force_next(FORMRBRACK);
7218 yyl_fatcomma(pTHX_ char *s, STRLEN len)
7222 = newSVOP(OP_CONST, 0,
7223 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
7224 pl_yylval.opval->op_private = OPpCONST_BARE;
7229 yyl_safe_bareword(pTHX_ char *s, const char lastchar)
7231 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7232 && PL_parser->saw_infix_sigil)
7234 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7235 "Operator or semicolon missing before %c%" UTF8f,
7237 UTF8fARG(UTF, strlen(PL_tokenbuf),
7239 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7240 "Ambiguous use of %c resolved as operator %c",
7241 lastchar, lastchar);
7247 yyl_constant_op(pTHX_ char *s, SV *sv, CV *cv, OP *rv2cv_op, PADOFFSET off)
7251 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7252 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
7253 if (SvTYPE(sv) == SVt_PVAV)
7254 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7257 pl_yylval.opval->op_private = 0;
7258 pl_yylval.opval->op_folded = 1;
7259 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7264 op_free(pl_yylval.opval);
7266 off ? newCVREF(0, rv2cv_op) : rv2cv_op;
7267 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7268 PL_last_lop = PL_oldbufptr;
7269 PL_last_lop_op = OP_ENTERSUB;
7271 /* Is there a prototype? */
7273 int k = yyl_subproto(aTHX_ s, cv);
7278 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
7280 force_next(off ? PRIVATEREF : BAREWORD);
7281 if (!PL_lex_allbrackets
7282 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7284 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7290 /* Honour "reserved word" warnings, and enforce strict subs */
7292 yyl_strictwarn_bareword(pTHX_ const char lastchar)
7294 /* after "print" and similar functions (corresponding to
7295 * "F? L" in opcode.pl), whatever wasn't already parsed as
7296 * a filehandle should be subject to "strict subs".
7297 * Likewise for the optional indirect-object argument to system
7298 * or exec, which can't be a bareword */
7299 if ((PL_last_lop_op == OP_PRINT
7300 || PL_last_lop_op == OP_PRTF
7301 || PL_last_lop_op == OP_SAY
7302 || PL_last_lop_op == OP_SYSTEM
7303 || PL_last_lop_op == OP_EXEC)
7304 && (PL_hints & HINT_STRICT_SUBS))
7306 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7309 if (lastchar != '-' && ckWARN(WARN_RESERVED)) {
7310 char *d = PL_tokenbuf;
7313 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0)) {
7314 /* PL_warn_reserved is constant */
7315 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
7316 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
7318 GCC_DIAG_RESTORE_STMT;
7324 yyl_just_a_word(pTHX_ char *s, STRLEN len, I32 orig_keyword, struct code c)
7327 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
7329 bool no_op_error = FALSE;
7330 /* Use this var to track whether intuit_method has been
7331 called. intuit_method returns 0 or > 255. */
7334 if (PL_expect == XOPERATOR) {
7335 if (PL_bufptr == PL_linestart) {
7336 CopLINE_dec(PL_curcop);
7337 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
7338 CopLINE_inc(PL_curcop);
7341 /* We want to call no_op with s pointing after the
7342 bareword, so defer it. But we want it to come
7343 before the Bad name croak. */
7347 /* Get the rest if it looks like a package qualifier */
7349 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
7351 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
7354 no_op("Bareword",s);
7355 no_op_error = FALSE;
7358 Perl_croak(aTHX_ "Bad name after %" UTF8f "%s",
7359 UTF8fARG(UTF, len, PL_tokenbuf),
7360 *s == '\'' ? "'" : "::");
7366 no_op("Bareword",s);
7368 /* See if the name is "Foo::",
7369 in which case Foo is a bareword
7370 (and a package name). */
7372 if (len > 2 && PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':') {
7373 if (ckWARN(WARN_BAREWORD)
7374 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
7375 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
7376 "Bareword \"%" UTF8f
7377 "\" refers to nonexistent package",
7378 UTF8fARG(UTF, len, PL_tokenbuf));
7380 PL_tokenbuf[len] = '\0';
7389 /* if we saw a global override before, get the right name */
7392 c.sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len);
7394 SV *sv = newSVpvs("CORE::GLOBAL::");
7400 /* Presume this is going to be a bareword of some sort. */
7402 pl_yylval.opval = newSVOP(OP_CONST, 0, c.sv);
7403 pl_yylval.opval->op_private = OPpCONST_BARE;
7405 /* And if "Foo::", then that's what it certainly is. */
7407 return yyl_safe_bareword(aTHX_ s, lastchar);
7410 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(c.sv));
7411 const_op->op_private = OPpCONST_BARE;
7412 c.rv2cv_op = newCVREF(OPpMAY_RETURN_CONSTANT<<8, const_op);
7416 : SvROK(c.gv) && SvTYPE(SvRV(c.gv)) == SVt_PVCV
7419 : rv2cv_op_cv(c.rv2cv_op, RV2CVOPCV_RETURN_STUB);
7422 /* See if it's the indirect object for a list operator. */
7425 && PL_oldoldbufptr < PL_bufptr
7426 && (PL_oldoldbufptr == PL_last_lop
7427 || PL_oldoldbufptr == PL_last_uni)
7428 && /* NO SKIPSPACE BEFORE HERE! */
7430 || ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7)
7433 bool immediate_paren = *s == '(';
7436 /* (Now we can afford to cross potential line boundary.) */
7439 /* intuit_method() can indirectly call lex_next_chunk(),
7442 s_off = s - SvPVX(PL_linestr);
7443 /* Two barewords in a row may indicate method call. */
7444 if ( ( isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)
7446 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7448 /* the code at method: doesn't use s */
7451 s = SvPVX(PL_linestr) + s_off;
7453 /* If not a declared subroutine, it's an indirect object. */
7454 /* (But it's an indir obj regardless for sort.) */
7455 /* Also, if "_" follows a filetest operator, it's a bareword */
7458 ( !immediate_paren && (PL_last_lop_op == OP_SORT
7460 && (PL_last_lop_op != OP_MAPSTART
7461 && PL_last_lop_op != OP_GREPSTART))))
7462 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7463 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK)
7467 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
7468 yyl_strictwarn_bareword(aTHX_ lastchar);
7469 op_free(c.rv2cv_op);
7470 return yyl_safe_bareword(aTHX_ s, lastchar);
7474 PL_expect = XOPERATOR;
7477 /* Is this a word before a => operator? */
7478 if (*s == '=' && s[1] == '>' && !pkgname) {
7479 op_free(c.rv2cv_op);
7481 if (c.gvp || (c.lex && !c.off)) {
7482 assert (cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7483 /* This is our own scalar, created a few lines
7484 above, so this is safe. */
7485 SvREADONLY_off(c.sv);
7486 sv_setpv(c.sv, PL_tokenbuf);
7487 if (UTF && !IN_BYTES
7488 && is_utf8_string((U8*)PL_tokenbuf, len))
7490 SvREADONLY_on(c.sv);
7495 /* If followed by a paren, it's certainly a subroutine. */
7500 while (SPACE_OR_TAB(*d))
7502 if (*d == ')' && (c.sv = cv_const_sv_or_av(c.cv)))
7503 return yyl_constant_op(aTHX_ d + 1, c.sv, c.cv, c.rv2cv_op, c.off);
7505 NEXTVAL_NEXTTOKE.opval =
7506 c.off ? c.rv2cv_op : pl_yylval.opval;
7508 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7509 else op_free(c.rv2cv_op), force_next(BAREWORD);
7514 /* If followed by var or block, call it a method (unless sub) */
7516 if ((*s == '$' || *s == '{') && !c.cv && FEATURE_INDIRECT_IS_ENABLED) {
7517 op_free(c.rv2cv_op);
7518 PL_last_lop = PL_oldbufptr;
7519 PL_last_lop_op = OP_METHOD;
7520 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7521 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7522 PL_expect = XBLOCKTERM;
7524 return REPORT(METHOD);
7527 /* If followed by a bareword, see if it looks like indir obj. */
7531 && (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF) || *s == '$')
7532 && (key = intuit_method(s, c.lex ? NULL : c.sv, c.cv)))
7535 if (c.lex && !c.off) {
7536 assert(cSVOPx(pl_yylval.opval)->op_sv == c.sv);
7537 SvREADONLY_off(c.sv);
7538 sv_setpvn(c.sv, PL_tokenbuf, len);
7539 if (UTF && !IN_BYTES
7540 && is_utf8_string((U8*)PL_tokenbuf, len))
7542 else SvUTF8_off(c.sv);
7544 op_free(c.rv2cv_op);
7545 if (key == METHOD && !PL_lex_allbrackets
7546 && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7548 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
7553 /* Not a method, so call it a subroutine (if defined) */
7556 /* Check for a constant sub */
7557 c.sv = cv_const_sv_or_av(c.cv);
7558 return yyl_constant_op(aTHX_ s, c.sv, c.cv, c.rv2cv_op, c.off);
7561 /* Call it a bare word */
7563 if (PL_hints & HINT_STRICT_SUBS)
7564 pl_yylval.opval->op_private |= OPpCONST_STRICT;
7566 yyl_strictwarn_bareword(aTHX_ lastchar);
7568 op_free(c.rv2cv_op);
7570 return yyl_safe_bareword(aTHX_ s, lastchar);
7574 yyl_word_or_keyword(pTHX_ char *s, STRLEN len, I32 key, I32 orig_keyword, struct code c)
7577 default: /* not a keyword */
7578 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7581 FUN0OP( newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0)) );
7585 newSVOP(OP_CONST, 0,
7586 Perl_newSVpvf(aTHX_ "%" IVdf, (IV)CopLINE(PL_curcop)))
7589 case KEY___PACKAGE__:
7591 newSVOP(OP_CONST, 0, (PL_curstash
7592 ? newSVhek(HvNAME_HEK(PL_curstash))
7598 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D'))
7599 yyl_data_handle(aTHX);
7600 return yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s);
7603 FUN0OP(CvCLONE(PL_compcv)
7604 ? newOP(OP_RUNCV, 0)
7605 : newPVOP(OP_RUNCV,0,NULL));
7614 if (PL_expect == XSTATE)
7615 return yyl_sub(aTHX_ PL_bufptr, key);
7616 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
7625 LOP(OP_ACCEPT,XTERM);
7628 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7633 LOP(OP_ATAN2,XTERM);
7639 LOP(OP_BINMODE,XTERM);
7642 LOP(OP_BLESS,XTERM);
7651 /* We have to disambiguate the two senses of
7652 "continue". If the next token is a '{' then
7653 treat it as the start of a continue block;
7654 otherwise treat it as a control operator.
7664 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
7674 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7683 if (!PL_cryptseen) {
7684 PL_cryptseen = TRUE;
7688 LOP(OP_CRYPT,XTERM);
7691 LOP(OP_CHMOD,XTERM);
7694 LOP(OP_CHOWN,XTERM);
7697 LOP(OP_CONNECT,XTERM);
7712 return yyl_do(aTHX_ s, orig_keyword);
7715 PL_hints |= HINT_BLOCK_SCOPE;
7725 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7726 STR_WITH_LEN("NDBM_File::"),
7727 STR_WITH_LEN("DB_File::"),
7728 STR_WITH_LEN("GDBM_File::"),
7729 STR_WITH_LEN("SDBM_File::"),
7730 STR_WITH_LEN("ODBM_File::"),
7732 LOP(OP_DBMOPEN,XTERM);
7744 pl_yylval.ival = CopLINE(PL_curcop);
7748 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7760 if (*s == '{') { /* block eval */
7761 PL_expect = XTERMBLOCK;
7762 UNIBRACK(OP_ENTERTRY);
7764 else { /* string eval */
7766 UNIBRACK(OP_ENTEREVAL);
7771 UNIBRACK(-OP_ENTEREVAL);
7785 case KEY_endhostent:
7791 case KEY_endservent:
7794 case KEY_endprotoent:
7805 return yyl_foreach(aTHX_ s);
7808 LOP(OP_FORMLINE,XTERM);
7817 LOP(OP_FCNTL,XTERM);
7823 LOP(OP_FLOCK,XTERM);
7826 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7831 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7836 LOP(OP_GREPSTART, XREF);
7853 case KEY_getpriority:
7854 LOP(OP_GETPRIORITY,XTERM);
7856 case KEY_getprotobyname:
7859 case KEY_getprotobynumber:
7860 LOP(OP_GPBYNUMBER,XTERM);
7862 case KEY_getprotoent:
7874 case KEY_getpeername:
7875 UNI(OP_GETPEERNAME);
7877 case KEY_gethostbyname:
7880 case KEY_gethostbyaddr:
7881 LOP(OP_GHBYADDR,XTERM);
7883 case KEY_gethostent:
7886 case KEY_getnetbyname:
7889 case KEY_getnetbyaddr:
7890 LOP(OP_GNBYADDR,XTERM);
7895 case KEY_getservbyname:
7896 LOP(OP_GSBYNAME,XTERM);
7898 case KEY_getservbyport:
7899 LOP(OP_GSBYPORT,XTERM);
7901 case KEY_getservent:
7904 case KEY_getsockname:
7905 UNI(OP_GETSOCKNAME);
7907 case KEY_getsockopt:
7908 LOP(OP_GSOCKOPT,XTERM);
7923 pl_yylval.ival = CopLINE(PL_curcop);
7924 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7925 "given is experimental");
7929 LOP( orig_keyword==KEY_glob ? -OP_GLOB : OP_GLOB, XTERM );
7935 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7937 pl_yylval.ival = CopLINE(PL_curcop);
7941 LOP(OP_INDEX,XTERM);
7947 LOP(OP_IOCTL,XTERM);
7950 Perl_ck_warner_d(aTHX_
7951 packWARN(WARN_EXPERIMENTAL__ISA), "isa is experimental");
7979 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7984 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7998 LOP(OP_LISTEN,XTERM);
8007 s = scan_pat(s,OP_MATCH);
8008 TERM(sublex_start());
8011 LOP(OP_MAPSTART, XREF);
8014 LOP(OP_MKDIR,XTERM);
8017 LOP(OP_MSGCTL,XTERM);
8020 LOP(OP_MSGGET,XTERM);
8023 LOP(OP_MSGRCV,XTERM);
8026 LOP(OP_MSGSND,XTERM);
8031 return yyl_my(aTHX_ s, key);
8037 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8042 s = tokenize_use(0, s);
8046 if (*s == '(' || (s = skipspace(s), *s == '('))
8049 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8050 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8056 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
8058 char *d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8059 for (t=d; isSPACE(*t);)
8061 if ( *t && memCHRs("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
8063 && !(t[0] == '=' && t[1] == '>')
8064 && !(t[0] == ':' && t[1] == ':')
8065 && !keyword(s, d-s, 0)
8067 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
8068 "Precedence problem: open %" UTF8f " should be open(%" UTF8f ")",
8069 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
8075 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8077 pl_yylval.ival = OP_OR;
8087 LOP(OP_OPEN_DIR,XTERM);
8090 checkcomma(s,PL_tokenbuf,"filehandle");
8094 checkcomma(s,PL_tokenbuf,"filehandle");
8113 s = force_word(s,BAREWORD,FALSE,TRUE);
8115 s = force_strict_version(s);
8119 LOP(OP_PIPE_OP,XTERM);
8122 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8124 missingterm(NULL, 0);
8125 COPLINE_SET_FROM_MULTI_END;
8126 pl_yylval.ival = OP_CONST;
8127 TERM(sublex_start());
8133 return yyl_qw(aTHX_ s, len);
8136 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8138 missingterm(NULL, 0);
8139 pl_yylval.ival = OP_STRINGIFY;
8140 if (SvIVX(PL_lex_stuff) == '\'')
8141 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
8142 TERM(sublex_start());
8145 s = scan_pat(s,OP_QR);
8146 TERM(sublex_start());
8149 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
8151 missingterm(NULL, 0);
8152 pl_yylval.ival = OP_BACKTICK;
8153 TERM(sublex_start());
8159 return yyl_require(aTHX_ s, orig_keyword);
8168 LOP(OP_RENAME,XTERM);
8177 LOP(OP_RINDEX,XTERM);
8186 UNIDOR(OP_READLINE);
8189 UNIDOR(OP_BACKTICK);
8198 LOP(OP_REVERSE,XTERM);
8201 UNIDOR(OP_READLINK);
8208 if (pl_yylval.opval)
8209 TERM(sublex_start());
8211 TOKEN(1); /* force error */
8214 checkcomma(s,PL_tokenbuf,"filehandle");
8224 LOP(OP_SELECT,XTERM);
8230 LOP(OP_SEMCTL,XTERM);
8233 LOP(OP_SEMGET,XTERM);
8236 LOP(OP_SEMOP,XTERM);
8242 LOP(OP_SETPGRP,XTERM);
8244 case KEY_setpriority:
8245 LOP(OP_SETPRIORITY,XTERM);
8247 case KEY_sethostent:
8253 case KEY_setservent:
8256 case KEY_setprotoent:
8266 LOP(OP_SEEKDIR,XTERM);
8268 case KEY_setsockopt:
8269 LOP(OP_SSOCKOPT,XTERM);
8275 LOP(OP_SHMCTL,XTERM);
8278 LOP(OP_SHMGET,XTERM);
8281 LOP(OP_SHMREAD,XTERM);
8284 LOP(OP_SHMWRITE,XTERM);
8287 LOP(OP_SHUTDOWN,XTERM);
8296 LOP(OP_SOCKET,XTERM);
8298 case KEY_socketpair:
8299 LOP(OP_SOCKPAIR,XTERM);
8302 checkcomma(s,PL_tokenbuf,"subroutine name");
8305 s = force_word(s,BAREWORD,TRUE,TRUE);
8309 LOP(OP_SPLIT,XTERM);
8312 LOP(OP_SPRINTF,XTERM);
8315 LOP(OP_SPLICE,XTERM);
8330 LOP(OP_SUBSTR,XTERM);
8334 return yyl_sub(aTHX_ s, key);
8337 LOP(OP_SYSTEM,XREF);
8340 LOP(OP_SYMLINK,XTERM);
8343 LOP(OP_SYSCALL,XTERM);
8346 LOP(OP_SYSOPEN,XTERM);
8349 LOP(OP_SYSSEEK,XTERM);
8352 LOP(OP_SYSREAD,XTERM);
8355 LOP(OP_SYSWRITE,XTERM);
8360 TERM(sublex_start());
8381 LOP(OP_TRUNCATE,XTERM);
8393 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8395 pl_yylval.ival = CopLINE(PL_curcop);
8399 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8401 pl_yylval.ival = CopLINE(PL_curcop);
8405 LOP(OP_UNLINK,XTERM);
8411 LOP(OP_UNPACK,XTERM);
8414 LOP(OP_UTIME,XTERM);
8420 LOP(OP_UNSHIFT,XTERM);
8423 s = tokenize_use(1, s);
8433 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8435 pl_yylval.ival = CopLINE(PL_curcop);
8436 Perl_ck_warner_d(aTHX_
8437 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8438 "when is experimental");
8442 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8444 pl_yylval.ival = CopLINE(PL_curcop);
8448 PL_hints |= HINT_BLOCK_SCOPE;
8455 LOP(OP_WAITPID,XTERM);
8461 /* Make sure $^L is defined. 0x0C is CTRL-L on ASCII platforms, and
8462 * we use the same number on EBCDIC */
8463 gv_fetchpvs("\x0C", GV_ADD|GV_NOTQUAL, SVt_PV);
8467 if (PL_expect == XOPERATOR) {
8468 if (*s == '=' && !PL_lex_allbrackets
8469 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8476 return yyl_just_a_word(aTHX_ s, len, orig_keyword, c);
8479 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8481 pl_yylval.ival = OP_XOR;
8487 yyl_key_core(pTHX_ char *s, STRLEN len, struct code c)
8490 I32 orig_keyword = 0;
8494 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8495 if ((*s == ':' && s[1] == ':')
8496 || (!(key = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
8498 Copy(PL_bufptr, PL_tokenbuf, olen, char);
8499 return yyl_just_a_word(aTHX_ d, olen, 0, c);
8502 Perl_croak(aTHX_ "CORE::%" UTF8f " is not a keyword",
8503 UTF8fARG(UTF, len, PL_tokenbuf));
8506 else if (key == KEY_require || key == KEY_do
8508 /* that's a way to remember we saw "CORE::" */
8511 /* Known to be a reserved word at this point */
8512 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8516 yyl_keylookup(pTHX_ char *s, GV *gv)
8522 struct code c = no_code;
8523 I32 orig_keyword = 0;
8529 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8531 /* Some keywords can be followed by any delimiter, including ':' */
8532 anydelim = word_takes_any_delimiter(PL_tokenbuf, len);
8534 /* x::* is just a word, unless x is "CORE" */
8535 if (!anydelim && *s == ':' && s[1] == ':') {
8536 if (memEQs(PL_tokenbuf, len, "CORE"))
8537 return yyl_key_core(aTHX_ s, len, c);
8538 return yyl_just_a_word(aTHX_ s, len, 0, c);
8542 while (d < PL_bufend && isSPACE(*d))
8543 d++; /* no comments skipped here, or s### is misparsed */
8545 /* Is this a word before a => operator? */
8546 if (*d == '=' && d[1] == '>') {
8547 return yyl_fatcomma(aTHX_ s, len);
8550 /* Check for plugged-in keyword */
8554 char *saved_bufptr = PL_bufptr;
8556 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
8558 if (result == KEYWORD_PLUGIN_DECLINE) {
8559 /* not a plugged-in keyword */
8560 PL_bufptr = saved_bufptr;
8561 } else if (result == KEYWORD_PLUGIN_STMT) {
8562 pl_yylval.opval = o;
8564 if (!PL_nexttoke) PL_expect = XSTATE;
8565 return REPORT(PLUGSTMT);
8566 } else if (result == KEYWORD_PLUGIN_EXPR) {
8567 pl_yylval.opval = o;
8569 if (!PL_nexttoke) PL_expect = XOPERATOR;
8570 return REPORT(PLUGEXPR);
8572 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'", PL_tokenbuf);
8576 /* Is this a label? */
8577 if (!anydelim && PL_expect == XSTATE
8578 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
8581 newSVOP(OP_CONST, 0,
8582 newSVpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0));
8587 /* Check for lexical sub */
8588 if (PL_expect != XOPERATOR) {
8589 char tmpbuf[sizeof PL_tokenbuf + 1];
8591 Copy(PL_tokenbuf, tmpbuf+1, len, char);
8592 c.off = pad_findmy_pvn(tmpbuf, len+1, 0);
8593 if (c.off != NOT_IN_PAD) {
8594 assert(c.off); /* we assume this is boolean-true below */
8595 if (PAD_COMPNAME_FLAGS_isOUR(c.off)) {
8596 HV * const stash = PAD_COMPNAME_OURSTASH(c.off);
8597 HEK * const stashname = HvNAME_HEK(stash);
8598 c.sv = newSVhek(stashname);
8599 sv_catpvs(c.sv, "::");
8600 sv_catpvn_flags(c.sv, PL_tokenbuf, len,
8601 (UTF ? SV_CATUTF8 : SV_CATBYTES));
8602 c.gv = gv_fetchsv(c.sv, GV_NOADD_NOINIT | SvUTF8(c.sv),
8608 return yyl_just_a_word(aTHX_ s, len, 0, c);
8612 c.rv2cv_op = newOP(OP_PADANY, 0);
8613 c.rv2cv_op->op_targ = c.off;
8614 c.cv = find_lexical_cv(c.off);
8617 return yyl_just_a_word(aTHX_ s, len, 0, c);
8622 /* Check for built-in keyword */
8623 key = keyword(PL_tokenbuf, len, 0);
8626 key = yyl_secondclass_keyword(aTHX_ s, len, key, &orig_keyword, &c.gv, &c.gvp);
8628 if (key && key != KEY___DATA__ && key != KEY___END__
8629 && (!anydelim || *s != '#')) {
8630 /* no override, and not s### either; skipspace is safe here
8631 * check for => on following line */
8633 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
8634 STRLEN soff = s - SvPVX(PL_linestr);
8636 arrow = *s == '=' && s[1] == '>';
8637 PL_bufptr = SvPVX(PL_linestr) + bufoff;
8638 s = SvPVX(PL_linestr) + soff;
8640 return yyl_fatcomma(aTHX_ s, len);
8643 return yyl_word_or_keyword(aTHX_ s, len, key, orig_keyword, c);
8647 yyl_try(pTHX_ char *s)
8656 if (UTF ? isIDFIRST_utf8_safe(s, PL_bufend) : isALNUMC(*s)) {
8657 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
8661 yyl_croak_unrecognised(aTHX_ s);
8665 /* emulate EOF on ^D or ^Z */
8666 if ((tok = yyl_fake_eof(aTHX_ LEX_FAKE_EOF, FALSE, s)) != YYL_RETRY)
8673 if ((!PL_rsfp || PL_lex_inwhat)
8674 && (!PL_parser->filtered || s+1 < PL_bufend)) {
8678 && PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF)
8680 yyerror((const char *)
8682 ? "Format not terminated"
8683 : "Missing right curly or square bracket"));
8686 PerlIO_printf(Perl_debug_log, "### Tokener got EOF\n");
8690 if (s++ < PL_bufend)
8691 goto retry; /* ignore stray nulls */
8694 if (!PL_in_eval && !PL_preambled) {
8695 PL_preambled = TRUE;
8697 /* Generate a string of Perl code to load the debugger.
8698 * If PERL5DB is set, it will return the contents of that,
8699 * otherwise a compile-time require of perl5db.pl. */
8701 const char * const pdb = PerlEnv_getenv("PERL5DB");
8704 sv_setpv(PL_linestr, pdb);
8705 sv_catpvs(PL_linestr,";");
8707 SETERRNO(0,SS_NORMAL);
8708 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
8710 PL_parser->preambling = CopLINE(PL_curcop);
8712 SvPVCLEAR(PL_linestr);
8713 if (PL_preambleav) {
8714 SV **svp = AvARRAY(PL_preambleav);
8715 SV **const end = svp + AvFILLp(PL_preambleav);
8717 sv_catsv(PL_linestr, *svp);
8719 sv_catpvs(PL_linestr, ";");
8721 sv_free(MUTABLE_SV(PL_preambleav));
8722 PL_preambleav = NULL;
8725 sv_catpvs(PL_linestr,
8726 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
8727 if (PL_minus_n || PL_minus_p) {
8728 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
8730 sv_catpvs(PL_linestr,"chomp;");
8733 if ( ( *PL_splitstr == '/'
8734 || *PL_splitstr == '\''
8735 || *PL_splitstr == '"')
8736 && strchr(PL_splitstr + 1, *PL_splitstr))
8738 /* strchr is ok, because -F pattern can't contain
8740 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
8743 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
8744 bytes can be used as quoting characters. :-) */
8745 const char *splits = PL_splitstr;
8746 sv_catpvs(PL_linestr, "our @F=split(q\0");
8749 if (*splits == '\\')
8750 sv_catpvn(PL_linestr, splits, 1);
8751 sv_catpvn(PL_linestr, splits, 1);
8752 } while (*splits++);
8753 /* This loop will embed the trailing NUL of
8754 PL_linestr as the last thing it does before
8756 sv_catpvs(PL_linestr, ");");
8760 sv_catpvs(PL_linestr,"our @F=split(' ');");
8763 sv_catpvs(PL_linestr, "\n");
8764 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
8765 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
8766 PL_last_lop = PL_last_uni = NULL;
8767 if (PERLDB_LINE_OR_SAVESRC && PL_curstash != PL_debstash)
8768 update_debugger_info(PL_linestr, NULL, 0);
8771 if ((tok = yyl_fake_eof(aTHX_ 0, cBOOL(PL_rsfp), s)) != YYL_RETRY)
8776 #ifdef PERL_STRICT_CR
8777 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
8779 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
8781 case ' ': case '\t': case '\f': case '\v':
8787 const bool needs_semicolon = yyl_eol_needs_semicolon(aTHX_ &s);
8788 if (needs_semicolon)
8795 return yyl_hyphen(aTHX_ s);
8798 return yyl_plus(aTHX_ s);
8801 return yyl_star(aTHX_ s);
8804 return yyl_percent(aTHX_ s);
8807 return yyl_caret(aTHX_ s);
8810 return yyl_leftsquare(aTHX_ s);
8813 return yyl_tilde(aTHX_ s);
8816 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8822 return yyl_just_a_word(aTHX_ s, 0, 0, no_code);
8823 return yyl_colon(aTHX_ s + 1);
8826 return yyl_leftparen(aTHX_ s + 1);
8829 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8837 return yyl_rightparen(aTHX_ s);
8840 return yyl_rightsquare(aTHX_ s);
8843 return yyl_leftcurly(aTHX_ s + 1, 0);
8846 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
8848 return yyl_rightcurly(aTHX_ s, 0);
8851 return yyl_ampersand(aTHX_ s);
8854 return yyl_verticalbar(aTHX_ s);
8857 if (s[1] == '=' && (s == PL_linestart || s[-1] == '\n')
8858 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "====="))
8860 s = vcs_conflict_marker(s + 7);
8866 const char tmp = *s++;
8868 if (!PL_lex_allbrackets
8869 && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8877 if (!PL_lex_allbrackets
8878 && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
8887 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
8888 && memCHRs("+-*/%.^&|<",tmp))
8889 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
8890 "Reversed %c= operator",(int)tmp);
8892 if (PL_expect == XSTATE
8894 && (s == PL_linestart+1 || s[-2] == '\n') )
8896 if ( (PL_in_eval && !PL_rsfp && !PL_parser->filtered)
8897 || PL_lex_state != LEX_NORMAL)
8902 incline(s, PL_bufend);
8903 if (memBEGINs(s, (STRLEN) (PL_bufend - s), "=cut"))
8905 s = (char *) memchr(s,'\n', d - s);
8910 incline(s, PL_bufend);
8918 PL_parser->in_pod = 1;
8922 if (PL_expect == XBLOCK) {
8924 #ifdef PERL_STRICT_CR
8925 while (SPACE_OR_TAB(*t))
8927 while (SPACE_OR_TAB(*t) || *t == '\r')
8930 if (*t == '\n' || *t == '#') {
8931 ENTER_with_name("lex_format");
8932 SAVEI8(PL_parser->form_lex_state);
8933 SAVEI32(PL_lex_formbrack);
8934 PL_parser->form_lex_state = PL_lex_state;
8935 PL_lex_formbrack = PL_lex_brackets + 1;
8936 PL_parser->sub_error_count = PL_error_count;
8937 return yyl_leftcurly(aTHX_ s, 1);
8940 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
8948 return yyl_bang(aTHX_ s + 1);
8951 if (s[1] == '<' && (s == PL_linestart || s[-1] == '\n')
8952 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), "<<<<<"))
8954 s = vcs_conflict_marker(s + 7);
8957 return yyl_leftpointy(aTHX_ s);
8960 if (s[1] == '>' && (s == PL_linestart || s[-1] == '\n')
8961 && memBEGINs(s + 2, (STRLEN) (PL_bufend - (s + 2)), ">>>>>"))
8963 s = vcs_conflict_marker(s + 7);
8966 return yyl_rightpointy(aTHX_ s + 1);
8969 return yyl_dollar(aTHX_ s);
8972 return yyl_snail(aTHX_ s);
8974 case '/': /* may be division, defined-or, or pattern */
8975 return yyl_slash(aTHX_ s);
8977 case '?': /* conditional */
8979 if (!PL_lex_allbrackets
8980 && PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE)
8985 PL_lex_allbrackets++;
8989 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
8990 #ifdef PERL_STRICT_CR
8993 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
8995 && (s == PL_linestart || s[-1] == '\n') )
8998 /* formbrack==2 means dot seen where arguments expected */
8999 return yyl_rightcurly(aTHX_ s, 2);
9001 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
9005 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
9008 if (!PL_lex_allbrackets
9009 && PL_lex_fakeeof >= LEX_FAKEEOF_RANGE)
9017 pl_yylval.ival = OPf_SPECIAL;
9023 if (*s == '=' && !PL_lex_allbrackets
9024 && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
9032 case '0': case '1': case '2': case '3': case '4':
9033 case '5': case '6': case '7': case '8': case '9':
9034 s = scan_num(s, &pl_yylval);
9035 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
9036 if (PL_expect == XOPERATOR)
9041 return yyl_sglquote(aTHX_ s);
9044 return yyl_dblquote(aTHX_ s);
9047 return yyl_backtick(aTHX_ s);
9050 return yyl_backslash(aTHX_ s + 1);
9053 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
9054 char *start = s + 2;
9055 while (isDIGIT(*start) || *start == '_')
9057 if (*start == '.' && isDIGIT(start[1])) {
9058 s = scan_num(s, &pl_yylval);
9061 else if ((*start == ':' && start[1] == ':')
9062 || (PL_expect == XSTATE && *start == ':')) {
9063 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9067 else if (PL_expect == XSTATE) {
9069 while (d < PL_bufend && isSPACE(*d)) d++;
9071 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9076 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
9077 if (!isALPHA(*start) && (PL_expect == XTERM
9078 || PL_expect == XREF || PL_expect == XSTATE
9079 || PL_expect == XTERMORDORDOR)) {
9080 GV *const gv = gv_fetchpvn_flags(s, start - s,
9081 UTF ? SVf_UTF8 : 0, SVt_PVCV);
9083 s = scan_num(s, &pl_yylval);
9088 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9093 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
9097 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9128 if ((tok = yyl_keylookup(aTHX_ s, gv)) != YYL_RETRY)
9138 Works out what to call the token just pulled out of the input
9139 stream. The yacc parser takes care of taking the ops we return and
9140 stitching them into a tree.
9143 The type of the next token
9146 Check if we have already built the token; if so, use it.
9147 Switch based on the current state:
9148 - if we have a case modifier in a string, deal with that
9149 - handle other cases of interpolation inside a string
9150 - scan the next line if we are inside a format
9151 In the normal state, switch on the next character:
9153 if alphabetic, go to key lookup
9154 unrecognized character - croak
9155 - 0/4/26: handle end-of-line or EOF
9156 - cases for whitespace
9157 - \n and #: handle comments and line numbers
9158 - various operators, brackets and sigils
9161 - 'v': vstrings (or go to key lookup)
9162 - 'x' repetition operator (or go to key lookup)
9163 - other ASCII alphanumerics (key lookup begins here):
9166 scan built-in keyword (but do nothing with it yet)
9167 check for statement label
9168 check for lexical subs
9169 return yyl_just_a_word if there is one
9170 see whether built-in keyword is overridden
9171 switch on keyword number:
9172 - default: return yyl_just_a_word:
9173 not a built-in keyword; handle bareword lookup
9174 disambiguate between method and sub call
9175 fall back to bareword
9176 - cases for built-in keywords
9180 #define RSFP_FILENO (PL_rsfp)
9182 #define RSFP_FILENO (PerlIO_fileno(PL_rsfp))
9190 char *s = PL_bufptr;
9192 if (UNLIKELY(PL_parser->recheck_utf8_validity)) {
9193 const U8* first_bad_char_loc;
9194 if (UTF && UNLIKELY(! is_utf8_string_loc((U8 *) PL_bufptr,
9195 PL_bufend - PL_bufptr,
9196 &first_bad_char_loc)))
9198 _force_out_malformed_utf8_message(first_bad_char_loc,
9201 1 /* 1 means die */ );
9202 NOT_REACHED; /* NOTREACHED */
9204 PL_parser->recheck_utf8_validity = FALSE;
9207 SV* tmp = newSVpvs("");
9208 PerlIO_printf(Perl_debug_log, "### %" IVdf ":LEX_%s/X%s %s\n",
9209 (IV)CopLINE(PL_curcop),
9210 lex_state_names[PL_lex_state],
9211 exp_name[PL_expect],
9212 pv_display(tmp, s, strlen(s), 0, 60));
9216 /* when we've already built the next token, just pull it out of the queue */
9219 pl_yylval = PL_nextval[PL_nexttoke];
9222 next_type = PL_nexttype[PL_nexttoke];
9223 if (next_type & (7<<24)) {
9224 if (next_type & (1<<24)) {
9225 if (PL_lex_brackets > 100)
9226 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
9227 PL_lex_brackstack[PL_lex_brackets++] =
9228 (char) ((next_type >> 16) & 0xff);
9230 if (next_type & (2<<24))
9231 PL_lex_allbrackets++;
9232 if (next_type & (4<<24))
9233 PL_lex_allbrackets--;
9234 next_type &= 0xffff;
9236 return REPORT(next_type == 'p' ? pending_ident() : next_type);
9240 switch (PL_lex_state) {
9242 case LEX_INTERPNORMAL:
9245 /* interpolated case modifiers like \L \U, including \Q and \E.
9246 when we get here, PL_bufptr is at the \
9248 case LEX_INTERPCASEMOD:
9249 /* handle \E or end of string */
9250 return yyl_interpcasemod(aTHX_ s);
9252 case LEX_INTERPPUSH:
9253 return REPORT(sublex_push());
9255 case LEX_INTERPSTART:
9256 if (PL_bufptr == PL_bufend)
9257 return REPORT(sublex_done());
9259 if(*PL_bufptr != '(')
9260 PerlIO_printf(Perl_debug_log, "### Interpolated variable\n");
9263 /* for /@a/, we leave the joining for the regex engine to do
9264 * (unless we're within \Q etc) */
9265 PL_lex_dojoin = (*PL_bufptr == '@'
9266 && (!PL_lex_inpat || PL_lex_casemods));
9267 PL_lex_state = LEX_INTERPNORMAL;
9268 if (PL_lex_dojoin) {
9269 NEXTVAL_NEXTTOKE.ival = 0;
9271 force_ident("\"", '$');
9272 NEXTVAL_NEXTTOKE.ival = 0;
9274 NEXTVAL_NEXTTOKE.ival = 0;
9275 force_next((2<<24)|'(');
9276 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
9279 /* Convert (?{...}) and friends to 'do {...}' */
9280 if (PL_lex_inpat && *PL_bufptr == '(') {
9281 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9283 if (*PL_bufptr != '{')
9285 PL_expect = XTERMBLOCK;
9289 if (PL_lex_starts++) {
9291 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9292 if (!PL_lex_casemods && PL_lex_inpat)
9295 AopNOASSIGN(OP_CONCAT);
9299 case LEX_INTERPENDMAYBE:
9300 if (intuit_more(PL_bufptr, PL_bufend)) {
9301 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
9307 if (PL_lex_dojoin) {
9308 const U8 dojoin_was = PL_lex_dojoin;
9309 PL_lex_dojoin = FALSE;
9310 PL_lex_state = LEX_INTERPCONCAT;
9311 PL_lex_allbrackets--;
9312 return REPORT(dojoin_was == 1 ? (int)')' : (int)POSTJOIN);
9314 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
9315 && SvEVALED(PL_lex_repl))
9317 if (PL_bufptr != PL_bufend)
9318 Perl_croak(aTHX_ "Bad evalled substitution pattern");
9321 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
9322 re_eval_str. If the here-doc body’s length equals the previous
9323 value of re_eval_start, re_eval_start will now be null. So
9324 check re_eval_str as well. */
9325 if (PL_parser->lex_shared->re_eval_start
9326 || PL_parser->lex_shared->re_eval_str) {
9328 if (*PL_bufptr != ')')
9329 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
9331 /* having compiled a (?{..}) expression, return the original
9332 * text too, as a const */
9333 if (PL_parser->lex_shared->re_eval_str) {
9334 sv = PL_parser->lex_shared->re_eval_str;
9335 PL_parser->lex_shared->re_eval_str = NULL;
9337 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9338 SvPV_shrink_to_cur(sv);
9340 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
9341 PL_bufptr - PL_parser->lex_shared->re_eval_start);
9342 NEXTVAL_NEXTTOKE.opval =
9343 newSVOP(OP_CONST, 0,
9346 PL_parser->lex_shared->re_eval_start = NULL;
9352 case LEX_INTERPCONCAT:
9354 if (PL_lex_brackets)
9355 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
9356 (long) PL_lex_brackets);
9358 if (PL_bufptr == PL_bufend)
9359 return REPORT(sublex_done());
9361 /* m'foo' still needs to be parsed for possible (?{...}) */
9362 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
9363 SV *sv = newSVsv(PL_linestr);
9365 pl_yylval.opval = newSVOP(OP_CONST, 0, sv);
9369 int save_error_count = PL_error_count;
9371 s = scan_const(PL_bufptr);
9373 /* Set flag if this was a pattern and there were errors. op.c will
9374 * refuse to compile a pattern with this flag set. Otherwise, we
9375 * could get segfaults, etc. */
9376 if (PL_lex_inpat && PL_error_count > save_error_count) {
9377 ((PMOP*)PL_lex_inpat)->op_pmflags |= PMf_HAS_ERROR;
9380 PL_lex_state = LEX_INTERPCASEMOD;
9382 PL_lex_state = LEX_INTERPSTART;
9385 if (s != PL_bufptr) {
9386 NEXTVAL_NEXTTOKE = pl_yylval;
9389 if (PL_lex_starts++) {
9390 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
9391 if (!PL_lex_casemods && PL_lex_inpat)
9394 AopNOASSIGN(OP_CONCAT);
9404 if (PL_parser->sub_error_count != PL_error_count) {
9405 /* There was an error parsing a formline, which tends to
9407 Unlike interpolated sub-parsing, we can't treat any of
9408 these as recoverable, so no need to check sub_no_recover.
9412 assert(PL_lex_formbrack);
9413 s = scan_formline(PL_bufptr);
9414 if (!PL_lex_formbrack)
9415 return yyl_rightcurly(aTHX_ s, 1);
9420 /* We really do *not* want PL_linestr ever becoming a COW. */
9421 assert (!SvIsCOW(PL_linestr));
9423 PL_oldoldbufptr = PL_oldbufptr;
9426 if (PL_in_my == KEY_sigvar) {
9427 PL_parser->saw_infix_sigil = 0;
9428 return yyl_sigvar(aTHX_ s);
9432 /* yyl_try() and its callees might consult PL_parser->saw_infix_sigil.
9433 On its return, we then need to set it to indicate whether the token
9434 we just encountered was an infix operator that (if we hadn't been
9435 expecting an operator) have been a sigil.
9437 bool expected_operator = (PL_expect == XOPERATOR);
9438 int ret = yyl_try(aTHX_ s);
9439 switch (pl_yylval.ival) {
9444 if (expected_operator) {
9445 PL_parser->saw_infix_sigil = 1;
9450 PL_parser->saw_infix_sigil = 0;
9460 Looks up an identifier in the pad or in a package
9462 PL_in_my == KEY_sigvar indicates that this is a subroutine signature variable
9463 rather than a plain pad var.
9466 PRIVATEREF if this is a lexical name.
9467 BAREWORD if this belongs to a package.
9470 if we're in a my declaration
9471 croak if they tried to say my($foo::bar)
9472 build the ops for a my() declaration
9473 if it's an access to a my() variable
9474 build ops for access to a my() variable
9475 if in a dq string, and they've said @foo and we can't find @foo
9477 build ops for a bareword
9481 S_pending_ident(pTHX)
9484 const char pit = (char)pl_yylval.ival;
9485 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
9486 /* All routes through this function want to know if there is a colon. */
9487 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
9489 DEBUG_T({ PerlIO_printf(Perl_debug_log,
9490 "### Pending identifier '%s'\n", PL_tokenbuf); });
9491 assert(tokenbuf_len >= 2);
9493 /* if we're in a my(), we can't allow dynamics here.
9494 $foo'bar has already been turned into $foo::bar, so
9495 just check for colons.
9497 if it's a legal name, the OP is a PADANY.
9500 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9502 /* diag_listed_as: No package name allowed for variable %s
9504 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
9506 *PL_tokenbuf=='&' ? "subroutine" : "variable",
9507 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
9508 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
9513 /* "my" variable %s can't be in a package */
9514 /* PL_no_myglob is constant */
9515 GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
9516 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
9517 PL_in_my == KEY_my ? "my" : "state",
9518 *PL_tokenbuf == '&' ? "subroutine" : "variable",
9520 UTF ? SVf_UTF8 : 0);
9521 GCC_DIAG_RESTORE_STMT;
9524 if (PL_in_my == KEY_sigvar) {
9525 /* A signature 'padop' needs in addition, an op_first to
9526 * point to a child sigdefelem, and an extra field to hold
9527 * the signature index. We can achieve both by using an
9528 * UNOP_AUX and (ab)using the op_aux field to hold the
9529 * index. If we ever need more fields, use a real malloced
9530 * aux strut instead.
9532 o = newUNOP_AUX(OP_ARGELEM, 0, NULL,
9533 INT2PTR(UNOP_AUX_item *,
9534 (PL_parser->sig_elems)));
9535 o->op_private |= ( PL_tokenbuf[0] == '$' ? OPpARGELEM_SV
9536 : PL_tokenbuf[0] == '@' ? OPpARGELEM_AV
9540 o = newOP(OP_PADANY, 0);
9541 o->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
9542 UTF ? SVf_UTF8 : 0);
9543 if (PL_in_my == KEY_sigvar)
9546 pl_yylval.opval = o;
9552 build the ops for accesses to a my() variable.
9557 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9559 if (tmp != NOT_IN_PAD) {
9560 /* might be an "our" variable" */
9561 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
9562 /* build ops for a bareword */
9563 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9564 HEK * const stashname = HvNAME_HEK(stash);
9565 SV * const sym = newSVhek(stashname);
9566 sv_catpvs(sym, "::");
9567 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
9568 pl_yylval.opval = newSVOP(OP_CONST, 0, sym);
9569 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9573 ((PL_tokenbuf[0] == '$') ? SVt_PV
9574 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9579 pl_yylval.opval = newOP(OP_PADANY, 0);
9580 pl_yylval.opval->op_targ = tmp;
9586 Whine if they've said @foo or @foo{key} in a doublequoted string,
9587 and @foo (or %foo) isn't a variable we can find in the symbol
9590 if (ckWARN(WARN_AMBIGUOUS)
9592 && PL_lex_state != LEX_NORMAL
9593 && !PL_lex_brackets)
9595 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9596 ( UTF ? SVf_UTF8 : 0 ) | GV_ADDMG,
9598 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
9601 /* Downgraded from fatal to warning 20000522 mjd */
9602 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
9603 "Possible unintended interpolation of %" UTF8f
9605 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
9609 /* build ops for a bareword */
9610 pl_yylval.opval = newSVOP(OP_CONST, 0,
9611 newSVpvn_flags(PL_tokenbuf + 1,
9612 tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9613 UTF ? SVf_UTF8 : 0 ));
9614 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9616 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len > 0 ? tokenbuf_len - 1 : 0,
9617 (PL_in_eval ? GV_ADDMULTI : GV_ADD)
9618 | ( UTF ? SVf_UTF8 : 0 ),
9619 ((PL_tokenbuf[0] == '$') ? SVt_PV
9620 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9626 S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
9628 PERL_ARGS_ASSERT_CHECKCOMMA;
9630 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
9631 if (ckWARN(WARN_SYNTAX)) {
9634 for (w = s+2; *w && level; w++) {
9642 /* the list of chars below is for end of statements or
9643 * block / parens, boolean operators (&&, ||, //) and branch
9644 * constructs (or, and, if, until, unless, while, err, for).
9645 * Not a very solid hack... */
9646 if (!*w || !memCHRs(";&/|})]oaiuwef!=", *w))
9647 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9648 "%s (...) interpreted as function",name);
9651 while (s < PL_bufend && isSPACE(*s))
9655 while (s < PL_bufend && isSPACE(*s))
9657 if (isIDFIRST_lazy_if_safe(s, PL_bufend, UTF)) {
9658 const char * const w = s;
9659 s += UTF ? UTF8SKIP(s) : 1;
9660 while (isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
9661 s += UTF ? UTF8SKIP(s) : 1;
9662 while (s < PL_bufend && isSPACE(*s))
9666 if (keyword(w, s - w, 0))
9669 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
9670 if (gv && GvCVu(gv))
9675 Copy(w, tmpbuf+1, s - w, char);
9677 off = pad_findmy_pvn(tmpbuf, s-w+1, 0);
9678 if (off != NOT_IN_PAD) return;
9680 Perl_croak(aTHX_ "No comma allowed after %s", what);
9685 /* S_new_constant(): do any overload::constant lookup.
9687 Either returns sv, or mortalizes/frees sv and returns a new SV*.
9688 Best used as sv=new_constant(..., sv, ...).
9689 If s, pv are NULL, calls subroutine with one argument,
9690 and <type> is used with error messages only.
9691 <type> is assumed to be well formed UTF-8.
9693 If error_msg is not NULL, *error_msg will be set to any error encountered.
9694 Otherwise yyerror() will be used to output it */
9697 S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9698 SV *sv, SV *pv, const char *type, STRLEN typelen,
9699 const char ** error_msg)
9702 HV * table = GvHV(PL_hintgv); /* ^H */
9707 const char *why1 = "", *why2 = "", *why3 = "";
9708 const char * optional_colon = ":"; /* Only some messages have a colon */
9711 PERL_ARGS_ASSERT_NEW_CONSTANT;
9712 /* We assume that this is true: */
9715 sv_2mortal(sv); /* Parent created it permanently */
9718 || ! (PL_hints & HINT_LOCALIZE_HH))
9721 optional_colon = "";
9725 cvp = hv_fetch(table, key, keylen, FALSE);
9726 if (!cvp || !SvOK(*cvp)) {
9729 why3 = "} is not defined";
9735 pv = newSVpvn_flags(s, len, SVs_TEMP);
9737 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
9739 typesv = &PL_sv_undef;
9741 PUSHSTACKi(PERLSI_OVERLOAD);
9753 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
9757 /* Check the eval first */
9758 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
9760 const char * errstr;
9761 sv_catpvs(errsv, "Propagated");
9762 errstr = SvPV_const(errsv, errlen);
9763 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
9765 res = SvREFCNT_inc_simple_NN(sv);
9769 SvREFCNT_inc_simple_void_NN(res);
9782 (void)sv_2mortal(sv);
9784 why1 = "Call to &{$^H{";
9786 why3 = "}} did not return a defined value";
9790 msg = Perl_form(aTHX_ "Constant(%.*s)%s %s%s%s",
9791 (int)(type ? typelen : len),
9799 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9801 return SvREFCNT_inc_simple_NN(sv);
9804 PERL_STATIC_INLINE void
9805 S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
9806 bool is_utf8, bool check_dollar, bool tick_warn)
9809 const char *olds = *s;
9810 PERL_ARGS_ASSERT_PARSE_IDENT;
9812 while (*s < PL_bufend) {
9814 Perl_croak(aTHX_ "%s", ident_too_long);
9815 if (is_utf8 && isIDFIRST_utf8_safe(*s, PL_bufend)) {
9816 /* The UTF-8 case must come first, otherwise things
9817 * like c\N{COMBINING TILDE} would start failing, as the
9818 * isWORDCHAR_A case below would gobble the 'c' up.
9821 char *t = *s + UTF8SKIP(*s);
9822 while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
9825 if (*d + (t - *s) > e)
9826 Perl_croak(aTHX_ "%s", ident_too_long);
9827 Copy(*s, *d, t - *s, char);
9831 else if ( isWORDCHAR_A(**s) ) {
9834 } while (isWORDCHAR_A(**s) && *d < e);
9836 else if ( allow_package
9838 && isIDFIRST_lazy_if_safe((*s)+1, PL_bufend, is_utf8))
9845 else if (allow_package && **s == ':' && (*s)[1] == ':'
9846 /* Disallow things like Foo::$bar. For the curious, this is
9847 * the code path that triggers the "Bad name after" warning
9848 * when looking for barewords.
9850 && !(check_dollar && (*s)[2] == '$')) {
9857 if (UNLIKELY(tick_warn && saw_tick && PL_lex_state == LEX_INTERPNORMAL
9858 && !PL_lex_brackets && ckWARN(WARN_SYNTAX))) {
9861 Newx(this_d, *s - olds + saw_tick + 2, char); /* +2 for $# */
9864 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9865 "Old package separator used in string");
9866 if (olds[-1] == '#')
9870 if (*olds == '\'') {
9877 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
9878 "\t(Did you mean \"%" UTF8f "\" instead?)\n",
9879 UTF8fARG(is_utf8, d2-this_d, this_d));
9884 /* Returns a NUL terminated string, with the length of the string written to
9888 Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
9891 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9892 bool is_utf8 = cBOOL(UTF);
9894 PERL_ARGS_ASSERT_SCAN_WORD;
9896 parse_ident(&s, &d, e, allow_package, is_utf8, TRUE, FALSE);
9902 /* Is the byte 'd' a legal single character identifier name? 'u' is true
9903 * iff Unicode semantics are to be used. The legal ones are any of:
9904 * a) all ASCII characters except:
9905 * 1) control and space-type ones, like NUL, SOH, \t, and SPACE;
9907 * The final case currently doesn't get this far in the program, so we
9908 * don't test for it. If that were to change, it would be ok to allow it.
9909 * b) When not under Unicode rules, any upper Latin1 character
9910 * c) Otherwise, when unicode rules are used, all XIDS characters.
9912 * Because all ASCII characters have the same representation whether
9913 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
9914 * '{' without knowing if is UTF-8 or not. */
9915 #define VALID_LEN_ONE_IDENT(s, e, is_utf8) \
9916 (isGRAPH_A(*(s)) || ((is_utf8) \
9917 ? isIDFIRST_utf8_safe(s, e) \
9919 && LIKELY((U8) *(s) != LATIN1_TO_NATIVE(0xAD)))))
9922 S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
9924 I32 herelines = PL_parser->herelines;
9925 SSize_t bracket = -1;
9928 char * const e = d + destlen - 3; /* two-character token, ending NUL */
9929 bool is_utf8 = cBOOL(UTF);
9930 I32 orig_copline = 0, tmp_copline = 0;
9932 PERL_ARGS_ASSERT_SCAN_IDENT;
9934 if (isSPACE(*s) || !*s)
9936 if (isDIGIT(*s)) { /* handle $0 and $1 $2 and $10 and etc */
9937 bool is_zero= *s == '0' ? TRUE : FALSE;
9938 char *digit_start= d;
9940 while (s < PL_bufend && isDIGIT(*s)) {
9942 Perl_croak(aTHX_ "%s", ident_too_long);
9945 if (is_zero && d - digit_start > 1)
9946 Perl_croak(aTHX_ ident_var_zero_multi_digit);
9948 else { /* See if it is a "normal" identifier */
9949 parse_ident(&s, &d, e, 1, is_utf8, FALSE, TRUE);
9954 /* Either a digit variable, or parse_ident() found an identifier
9955 (anything valid as a bareword), so job done and return. */
9956 if (PL_lex_state != LEX_NORMAL)
9957 PL_lex_state = LEX_INTERPENDMAYBE;
9961 /* Here, it is not a run-of-the-mill identifier name */
9963 if (*s == '$' && s[1]
9964 && ( isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
9965 || isDIGIT_A((U8)s[1])
9968 || memBEGINs(s+1, (STRLEN) (PL_bufend - (s+1)), "::")) )
9970 /* Dereferencing a value in a scalar variable.
9971 The alternatives are different syntaxes for a scalar variable.
9972 Using ' as a leading package separator isn't allowed. :: is. */
9975 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
9977 bracket = s - SvPVX(PL_linestr);
9979 orig_copline = CopLINE(PL_curcop);
9980 if (s < PL_bufend && isSPACE(*s)) {
9984 if ((s <= PL_bufend - ((is_utf8)
9987 && VALID_LEN_ONE_IDENT(s, PL_bufend, is_utf8))
9990 const STRLEN skip = UTF8SKIP(s);
9993 for ( i = 0; i < skip; i++ )
9998 /* special case to handle ${10}, ${11} the same way we handle ${1} etc */
10000 bool is_zero= *d == '0' ? TRUE : FALSE;
10001 char *digit_start= d;
10002 while (s < PL_bufend && isDIGIT(*s)) {
10005 Perl_croak(aTHX_ "%s", ident_too_long);
10008 if (is_zero && d - digit_start > 1)
10009 Perl_croak(aTHX_ ident_var_zero_multi_digit);
10014 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
10015 if (*d == '^' && *s && isCONTROLVAR(*s)) {
10019 /* Warn about ambiguous code after unary operators if {...} notation isn't
10020 used. There's no difference in ambiguity; it's merely a heuristic
10021 about when not to warn. */
10022 else if (ck_uni && bracket == -1)
10024 if (bracket != -1) {
10027 /* If we were processing {...} notation then... */
10028 if (isIDFIRST_lazy_if_safe(d, e, is_utf8)
10029 || (!isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
10032 /* note we have to check for a normal identifier first,
10033 * as it handles utf8 symbols, and only after that has
10034 * been ruled out can we look at the caret words */
10035 if (isIDFIRST_lazy_if_safe(d, e, is_utf8) ) {
10036 /* if it starts as a valid identifier, assume that it is one.
10037 (the later check for } being at the expected point will trap
10038 cases where this doesn't pan out.) */
10039 d += is_utf8 ? UTF8SKIP(d) : 1;
10040 parse_ident(&s, &d, e, 1, is_utf8, TRUE, TRUE);
10043 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
10045 while (isWORDCHAR(*s) && d < e) {
10049 Perl_croak(aTHX_ "%s", ident_too_long);
10052 tmp_copline = CopLINE(PL_curcop);
10053 if (s < PL_bufend && isSPACE(*s)) {
10056 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
10057 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
10058 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10059 const char * const brack =
10061 ((*s == '[') ? "[...]" : "{...}");
10062 orig_copline = CopLINE(PL_curcop);
10063 CopLINE_set(PL_curcop, tmp_copline);
10064 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
10065 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10066 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
10067 funny, dest, brack, funny, dest, brack);
10068 CopLINE_set(PL_curcop, orig_copline);
10071 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
10072 PL_lex_allbrackets++;
10077 if ( !tmp_copline )
10078 tmp_copline = CopLINE(PL_curcop);
10079 if ((skip = s < PL_bufend && isSPACE(*s))) {
10080 /* Avoid incrementing line numbers or resetting PL_linestart,
10081 in case we have to back up. */
10082 STRLEN s_off = s - SvPVX(PL_linestr);
10084 s = SvPVX(PL_linestr) + s_off;
10089 /* Expect to find a closing } after consuming any trailing whitespace.
10092 /* Now increment line numbers if applicable. */
10096 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
10097 PL_lex_state = LEX_INTERPEND;
10100 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets) {
10101 if (ckWARN(WARN_AMBIGUOUS)
10102 && (keyword(dest, d - dest, 0)
10103 || get_cvn_flags(dest, d - dest, is_utf8
10107 SV *tmp = newSVpvn_flags( dest, d - dest,
10108 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
10111 orig_copline = CopLINE(PL_curcop);
10112 CopLINE_set(PL_curcop, tmp_copline);
10113 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
10114 "Ambiguous use of %c{%" SVf "} resolved to %c%" SVf,
10115 funny, SVfARG(tmp), funny, SVfARG(tmp));
10116 CopLINE_set(PL_curcop, orig_copline);
10121 /* Didn't find the closing } at the point we expected, so restore
10122 state such that the next thing to process is the opening { and */
10123 s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
10124 CopLINE_set(PL_curcop, orig_copline);
10125 PL_parser->herelines = herelines;
10127 PL_parser->sub_no_recover = TRUE;
10130 else if ( PL_lex_state == LEX_INTERPNORMAL
10131 && !PL_lex_brackets
10132 && !intuit_more(s, PL_bufend))
10133 PL_lex_state = LEX_INTERPEND;
10138 S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset, unsigned int * x_mod_count) {
10140 /* Adds, subtracts to/from 'pmfl' based on the next regex modifier flag
10141 * found in the parse starting at 's', based on the subset that are valid
10142 * in this context input to this routine in 'valid_flags'. Advances s.
10143 * Returns TRUE if the input should be treated as a valid flag, so the next
10144 * char may be as well; otherwise FALSE. 'charset' should point to a NUL
10145 * upon first call on the current regex. This routine will set it to any
10146 * charset modifier found. The caller shouldn't change it. This way,
10147 * another charset modifier encountered in the parse can be detected as an
10148 * error, as we have decided to allow only one */
10150 const char c = **s;
10151 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
10153 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
10154 if (isWORDCHAR_lazy_if_safe( *s, PL_bufend, UTF)) {
10155 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
10156 UTF ? SVf_UTF8 : 0);
10158 /* Pretend that it worked, so will continue processing before
10167 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl, *x_mod_count);
10168 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
10169 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
10170 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
10171 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
10172 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
10173 case LOCALE_PAT_MOD:
10175 goto multiple_charsets;
10177 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
10180 case UNICODE_PAT_MOD:
10182 goto multiple_charsets;
10184 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
10187 case ASCII_RESTRICT_PAT_MOD:
10189 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
10193 /* Error if previous modifier wasn't an 'a', but if it was, see
10194 * if, and accept, a second occurrence (only) */
10195 if (*charset != 'a'
10196 || get_regex_charset(*pmfl)
10197 != REGEX_ASCII_RESTRICTED_CHARSET)
10199 goto multiple_charsets;
10201 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
10205 case DEPENDS_PAT_MOD:
10207 goto multiple_charsets;
10209 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
10218 if (*charset != c) {
10219 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
10221 else if (c == 'a') {
10222 /* diag_listed_as: Regexp modifier "/%c" may appear a maximum of twice */
10223 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
10226 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
10229 /* Pretend that it worked, so will continue processing before dieing */
10235 S_scan_pat(pTHX_ char *start, I32 type)
10239 const char * const valid_flags =
10240 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
10241 char charset = '\0'; /* character set modifier */
10242 unsigned int x_mod_count = 0;
10244 PERL_ARGS_ASSERT_SCAN_PAT;
10246 s = scan_str(start,TRUE,FALSE, (PL_in_eval & EVAL_RE_REPARSING), NULL);
10248 Perl_croak(aTHX_ "Search pattern not terminated");
10250 pm = (PMOP*)newPMOP(type, 0);
10251 if (PL_multi_open == '?') {
10252 /* This is the only point in the code that sets PMf_ONCE: */
10253 pm->op_pmflags |= PMf_ONCE;
10255 /* Hence it's safe to do this bit of PMOP book-keeping here, which
10256 allows us to restrict the list needed by reset to just the ??
10258 assert(type != OP_TRANS);
10260 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
10263 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
10266 elements = mg->mg_len / sizeof(PMOP**);
10267 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
10268 ((PMOP**)mg->mg_ptr) [elements++] = pm;
10269 mg->mg_len = elements * sizeof(PMOP**);
10270 PmopSTASH_set(pm,PL_curstash);
10274 /* if qr/...(?{..}).../, then need to parse the pattern within a new
10275 * anon CV. False positives like qr/[(?{]/ are harmless */
10277 if (type == OP_QR) {
10279 char *e, *p = SvPV(PL_lex_stuff, len);
10281 for (; p < e; p++) {
10282 if (p[0] == '(' && p[1] == '?'
10283 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
10285 pm->op_pmflags |= PMf_HAS_CV;
10289 pm->op_pmflags |= PMf_IS_QR;
10292 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags),
10293 &s, &charset, &x_mod_count))
10295 /* issue a warning if /c is specified,but /g is not */
10296 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
10298 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
10299 "Use of /c modifier is meaningless without /g" );
10302 PL_lex_op = (OP*)pm;
10303 pl_yylval.ival = OP_MATCH;
10308 S_scan_subst(pTHX_ char *start)
10314 line_t linediff = 0;
10316 char charset = '\0'; /* character set modifier */
10317 unsigned int x_mod_count = 0;
10320 PERL_ARGS_ASSERT_SCAN_SUBST;
10322 pl_yylval.ival = OP_NULL;
10324 s = scan_str(start, TRUE, FALSE, FALSE, &t);
10327 Perl_croak(aTHX_ "Substitution pattern not terminated");
10331 first_start = PL_multi_start;
10332 first_line = CopLINE(PL_curcop);
10333 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10335 SvREFCNT_dec_NN(PL_lex_stuff);
10336 PL_lex_stuff = NULL;
10337 Perl_croak(aTHX_ "Substitution replacement not terminated");
10339 PL_multi_start = first_start; /* so whole substitution is taken together */
10341 pm = (PMOP*)newPMOP(OP_SUBST, 0);
10345 if (*s == EXEC_PAT_MOD) {
10349 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags),
10350 &s, &charset, &x_mod_count))
10356 if ((pm->op_pmflags & PMf_CONTINUE)) {
10357 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
10361 SV * const repl = newSVpvs("");
10364 pm->op_pmflags |= PMf_EVAL;
10365 for (; es > 1; es--) {
10366 sv_catpvs(repl, "eval ");
10368 sv_catpvs(repl, "do {");
10369 sv_catsv(repl, PL_parser->lex_sub_repl);
10370 sv_catpvs(repl, "}");
10371 SvREFCNT_dec(PL_parser->lex_sub_repl);
10372 PL_parser->lex_sub_repl = repl;
10376 linediff = CopLINE(PL_curcop) - first_line;
10378 CopLINE_set(PL_curcop, first_line);
10380 if (linediff || es) {
10381 /* the IVX field indicates that the replacement string is a s///e;
10382 * the NVX field indicates how many src code lines the replacement
10384 sv_upgrade(PL_parser->lex_sub_repl, SVt_PVNV);
10385 ((XPVNV*)SvANY(PL_parser->lex_sub_repl))->xnv_u.xnv_lines = linediff;
10386 ((XPVIV*)SvANY(PL_parser->lex_sub_repl))->xiv_u.xivu_eval_seen =
10390 PL_lex_op = (OP*)pm;
10391 pl_yylval.ival = OP_SUBST;
10396 S_scan_trans(pTHX_ char *start)
10403 bool nondestruct = 0;
10406 PERL_ARGS_ASSERT_SCAN_TRANS;
10408 pl_yylval.ival = OP_NULL;
10410 s = scan_str(start,FALSE,FALSE,FALSE,&t);
10412 Perl_croak(aTHX_ "Transliteration pattern not terminated");
10416 s = scan_str(s,FALSE,FALSE,FALSE,NULL);
10418 SvREFCNT_dec_NN(PL_lex_stuff);
10419 PL_lex_stuff = NULL;
10420 Perl_croak(aTHX_ "Transliteration replacement not terminated");
10423 complement = del = squash = 0;
10427 complement = OPpTRANS_COMPLEMENT;
10430 del = OPpTRANS_DELETE;
10433 squash = OPpTRANS_SQUASH;
10445 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
10446 o->op_private &= ~OPpTRANS_ALL;
10447 o->op_private |= del|squash|complement;
10450 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
10457 Takes a pointer to the first < in <<FOO.
10458 Returns a pointer to the byte following <<FOO.
10460 This function scans a heredoc, which involves different methods
10461 depending on whether we are in a string eval, quoted construct, etc.
10462 This is because PL_linestr could containing a single line of input, or
10463 a whole string being evalled, or the contents of the current quote-
10466 The two basic methods are:
10467 - Steal lines from the input stream
10468 - Scan the heredoc in PL_linestr and remove it therefrom
10470 In a file scope or filtered eval, the first method is used; in a
10471 string eval, the second.
10473 In a quote-like operator, we have to choose between the two,
10474 depending on where we can find a newline. We peek into outer lex-
10475 ing scopes until we find one with a newline in it. If we reach the
10476 outermost lexing scope and it is a file, we use the stream method.
10477 Otherwise it is treated as an eval.
10481 S_scan_heredoc(pTHX_ char *s)
10483 I32 op_type = OP_SCALAR;
10491 I32 indent_len = 0;
10492 bool indented = FALSE;
10493 const bool infile = PL_rsfp || PL_parser->filtered;
10494 const line_t origline = CopLINE(PL_curcop);
10495 LEXSHARED *shared = PL_parser->lex_shared;
10497 PERL_ARGS_ASSERT_SCAN_HEREDOC;
10500 d = PL_tokenbuf + 1;
10501 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
10502 *PL_tokenbuf = '\n';
10505 if (*peek == '~') {
10510 while (SPACE_OR_TAB(*peek))
10513 if (*peek == '`' || *peek == '\'' || *peek =='"') {
10516 s = delimcpy(d, e, s, PL_bufend, term, &len);
10517 if (s == PL_bufend)
10518 Perl_croak(aTHX_ "Unterminated delimiter for here document");
10524 /* <<\FOO is equivalent to <<'FOO' */
10529 if (! isWORDCHAR_lazy_if_safe(s, PL_bufend, UTF))
10530 Perl_croak(aTHX_ "Use of bare << to mean <<\"\" is forbidden");
10534 while (isWORDCHAR_lazy_if_safe(peek, PL_bufend, UTF)) {
10535 peek += UTF ? UTF8SKIP(peek) : 1;
10538 len = (peek - s >= e - d) ? (e - d) : (peek - s);
10539 Copy(s, d, len, char);
10544 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
10545 Perl_croak(aTHX_ "Delimiter for here document is too long");
10549 len = d - PL_tokenbuf;
10551 #ifndef PERL_STRICT_CR
10552 d = (char *) memchr(s, '\r', PL_bufend - s);
10554 char * const olds = s;
10556 while (s < PL_bufend) {
10562 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
10571 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10576 tmpstr = newSV_type(SVt_PVIV);
10577 SvGROW(tmpstr, 80);
10578 if (term == '\'') {
10579 op_type = OP_CONST;
10580 SvIV_set(tmpstr, -1);
10582 else if (term == '`') {
10583 op_type = OP_BACKTICK;
10584 SvIV_set(tmpstr, '\\');
10587 PL_multi_start = origline + 1 + PL_parser->herelines;
10588 PL_multi_open = PL_multi_close = '<';
10590 /* inside a string eval or quote-like operator */
10591 if (!infile || PL_lex_inwhat) {
10594 char * const olds = s;
10595 PERL_CONTEXT * const cx = CX_CUR();
10596 /* These two fields are not set until an inner lexing scope is
10597 entered. But we need them set here. */
10598 shared->ls_bufptr = s;
10599 shared->ls_linestr = PL_linestr;
10601 if (PL_lex_inwhat) {
10602 /* Look for a newline. If the current buffer does not have one,
10603 peek into the line buffer of the parent lexing scope, going
10604 up as many levels as necessary to find one with a newline
10607 while (!(s = (char *)memchr(
10608 (void *)shared->ls_bufptr, '\n',
10609 SvEND(shared->ls_linestr)-shared->ls_bufptr
10612 shared = shared->ls_prev;
10613 /* shared is only null if we have gone beyond the outermost
10614 lexing scope. In a file, we will have broken out of the
10615 loop in the previous iteration. In an eval, the string buf-
10616 fer ends with "\n;", so the while condition above will have
10617 evaluated to false. So shared can never be null. Or so you
10618 might think. Odd syntax errors like s;@{<<; can gobble up
10619 the implicit semicolon at the end of a flie, causing the
10620 file handle to be closed even when we are not in a string
10621 eval. So shared may be null in that case.
10622 (Closing '>>}' here to balance the earlier open brace for
10623 editors that look for matched pairs.) */
10624 if (UNLIKELY(!shared))
10626 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10627 most lexing scope. In a file, shared->ls_linestr at that
10628 level is just one line, so there is no body to steal. */
10629 if (infile && !shared->ls_prev) {
10635 else { /* eval or we've already hit EOF */
10636 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10641 linestr = shared->ls_linestr;
10642 bufend = SvEND(linestr);
10647 while (s < bufend - len + 1) {
10649 ++PL_parser->herelines;
10651 if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
10655 /* Only valid if it's preceded by whitespace only */
10656 while (backup != myolds && --backup >= myolds) {
10657 if (! SPACE_OR_TAB(*backup)) {
10663 /* No whitespace or all! */
10664 if (backup == s || *backup == '\n') {
10665 Newx(indent, indent_len + 1, char);
10666 memcpy(indent, backup + 1, indent_len);
10667 indent[indent_len] = 0;
10668 s--; /* before our delimiter */
10669 PL_parser->herelines--; /* this line doesn't count */
10676 while (s < bufend - len + 1
10677 && memNE(s,PL_tokenbuf,len) )
10680 ++PL_parser->herelines;
10684 if (s >= bufend - len + 1) {
10688 sv_setpvn(tmpstr,d+1,s-d);
10690 /* the preceding stmt passes a newline */
10691 PL_parser->herelines++;
10693 /* s now points to the newline after the heredoc terminator.
10694 d points to the newline before the body of the heredoc.
10697 /* We are going to modify linestr in place here, so set
10698 aside copies of the string if necessary for re-evals or
10700 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
10701 check shared->re_eval_str. */
10702 if (shared->re_eval_start || shared->re_eval_str) {
10703 /* Set aside the rest of the regexp */
10704 if (!shared->re_eval_str)
10705 shared->re_eval_str =
10706 newSVpvn(shared->re_eval_start,
10707 bufend - shared->re_eval_start);
10708 shared->re_eval_start -= s-d;
10711 if (cxstack_ix >= 0
10712 && CxTYPE(cx) == CXt_EVAL
10713 && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
10714 && cx->blk_eval.cur_text == linestr)
10716 cx->blk_eval.cur_text = newSVsv(linestr);
10717 cx->blk_u16 |= 0x40; /* indicate cur_text is ref counted */
10720 /* Copy everything from s onwards back to d. */
10721 Move(s,d,bufend-s + 1,char);
10722 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
10723 /* Setting PL_bufend only applies when we have not dug deeper
10724 into other scopes, because sublex_done sets PL_bufend to
10725 SvEND(PL_linestr). */
10726 if (shared == PL_parser->lex_shared)
10727 PL_bufend = SvEND(linestr);
10732 char *oldbufptr_save;
10733 char *oldoldbufptr_save;
10735 SvPVCLEAR(tmpstr); /* avoid "uninitialized" warning */
10736 term = PL_tokenbuf[1];
10738 linestr_save = PL_linestr; /* must restore this afterwards */
10739 d = s; /* and this */
10740 oldbufptr_save = PL_oldbufptr;
10741 oldoldbufptr_save = PL_oldoldbufptr;
10742 PL_linestr = newSVpvs("");
10743 PL_bufend = SvPVX(PL_linestr);
10746 PL_bufptr = PL_bufend;
10747 CopLINE_set(PL_curcop,
10748 origline + 1 + PL_parser->herelines);
10750 if ( !lex_next_chunk(LEX_NO_TERM)
10751 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n'))
10753 /* Simply freeing linestr_save might seem simpler here, as it
10754 does not matter what PL_linestr points to, since we are
10755 about to croak; but in a quote-like op, linestr_save
10756 will have been prospectively freed already, via
10757 SAVEFREESV(PL_linestr) in sublex_push, so it’s easier to
10758 restore PL_linestr. */
10759 SvREFCNT_dec_NN(PL_linestr);
10760 PL_linestr = linestr_save;
10761 PL_oldbufptr = oldbufptr_save;
10762 PL_oldoldbufptr = oldoldbufptr_save;
10766 CopLINE_set(PL_curcop, origline);
10768 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10769 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10770 /* ^That should be enough to avoid this needing to grow: */
10771 sv_catpvs(PL_linestr, "\n\0");
10772 assert(s == SvPVX(PL_linestr));
10773 PL_bufend = SvEND(PL_linestr);
10777 PL_parser->herelines++;
10778 PL_last_lop = PL_last_uni = NULL;
10780 #ifndef PERL_STRICT_CR
10781 if (PL_bufend - PL_linestart >= 2) {
10782 if ( (PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n')
10783 || (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
10785 PL_bufend[-2] = '\n';
10787 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
10789 else if (PL_bufend[-1] == '\r')
10790 PL_bufend[-1] = '\n';
10792 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10793 PL_bufend[-1] = '\n';
10796 if (indented && (PL_bufend-s) >= len) {
10797 char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len));
10800 char *backup = found;
10803 /* Only valid if it's preceded by whitespace only */
10804 while (backup != s && --backup >= s) {
10805 if (! SPACE_OR_TAB(*backup)) {
10811 /* All whitespace or none! */
10812 if (backup == found || SPACE_OR_TAB(*backup)) {
10813 Newx(indent, indent_len + 1, char);
10814 memcpy(indent, backup, indent_len);
10815 indent[indent_len] = 0;
10816 SvREFCNT_dec(PL_linestr);
10817 PL_linestr = linestr_save;
10818 PL_linestart = SvPVX(linestr_save);
10819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10820 PL_oldbufptr = oldbufptr_save;
10821 PL_oldoldbufptr = oldoldbufptr_save;
10827 /* Didn't find it */
10828 sv_catsv(tmpstr,PL_linestr);
10831 if (*s == term && PL_bufend-s >= len
10832 && memEQ(s,PL_tokenbuf + 1,len))
10834 SvREFCNT_dec(PL_linestr);
10835 PL_linestr = linestr_save;
10836 PL_linestart = SvPVX(linestr_save);
10837 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
10838 PL_oldbufptr = oldbufptr_save;
10839 PL_oldoldbufptr = oldoldbufptr_save;
10844 sv_catsv(tmpstr,PL_linestr);
10850 PL_multi_end = origline + PL_parser->herelines;
10852 if (indented && indent) {
10853 STRLEN linecount = 1;
10854 STRLEN herelen = SvCUR(tmpstr);
10855 char *ss = SvPVX(tmpstr);
10856 char *se = ss + herelen;
10857 SV *newstr = newSV(herelen+1);
10860 /* Trim leading whitespace */
10862 /* newline only? Copy and move on */
10864 sv_catpvs(newstr,"\n");
10868 /* Found our indentation? Strip it */
10870 else if (se - ss >= indent_len
10871 && memEQ(ss, indent, indent_len))
10876 while ((ss + le) < se && *(ss + le) != '\n')
10879 sv_catpvn(newstr, ss, le);
10882 /* Line doesn't begin with our indentation? Croak */
10887 "Indentation on line %d of here-doc doesn't match delimiter",
10893 /* avoid sv_setsv() as we dont wan't to COW here */
10894 sv_setpvn(tmpstr,SvPVX(newstr),SvCUR(newstr));
10896 SvREFCNT_dec_NN(newstr);
10899 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
10900 SvPV_shrink_to_cur(tmpstr);
10904 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
10908 PL_lex_stuff = tmpstr;
10909 pl_yylval.ival = op_type;
10915 SvREFCNT_dec(tmpstr);
10916 CopLINE_set(PL_curcop, origline);
10917 missingterm(PL_tokenbuf + 1, sizeof(PL_tokenbuf) - 1);
10921 /* scan_inputsymbol
10922 takes: position of first '<' in input buffer
10923 returns: position of first char following the matching '>' in
10925 side-effects: pl_yylval and lex_op are set.
10930 <<>> read from ARGV without magic open
10931 <FH> read from filehandle
10932 <pkg::FH> read from package qualified filehandle
10933 <pkg'FH> read from package qualified filehandle
10934 <$fh> read from filehandle in $fh
10935 <*.h> filename glob
10940 S_scan_inputsymbol(pTHX_ char *start)
10942 char *s = start; /* current position in buffer */
10945 bool nomagicopen = FALSE;
10946 char *d = PL_tokenbuf; /* start of temp holding space */
10947 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10949 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10951 end = (char *) memchr(s, '\n', PL_bufend - s);
10954 if (s[1] == '<' && s[2] == '>' && s[3] == '>') {
10955 nomagicopen = TRUE;
10961 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
10963 /* die if we didn't have space for the contents of the <>,
10964 or if it didn't end, or if we see a newline
10967 if (len >= (I32)sizeof PL_tokenbuf)
10968 Perl_croak(aTHX_ "Excessively long <> operator");
10970 Perl_croak(aTHX_ "Unterminated <> operator");
10975 Remember, only scalar variables are interpreted as filehandles by
10976 this code. Anything more complex (e.g., <$fh{$num}>) will be
10977 treated as a glob() call.
10978 This code makes use of the fact that except for the $ at the front,
10979 a scalar variable and a filehandle look the same.
10981 if (*d == '$' && d[1]) d++;
10983 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
10984 while (isWORDCHAR_lazy_if_safe(d, e, UTF) || *d == '\'' || *d == ':') {
10985 d += UTF ? UTF8SKIP(d) : 1;
10988 /* If we've tried to read what we allow filehandles to look like, and
10989 there's still text left, then it must be a glob() and not a getline.
10990 Use scan_str to pull out the stuff between the <> and treat it
10991 as nothing more than a string.
10994 if (d - PL_tokenbuf != len) {
10995 pl_yylval.ival = OP_GLOB;
10996 s = scan_str(start,FALSE,FALSE,FALSE,NULL);
10998 Perl_croak(aTHX_ "Glob not terminated");
11002 bool readline_overriden = FALSE;
11004 /* we're in a filehandle read situation */
11007 /* turn <> into <ARGV> */
11009 Copy("ARGV",d,5,char);
11011 /* Check whether readline() is overriden */
11012 if ((gv_readline = gv_override("readline",8)))
11013 readline_overriden = TRUE;
11015 /* if <$fh>, create the ops to turn the variable into a
11019 /* try to find it in the pad for this block, otherwise find
11020 add symbol table ops
11022 const PADOFFSET tmp = pad_findmy_pvn(d, len, 0);
11023 if (tmp != NOT_IN_PAD) {
11024 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
11025 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
11026 HEK * const stashname = HvNAME_HEK(stash);
11027 SV * const sym = sv_2mortal(newSVhek(stashname));
11028 sv_catpvs(sym, "::");
11029 sv_catpv(sym, d+1);
11034 OP * const o = newOP(OP_PADSV, 0);
11036 PL_lex_op = readline_overriden
11037 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11038 op_append_elem(OP_LIST, o,
11039 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
11040 : newUNOP(OP_READLINE, 0, o);
11048 GV_ADDMULTI | ( UTF ? SVf_UTF8 : 0 ),
11050 PL_lex_op = readline_overriden
11051 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11052 op_append_elem(OP_LIST,
11053 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
11054 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11055 : newUNOP(OP_READLINE, 0,
11056 newUNOP(OP_RV2SV, 0,
11057 newGVOP(OP_GV, 0, gv)));
11059 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
11060 pl_yylval.ival = OP_NULL;
11063 /* If it's none of the above, it must be a literal filehandle
11064 (<Foo::BAR> or <FOO>) so build a simple readline OP */
11066 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
11067 PL_lex_op = readline_overriden
11068 ? newUNOP(OP_ENTERSUB, OPf_STACKED,
11069 op_append_elem(OP_LIST,
11070 newGVOP(OP_GV, 0, gv),
11071 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
11072 : newUNOP(OP_READLINE, nomagicopen ? OPf_SPECIAL : 0, newGVOP(OP_GV, 0, gv));
11073 pl_yylval.ival = OP_NULL;
11083 start position in buffer
11084 keep_bracketed_quoted preserve \ quoting of embedded delimiters, but
11085 only if they are of the open/close form
11086 keep_delims preserve the delimiters around the string
11087 re_reparse compiling a run-time /(?{})/:
11088 collapse // to /, and skip encoding src
11089 delimp if non-null, this is set to the position of
11090 the closing delimiter, or just after it if
11091 the closing and opening delimiters differ
11092 (i.e., the opening delimiter of a substitu-
11094 returns: position to continue reading from buffer
11095 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
11096 updates the read buffer.
11098 This subroutine pulls a string out of the input. It is called for:
11099 q single quotes q(literal text)
11100 ' single quotes 'literal text'
11101 qq double quotes qq(interpolate $here please)
11102 " double quotes "interpolate $here please"
11103 qx backticks qx(/bin/ls -l)
11104 ` backticks `/bin/ls -l`
11105 qw quote words @EXPORT_OK = qw( func() $spam )
11106 m// regexp match m/this/
11107 s/// regexp substitute s/this/that/
11108 tr/// string transliterate tr/this/that/
11109 y/// string transliterate y/this/that/
11110 ($*@) sub prototypes sub foo ($)
11111 (stuff) sub attr parameters sub foo : attr(stuff)
11112 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
11114 In most of these cases (all but <>, patterns and transliterate)
11115 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
11116 calls scan_str(). s/// makes yylex() call scan_subst() which calls
11117 scan_str(). tr/// and y/// make yylex() call scan_trans() which
11120 It skips whitespace before the string starts, and treats the first
11121 character as the delimiter. If the delimiter is one of ([{< then
11122 the corresponding "close" character )]}> is used as the closing
11123 delimiter. It allows quoting of delimiters, and if the string has
11124 balanced delimiters ([{<>}]) it allows nesting.
11126 On success, the SV with the resulting string is put into lex_stuff or,
11127 if that is already non-NULL, into lex_repl. The second case occurs only
11128 when parsing the RHS of the special constructs s/// and tr/// (y///).
11129 For convenience, the terminating delimiter character is stuffed into
11134 Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int re_reparse,
11138 SV *sv; /* scalar value: string */
11139 const char *tmps; /* temp string, used for delimiter matching */
11140 char *s = start; /* current position in the buffer */
11141 char term; /* terminating character */
11142 char *to; /* current position in the sv's data */
11143 I32 brackets = 1; /* bracket nesting level */
11144 bool d_is_utf8 = FALSE; /* is there any utf8 content? */
11145 IV termcode; /* terminating char. code */
11146 U8 termstr[UTF8_MAXBYTES+1]; /* terminating string */
11147 STRLEN termlen; /* length of terminating string */
11150 /* The delimiters that have a mirror-image closing one */
11151 const char * opening_delims = "([{<";
11152 const char * closing_delims = ")]}>";
11154 /* The only non-UTF character that isn't a stand alone grapheme is
11155 * white-space, hence can't be a delimiter. */
11156 const char * non_grapheme_msg = "Use of unassigned code point or"
11157 " non-standalone grapheme for a delimiter"
11159 PERL_ARGS_ASSERT_SCAN_STR;
11161 /* skip space before the delimiter */
11166 /* mark where we are, in case we need to report errors */
11169 /* after skipping whitespace, the next character is the terminator */
11171 if (!UTF || UTF8_IS_INVARIANT(term)) {
11172 termcode = termstr[0] = term;
11176 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
11177 if (UTF && UNLIKELY(! is_grapheme((U8 *) start,
11182 yyerror(non_grapheme_msg);
11185 Copy(s, termstr, termlen, U8);
11188 /* mark where we are */
11189 PL_multi_start = CopLINE(PL_curcop);
11190 PL_multi_open = termcode;
11191 herelines = PL_parser->herelines;
11193 /* If the delimiter has a mirror-image closing one, get it */
11194 if (term && (tmps = strchr(opening_delims, term))) {
11195 termcode = termstr[0] = term = closing_delims[tmps - opening_delims];
11198 PL_multi_close = termcode;
11200 if (PL_multi_open == PL_multi_close) {
11201 keep_bracketed_quoted = FALSE;
11204 /* create a new SV to hold the contents. 79 is the SV's initial length.
11205 What a random number. */
11206 sv = newSV_type(SVt_PVIV);
11208 SvIV_set(sv, termcode);
11209 (void)SvPOK_only(sv); /* validate pointer */
11211 /* move past delimiter and try to read a complete string */
11213 sv_catpvn(sv, s, termlen);
11216 /* extend sv if need be */
11217 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
11218 /* set 'to' to the next character in the sv's string */
11219 to = SvPVX(sv)+SvCUR(sv);
11221 /* if open delimiter is the close delimiter read unbridle */
11222 if (PL_multi_open == PL_multi_close) {
11223 for (; s < PL_bufend; s++,to++) {
11224 /* embedded newlines increment the current line number */
11225 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11226 COPLINE_INC_WITH_HERELINES;
11227 /* handle quoted delimiters */
11228 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
11229 if (!keep_bracketed_quoted
11231 || (re_reparse && s[1] == '\\'))
11234 else /* any other quotes are simply copied straight through */
11237 /* terminate when run out of buffer (the for() condition), or
11238 have found the terminator */
11239 else if (*s == term) { /* First byte of terminator matches */
11240 if (termlen == 1) /* If is the only byte, are done */
11243 /* If the remainder of the terminator matches, also are
11244 * done, after checking that is a separate grapheme */
11245 if ( s + termlen <= PL_bufend
11246 && memEQ(s + 1, (char*)termstr + 1, termlen - 1))
11249 && UNLIKELY(! is_grapheme((U8 *) start,
11254 yyerror(non_grapheme_msg);
11259 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF) {
11267 /* if the terminator isn't the same as the start character (e.g.,
11268 matched brackets), we have to allow more in the quoting, and
11269 be prepared for nested brackets.
11272 /* read until we run out of string, or we find the terminator */
11273 for (; s < PL_bufend; s++,to++) {
11274 /* embedded newlines increment the line count */
11275 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
11276 COPLINE_INC_WITH_HERELINES;
11277 /* backslashes can escape the open or closing characters */
11278 if (*s == '\\' && s+1 < PL_bufend) {
11279 if (!keep_bracketed_quoted
11280 && ( ((UV)s[1] == PL_multi_open)
11281 || ((UV)s[1] == PL_multi_close) ))
11288 /* allow nested opens and closes */
11289 else if ((UV)*s == PL_multi_close && --brackets <= 0)
11291 else if ((UV)*s == PL_multi_open)
11293 else if (!d_is_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
11298 /* terminate the copied string and update the sv's end-of-string */
11300 SvCUR_set(sv, to - SvPVX_const(sv));
11303 * this next chunk reads more into the buffer if we're not done yet
11307 break; /* handle case where we are done yet :-) */
11309 #ifndef PERL_STRICT_CR
11310 if (to - SvPVX_const(sv) >= 2) {
11311 if ( (to[-2] == '\r' && to[-1] == '\n')
11312 || (to[-2] == '\n' && to[-1] == '\r'))
11316 SvCUR_set(sv, to - SvPVX_const(sv));
11318 else if (to[-1] == '\r')
11321 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
11325 /* if we're out of file, or a read fails, bail and reset the current
11326 line marker so we can report where the unterminated string began
11328 COPLINE_INC_WITH_HERELINES;
11329 PL_bufptr = PL_bufend;
11330 if (!lex_next_chunk(0)) {
11332 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
11335 s = start = PL_bufptr;
11338 /* at this point, we have successfully read the delimited string */
11341 sv_catpvn(sv, s, termlen);
11347 PL_multi_end = CopLINE(PL_curcop);
11348 CopLINE_set(PL_curcop, PL_multi_start);
11349 PL_parser->herelines = herelines;
11351 /* if we allocated too much space, give some back */
11352 if (SvCUR(sv) + 5 < SvLEN(sv)) {
11353 SvLEN_set(sv, SvCUR(sv) + 1);
11354 SvPV_renew(sv, SvLEN(sv));
11357 /* decide whether this is the first or second quoted string we've read
11362 PL_parser->lex_sub_repl = sv;
11365 if (delimp) *delimp = PL_multi_open == PL_multi_close ? s-termlen : s;
11371 takes: pointer to position in buffer
11372 returns: pointer to new position in buffer
11373 side-effects: builds ops for the constant in pl_yylval.op
11375 Read a number in any of the formats that Perl accepts:
11377 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
11378 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
11379 0b[01](_?[01])* binary integers
11380 0[0-7](_?[0-7])* octal integers
11381 0x[0-9A-Fa-f](_?[0-9A-Fa-f])* hexadecimal integers
11382 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*(?:\.\d*)?p[+-]?[0-9]+ hexadecimal floats
11384 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
11387 If it reads a number without a decimal point or an exponent, it will
11388 try converting the number to an integer and see if it can do so
11389 without loss of precision.
11393 Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
11395 const char *s = start; /* current position in buffer */
11396 char *d; /* destination in temp buffer */
11397 char *e; /* end of temp buffer */
11398 NV nv; /* number read, as a double */
11399 SV *sv = NULL; /* place to put the converted number */
11400 bool floatit; /* boolean: int or float? */
11401 const char *lastub = NULL; /* position of last underbar */
11402 static const char* const number_too_long = "Number too long";
11403 bool warned_about_underscore = 0;
11404 I32 shift; /* shift per digit for hex/oct/bin, hoisted here for fp */
11405 #define WARN_ABOUT_UNDERSCORE() \
11407 if (!warned_about_underscore) { \
11408 warned_about_underscore = 1; \
11409 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), \
11410 "Misplaced _ in number"); \
11413 /* Hexadecimal floating point.
11415 * In many places (where we have quads and NV is IEEE 754 double)
11416 * we can fit the mantissa bits of a NV into an unsigned quad.
11417 * (Note that UVs might not be quads even when we have quads.)
11418 * This will not work everywhere, though (either no quads, or
11419 * using long doubles), in which case we have to resort to NV,
11420 * which will probably mean horrible loss of precision due to
11421 * multiple fp operations. */
11422 bool hexfp = FALSE;
11423 int total_bits = 0;
11424 int significant_bits = 0;
11425 #if NVSIZE == 8 && defined(HAS_QUAD) && defined(Uquad_t)
11426 # define HEXFP_UQUAD
11427 Uquad_t hexfp_uquad = 0;
11428 int hexfp_frac_bits = 0;
11433 NV hexfp_mult = 1.0;
11434 UV high_non_zero = 0; /* highest digit */
11435 int non_zero_integer_digits = 0;
11437 PERL_ARGS_ASSERT_SCAN_NUM;
11439 /* We use the first character to decide what type of number this is */
11443 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
11445 /* if it starts with a 0, it could be an octal number, a decimal in
11446 0.13 disguise, or a hexadecimal number, or a binary number. */
11450 u holds the "number so far"
11451 overflowed was the number more than we can hold?
11453 Shift is used when we add a digit. It also serves as an "are
11454 we in octal/hex/binary?" indicator to disallow hex characters
11455 when in octal mode.
11459 bool overflowed = FALSE;
11460 bool just_zero = TRUE; /* just plain 0 or binary number? */
11461 bool has_digs = FALSE;
11462 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
11463 static const char* const bases[5] =
11464 { "", "binary", "", "octal", "hexadecimal" };
11465 static const char* const Bases[5] =
11466 { "", "Binary", "", "Octal", "Hexadecimal" };
11467 static const char* const maxima[5] =
11469 "0b11111111111111111111111111111111",
11473 const char *base, *Base, *max;
11475 /* check for hex */
11476 if (isALPHA_FOLD_EQ(s[1], 'x')) {
11480 } else if (isALPHA_FOLD_EQ(s[1], 'b')) {
11485 /* check for a decimal in disguise */
11486 else if (s[1] == '.' || isALPHA_FOLD_EQ(s[1], 'e'))
11488 /* so it must be octal */
11495 WARN_ABOUT_UNDERSCORE();
11499 base = bases[shift];
11500 Base = Bases[shift];
11501 max = maxima[shift];
11503 /* read the rest of the number */
11505 /* x is used in the overflow test,
11506 b is the digit we're adding on. */
11511 /* if we don't mention it, we're done */
11515 /* _ are ignored -- but warned about if consecutive */
11517 if (lastub && s == lastub + 1)
11518 WARN_ABOUT_UNDERSCORE();
11522 /* 8 and 9 are not octal */
11523 case '8': case '9':
11525 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
11529 case '2': case '3': case '4':
11530 case '5': case '6': case '7':
11532 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
11535 case '0': case '1':
11536 b = *s++ & 15; /* ASCII digit -> value of digit */
11540 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
11541 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
11542 /* make sure they said 0x */
11545 b = (*s++ & 7) + 9;
11547 /* Prepare to put the digit we have onto the end
11548 of the number so far. We check for overflows.
11555 assert(shift >= 0);
11556 x = u << shift; /* make room for the digit */
11558 total_bits += shift;
11560 if ((x >> shift) != u
11561 && !(PL_hints & HINT_NEW_BINARY)) {
11564 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11565 "Integer overflow in %s number",
11568 u = x | b; /* add the digit to the end */
11571 n *= nvshift[shift];
11572 /* If an NV has not enough bits in its
11573 * mantissa to represent an UV this summing of
11574 * small low-order numbers is a waste of time
11575 * (because the NV cannot preserve the
11576 * low-order bits anyway): we could just
11577 * remember when did we overflow and in the
11578 * end just multiply n by the right
11583 if (high_non_zero == 0 && b > 0)
11587 non_zero_integer_digits++;
11589 /* this could be hexfp, but peek ahead
11590 * to avoid matching ".." */
11591 if (UNLIKELY(HEXFP_PEEK(s))) {
11599 /* if we get here, we had success: make a scalar value from
11604 /* final misplaced underbar check */
11606 WARN_ABOUT_UNDERSCORE();
11608 if (UNLIKELY(HEXFP_PEEK(s))) {
11609 /* Do sloppy (on the underbars) but quick detection
11610 * (and value construction) for hexfp, the decimal
11611 * detection will shortly be more thorough with the
11612 * underbar checks. */
11614 significant_bits = non_zero_integer_digits * shift;
11617 #else /* HEXFP_NV */
11620 /* Ignore the leading zero bits of
11621 * the high (first) non-zero digit. */
11622 if (high_non_zero) {
11623 if (high_non_zero < 0x8)
11624 significant_bits--;
11625 if (high_non_zero < 0x4)
11626 significant_bits--;
11627 if (high_non_zero < 0x2)
11628 significant_bits--;
11635 bool accumulate = TRUE;
11637 int lim = 1 << shift;
11638 for (h++; ((isXDIGIT(*h) && (b = XDIGIT_VALUE(*h)) < lim) ||
11640 if (isXDIGIT(*h)) {
11641 significant_bits += shift;
11644 if (significant_bits < NV_MANT_DIG) {
11645 /* We are in the long "run" of xdigits,
11646 * accumulate the full four bits. */
11647 assert(shift >= 0);
11648 hexfp_uquad <<= shift;
11650 hexfp_frac_bits += shift;
11651 } else if (significant_bits - shift < NV_MANT_DIG) {
11652 /* We are at a hexdigit either at,
11653 * or straddling, the edge of mantissa.
11654 * We will try grabbing as many as
11655 * possible bits. */
11657 significant_bits - NV_MANT_DIG;
11661 hexfp_uquad <<= tail;
11662 assert((shift - tail) >= 0);
11663 hexfp_uquad |= b >> (shift - tail);
11664 hexfp_frac_bits += tail;
11666 /* Ignore the trailing zero bits
11667 * of the last non-zero xdigit.
11669 * The assumption here is that if
11670 * one has input of e.g. the xdigit
11671 * eight (0x8), there is only one
11672 * bit being input, not the full
11673 * four bits. Conversely, if one
11674 * specifies a zero xdigit, the
11675 * assumption is that one really
11676 * wants all those bits to be zero. */
11678 if ((b & 0x1) == 0x0) {
11679 significant_bits--;
11680 if ((b & 0x2) == 0x0) {
11681 significant_bits--;
11682 if ((b & 0x4) == 0x0) {
11683 significant_bits--;
11689 accumulate = FALSE;
11692 /* Keep skipping the xdigits, and
11693 * accumulating the significant bits,
11694 * but do not shift the uquad
11695 * (which would catastrophically drop
11696 * high-order bits) or accumulate the
11697 * xdigits anymore. */
11699 #else /* HEXFP_NV */
11701 nv_mult /= nvshift[shift];
11703 hexfp_nv += b * nv_mult;
11705 accumulate = FALSE;
11709 if (significant_bits >= NV_MANT_DIG)
11710 accumulate = FALSE;
11714 if ((total_bits > 0 || significant_bits > 0) &&
11715 isALPHA_FOLD_EQ(*h, 'p')) {
11716 bool negexp = FALSE;
11720 else if (*h == '-') {
11726 while (isDIGIT(*h) || *h == '_') {
11729 hexfp_exp += *h - '0';
11732 && -hexfp_exp < NV_MIN_EXP - 1) {
11733 /* NOTE: this means that the exponent
11734 * underflow warning happens for
11735 * the IEEE 754 subnormals (denormals),
11736 * because DBL_MIN_EXP etc are the lowest
11737 * possible binary (or, rather, DBL_RADIX-base)
11738 * exponent for normals, not subnormals.
11740 * This may or may not be a good thing. */
11741 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11742 "Hexadecimal float: exponent underflow");
11748 && hexfp_exp > NV_MAX_EXP - 1) {
11749 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
11750 "Hexadecimal float: exponent overflow");
11758 hexfp_exp = -hexfp_exp;
11760 hexfp_exp -= hexfp_frac_bits;
11762 hexfp_mult = Perl_pow(2.0, hexfp_exp);
11769 if (shift != 3 && !has_digs) {
11770 /* 0x or 0b with no digits, treat it as an error.
11771 Originally this backed up the parse before the b or
11772 x, but that has the potential for silent changes in
11773 behaviour, like for: "0x.3" and "0x+$foo".
11776 char *oldbp = PL_bufptr;
11777 if (*d) ++d; /* so the user sees the bad non-digit */
11778 PL_bufptr = (char *)d; /* so yyerror reports the context */
11779 yyerror(Perl_form(aTHX_ "No digits found for %s literal",
11780 shift == 4 ? "hexadecimal" : "binary"));
11785 if (n > 4294967295.0)
11786 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11787 "%s number > %s non-portable",
11793 if (u > 0xffffffff)
11794 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11795 "%s number > %s non-portable",
11800 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
11801 sv = new_constant(start, s - start, "integer",
11802 sv, NULL, NULL, 0, NULL);
11803 else if (PL_hints & HINT_NEW_BINARY)
11804 sv = new_constant(start, s - start, "binary",
11805 sv, NULL, NULL, 0, NULL);
11810 handle decimal numbers.
11811 we're also sent here when we read a 0 as the first digit
11813 case '1': case '2': case '3': case '4': case '5':
11814 case '6': case '7': case '8': case '9': case '.':
11817 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
11835 NOT_REACHED; /* NOTREACHED */
11839 /* read next group of digits and _ and copy into d */
11842 || UNLIKELY(hexfp && isXDIGIT(*s)))
11844 /* skip underscores, checking for misplaced ones
11848 if (lastub && s == lastub + 1)
11849 WARN_ABOUT_UNDERSCORE();
11853 /* check for end of fixed-length buffer */
11855 Perl_croak(aTHX_ "%s", number_too_long);
11856 /* if we're ok, copy the character */
11861 /* final misplaced underbar check */
11862 if (lastub && s == lastub + 1)
11863 WARN_ABOUT_UNDERSCORE();
11865 /* read a decimal portion if there is one. avoid
11866 3..5 being interpreted as the number 3. followed
11869 if (*s == '.' && s[1] != '.') {
11874 WARN_ABOUT_UNDERSCORE();
11878 /* copy, ignoring underbars, until we run out of digits.
11882 || UNLIKELY(hexfp && isXDIGIT(*s));
11885 /* fixed length buffer check */
11887 Perl_croak(aTHX_ "%s", number_too_long);
11889 if (lastub && s == lastub + 1)
11890 WARN_ABOUT_UNDERSCORE();
11896 /* fractional part ending in underbar? */
11898 WARN_ABOUT_UNDERSCORE();
11899 if (*s == '.' && isDIGIT(s[1])) {
11900 /* oops, it's really a v-string, but without the "v" */
11906 /* read exponent part, if present */
11907 if ((isALPHA_FOLD_EQ(*s, 'e')
11908 || UNLIKELY(hexfp && isALPHA_FOLD_EQ(*s, 'p')))
11909 && memCHRs("+-0123456789_", s[1]))
11911 int exp_digits = 0;
11912 const char *save_s = s;
11915 /* regardless of whether user said 3E5 or 3e5, use lower 'e',
11916 ditto for p (hexfloats) */
11917 if ((isALPHA_FOLD_EQ(*s, 'e'))) {
11918 /* At least some Mach atof()s don't grok 'E' */
11921 else if (UNLIKELY(hexfp && (isALPHA_FOLD_EQ(*s, 'p')))) {
11928 /* stray preinitial _ */
11930 WARN_ABOUT_UNDERSCORE();
11934 /* allow positive or negative exponent */
11935 if (*s == '+' || *s == '-')
11938 /* stray initial _ */
11940 WARN_ABOUT_UNDERSCORE();
11944 /* read digits of exponent */
11945 while (isDIGIT(*s) || *s == '_') {
11949 Perl_croak(aTHX_ "%s", number_too_long);
11953 if (((lastub && s == lastub + 1)
11954 || (!isDIGIT(s[1]) && s[1] != '_')))
11955 WARN_ABOUT_UNDERSCORE();
11961 /* no exponent digits, the [eEpP] could be for something else,
11962 * though in practice we don't get here for p since that's preparsed
11963 * earlier, and results in only the 0xX being consumed, so behave similarly
11964 * for decimal floats and consume only the D.DD, leaving the [eE] to the
11977 We try to do an integer conversion first if no characters
11978 indicating "float" have been found.
11983 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
11985 if (flags == IS_NUMBER_IN_UV) {
11987 sv = newSViv(uv); /* Prefer IVs over UVs. */
11990 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11991 if (uv <= (UV) IV_MIN)
11992 sv = newSViv(-(IV)uv);
11999 /* terminate the string */
12001 if (UNLIKELY(hexfp)) {
12002 # ifdef NV_MANT_DIG
12003 if (significant_bits > NV_MANT_DIG)
12004 Perl_ck_warner(aTHX_ packWARN(WARN_OVERFLOW),
12005 "Hexadecimal float: mantissa overflow");
12008 nv = hexfp_uquad * hexfp_mult;
12009 #else /* HEXFP_NV */
12010 nv = hexfp_nv * hexfp_mult;
12013 nv = Atof(PL_tokenbuf);
12019 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12020 const char *const key = floatit ? "float" : "integer";
12021 const STRLEN keylen = floatit ? 5 : 7;
12022 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12023 key, keylen, sv, NULL, NULL, 0, NULL);
12027 /* if it starts with a v, it could be a v-string */
12030 sv = newSV(5); /* preallocate storage space */
12031 ENTER_with_name("scan_vstring");
12033 s = scan_vstring(s, PL_bufend, sv);
12034 SvREFCNT_inc_simple_void_NN(sv);
12035 LEAVE_with_name("scan_vstring");
12039 /* make the op for the constant and return */
12042 lvalp->opval = newSVOP(OP_CONST, 0, sv);
12044 lvalp->opval = NULL;
12050 S_scan_formline(pTHX_ char *s)
12052 SV * const stuff = newSVpvs("");
12053 bool needargs = FALSE;
12054 bool eofmt = FALSE;
12056 PERL_ARGS_ASSERT_SCAN_FORMLINE;
12058 while (!needargs) {
12062 #ifdef PERL_STRICT_CR
12063 while (SPACE_OR_TAB(*t))
12066 while (SPACE_OR_TAB(*t) || *t == '\r')
12069 if (*t == '\n' || t == PL_bufend) {
12074 eol = (char *) memchr(s,'\n',PL_bufend-s);
12079 for (t = s; t < eol; t++) {
12080 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
12082 goto enough; /* ~~ must be first line in formline */
12084 if (*t == '@' || *t == '^')
12088 sv_catpvn(stuff, s, eol-s);
12089 #ifndef PERL_STRICT_CR
12090 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
12091 char *end = SvPVX(stuff) + SvCUR(stuff);
12094 SvCUR_set(stuff, SvCUR(stuff) - 1);
12102 if ((PL_rsfp || PL_parser->filtered)
12103 && PL_parser->form_lex_state == LEX_NORMAL) {
12105 PL_bufptr = PL_bufend;
12106 COPLINE_INC_WITH_HERELINES;
12107 got_some = lex_next_chunk(0);
12108 CopLINE_dec(PL_curcop);
12113 incline(s, PL_bufend);
12116 if (!SvCUR(stuff) || needargs)
12117 PL_lex_state = PL_parser->form_lex_state;
12118 if (SvCUR(stuff)) {
12119 PL_expect = XSTATE;
12121 const char *s2 = s;
12122 while (isSPACE(*s2) && *s2 != '\n')
12125 PL_expect = XTERMBLOCK;
12126 NEXTVAL_NEXTTOKE.ival = 0;
12129 NEXTVAL_NEXTTOKE.ival = 0;
12130 force_next(FORMLBRACK);
12133 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
12136 NEXTVAL_NEXTTOKE.opval = newSVOP(OP_CONST, 0, stuff);
12140 SvREFCNT_dec(stuff);
12142 PL_lex_formbrack = 0;
12148 Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
12150 const I32 oldsavestack_ix = PL_savestack_ix;
12151 CV* const outsidecv = PL_compcv;
12153 SAVEI32(PL_subline);
12154 save_item(PL_subname);
12155 SAVESPTR(PL_compcv);
12157 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
12158 CvFLAGS(PL_compcv) |= flags;
12160 PL_subline = CopLINE(PL_curcop);
12161 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
12162 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
12163 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
12164 if (outsidecv && CvPADLIST(outsidecv))
12165 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
12167 return oldsavestack_ix;
12171 /* Do extra initialisation of a CV (typically one just created by
12172 * start_subparse()) if that CV is for a named sub
12176 Perl_init_named_cv(pTHX_ CV *cv, OP *nameop)
12178 PERL_ARGS_ASSERT_INIT_NAMED_CV;
12180 if (nameop->op_type == OP_CONST) {
12181 const char *const name = SvPV_nolen_const(((SVOP*)nameop)->op_sv);
12182 if ( strEQ(name, "BEGIN")
12183 || strEQ(name, "END")
12184 || strEQ(name, "INIT")
12185 || strEQ(name, "CHECK")
12186 || strEQ(name, "UNITCHECK")
12191 /* State subs inside anonymous subs need to be
12192 clonable themselves. */
12193 if ( CvANON(CvOUTSIDE(cv))
12194 || CvCLONE(CvOUTSIDE(cv))
12195 || !PadnameIsSTATE(PadlistNAMESARRAY(CvPADLIST(
12197 ))[nameop->op_targ])
12204 S_yywarn(pTHX_ const char *const s, U32 flags)
12206 PERL_ARGS_ASSERT_YYWARN;
12208 PL_in_eval |= EVAL_WARNONLY;
12209 yyerror_pv(s, flags);
12214 Perl_abort_execution(pTHX_ const char * const msg, const char * const name)
12216 PERL_ARGS_ASSERT_ABORT_EXECUTION;
12219 Perl_croak(aTHX_ "%s%s had compilation errors.\n", msg, name);
12222 "%sExecution of %s aborted due to compilation errors.\n", msg, name);
12224 NOT_REACHED; /* NOTREACHED */
12230 /* Called, after at least one error has been found, to abort the parse now,
12231 * instead of trying to forge ahead */
12233 yyerror_pvn(NULL, 0, 0);
12237 Perl_yyerror(pTHX_ const char *const s)
12239 PERL_ARGS_ASSERT_YYERROR;
12240 return yyerror_pvn(s, strlen(s), 0);
12244 Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
12246 PERL_ARGS_ASSERT_YYERROR_PV;
12247 return yyerror_pvn(s, strlen(s), flags);
12251 Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
12253 const char *context = NULL;
12256 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
12257 int yychar = PL_parser->yychar;
12259 /* Output error message 's' with length 'len'. 'flags' are SV flags that
12260 * apply. If the number of errors found is large enough, it abandons
12261 * parsing. If 's' is NULL, there is no message, and it abandons
12262 * processing unconditionally */
12265 if (!yychar || (yychar == ';' && !PL_rsfp))
12266 sv_catpvs(where_sv, "at EOF");
12267 else if ( PL_oldoldbufptr
12268 && PL_bufptr > PL_oldoldbufptr
12269 && PL_bufptr - PL_oldoldbufptr < 200
12270 && PL_oldoldbufptr != PL_oldbufptr
12271 && PL_oldbufptr != PL_bufptr)
12275 The code below is removed for NetWare because it
12276 abends/crashes on NetWare when the script has error such as
12277 not having the closing quotes like:
12278 if ($var eq "value)
12279 Checking of white spaces is anyway done in NetWare code.
12282 while (isSPACE(*PL_oldoldbufptr))
12285 context = PL_oldoldbufptr;
12286 contlen = PL_bufptr - PL_oldoldbufptr;
12288 else if ( PL_oldbufptr
12289 && PL_bufptr > PL_oldbufptr
12290 && PL_bufptr - PL_oldbufptr < 200
12291 && PL_oldbufptr != PL_bufptr) {
12294 The code below is removed for NetWare because it
12295 abends/crashes on NetWare when the script has error such as
12296 not having the closing quotes like:
12297 if ($var eq "value)
12298 Checking of white spaces is anyway done in NetWare code.
12301 while (isSPACE(*PL_oldbufptr))
12304 context = PL_oldbufptr;
12305 contlen = PL_bufptr - PL_oldbufptr;
12307 else if (yychar > 255)
12308 sv_catpvs(where_sv, "next token ???");
12309 else if (yychar == YYEMPTY) {
12310 if (PL_lex_state == LEX_NORMAL)
12311 sv_catpvs(where_sv, "at end of line");
12312 else if (PL_lex_inpat)
12313 sv_catpvs(where_sv, "within pattern");
12315 sv_catpvs(where_sv, "within string");
12318 sv_catpvs(where_sv, "next char ");
12320 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
12321 else if (isPRINT_LC(yychar)) {
12322 const char string = yychar;
12323 sv_catpvn(where_sv, &string, 1);
12326 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
12328 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
12329 Perl_sv_catpvf(aTHX_ msg, " at %s line %" IVdf ", ",
12330 OutCopFILE(PL_curcop),
12331 (IV)(PL_parser->preambling == NOLINE
12332 ? CopLINE(PL_curcop)
12333 : PL_parser->preambling));
12335 Perl_sv_catpvf(aTHX_ msg, "near \"%" UTF8f "\"\n",
12336 UTF8fARG(UTF, contlen, context));
12338 Perl_sv_catpvf(aTHX_ msg, "%" SVf "\n", SVfARG(where_sv));
12339 if ( PL_multi_start < PL_multi_end
12340 && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1)
12342 Perl_sv_catpvf(aTHX_ msg,
12343 " (Might be a runaway multi-line %c%c string starting on"
12344 " line %" IVdf ")\n",
12345 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
12348 if (PL_in_eval & EVAL_WARNONLY) {
12349 PL_in_eval &= ~EVAL_WARNONLY;
12350 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%" SVf, SVfARG(msg));
12356 if (s == NULL || PL_error_count >= 10) {
12357 const char * msg = "";
12358 const char * const name = OutCopFILE(PL_curcop);
12361 SV * errsv = ERRSV;
12362 if (SvCUR(errsv)) {
12363 msg = Perl_form(aTHX_ "%" SVf, SVfARG(errsv));
12368 abort_execution(msg, name);
12371 Perl_croak(aTHX_ "%s%s has too many errors.\n", msg, name);
12375 PL_in_my_stash = NULL;
12380 S_swallow_bom(pTHX_ U8 *s)
12382 const STRLEN slen = SvCUR(PL_linestr);
12384 PERL_ARGS_ASSERT_SWALLOW_BOM;
12388 if (s[1] == 0xFE) {
12389 /* UTF-16 little-endian? (or UTF-32LE?) */
12390 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
12391 /* diag_listed_as: Unsupported script encoding %s */
12392 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
12393 #ifndef PERL_NO_UTF16_FILTER
12395 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
12398 if (PL_bufend > (char*)s) {
12399 s = add_utf16_textfilter(s, TRUE);
12402 /* diag_listed_as: Unsupported script encoding %s */
12403 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12408 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
12409 #ifndef PERL_NO_UTF16_FILTER
12411 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
12414 if (PL_bufend > (char *)s) {
12415 s = add_utf16_textfilter(s, FALSE);
12418 /* diag_listed_as: Unsupported script encoding %s */
12419 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12423 case BOM_UTF8_FIRST_BYTE: {
12424 if (memBEGINs(s+1, slen - 1, BOM_UTF8_TAIL)) {
12426 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
12428 s += sizeof(BOM_UTF8) - 1; /* UTF-8 */
12435 if (s[2] == 0xFE && s[3] == 0xFF) {
12436 /* UTF-32 big-endian */
12437 /* diag_listed_as: Unsupported script encoding %s */
12438 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
12441 else if (s[2] == 0 && s[3] != 0) {
12444 * are a good indicator of UTF-16BE. */
12445 #ifndef PERL_NO_UTF16_FILTER
12447 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
12449 s = add_utf16_textfilter(s, FALSE);
12451 /* diag_listed_as: Unsupported script encoding %s */
12452 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
12459 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
12462 * are a good indicator of UTF-16LE. */
12463 #ifndef PERL_NO_UTF16_FILTER
12465 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
12467 s = add_utf16_textfilter(s, TRUE);
12469 /* diag_listed_as: Unsupported script encoding %s */
12470 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
12478 #ifndef PERL_NO_UTF16_FILTER
12480 S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
12482 SV *const filter = FILTER_DATA(idx);
12483 /* We re-use this each time round, throwing the contents away before we
12485 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
12486 SV *const utf8_buffer = filter;
12487 IV status = IoPAGE(filter);
12488 const bool reverse = cBOOL(IoLINES(filter));
12491 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
12493 /* As we're automatically added, at the lowest level, and hence only called
12494 from this file, we can be sure that we're not called in block mode. Hence
12495 don't bother writing code to deal with block mode. */
12497 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
12500 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%" IVdf ")", status);
12502 DEBUG_P(PerlIO_printf(Perl_debug_log,
12503 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12504 FPTR2DPTR(void *, S_utf16_textfilter),
12505 reverse ? 'l' : 'b', idx, maxlen, status,
12506 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12513 /* First, look in our buffer of existing UTF-8 data: */
12514 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
12518 } else if (status == 0) {
12520 IoPAGE(filter) = 0;
12521 nl = SvEND(utf8_buffer);
12524 STRLEN got = nl - SvPVX(utf8_buffer);
12525 /* Did we have anything to append? */
12527 sv_catpvn(sv, SvPVX(utf8_buffer), got);
12528 /* Everything else in this code works just fine if SVp_POK isn't
12529 set. This, however, needs it, and we need it to work, else
12530 we loop infinitely because the buffer is never consumed. */
12531 sv_chop(utf8_buffer, nl);
12535 /* OK, not a complete line there, so need to read some more UTF-16.
12536 Read an extra octect if the buffer currently has an odd number. */
12540 if (SvCUR(utf16_buffer) >= 2) {
12541 /* Location of the high octet of the last complete code point.
12542 Gosh, UTF-16 is a pain. All the benefits of variable length,
12543 *coupled* with all the benefits of partial reads and
12545 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
12546 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
12548 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
12552 /* We have the first half of a surrogate. Read more. */
12553 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
12556 status = FILTER_READ(idx + 1, utf16_buffer,
12557 160 + (SvCUR(utf16_buffer) & 1));
12558 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%" IVdf " SvCUR(sv)=%" UVuf "\n", status, (UV)SvCUR(utf16_buffer)));
12559 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
12562 IoPAGE(filter) = status;
12567 /* 'chars' isn't quite the right name, as code points above 0xFFFF
12568 * require 4 bytes per char */
12569 chars = SvCUR(utf16_buffer) >> 1;
12570 have = SvCUR(utf8_buffer);
12572 /* Assume the worst case size as noted by the functions: twice the
12573 * number of input bytes */
12574 SvGROW(utf8_buffer, have + chars * 4 + 1);
12577 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
12578 (U8*)SvPVX_const(utf8_buffer) + have,
12579 chars * 2, &newlen);
12581 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
12582 (U8*)SvPVX_const(utf8_buffer) + have,
12583 chars * 2, &newlen);
12585 SvCUR_set(utf8_buffer, have + newlen);
12588 /* No need to keep this SV "well-formed" with a '\0' after the end, as
12589 it's private to us, and utf16_to_utf8{,reversed} take a
12590 (pointer,length) pair, rather than a NUL-terminated string. */
12591 if(SvCUR(utf16_buffer) & 1) {
12592 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
12593 SvCUR_set(utf16_buffer, 1);
12595 SvCUR_set(utf16_buffer, 0);
12598 DEBUG_P(PerlIO_printf(Perl_debug_log,
12599 "utf16_textfilter: returns, status=%" IVdf " utf16=%" UVuf " utf8=%" UVuf "\n",
12601 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
12602 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
12607 S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
12609 SV *filter = filter_add(S_utf16_textfilter, NULL);
12611 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
12613 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
12615 IoLINES(filter) = reversed;
12616 IoPAGE(filter) = 1; /* Not EOF */
12618 /* Sadly, we have to return a valid pointer, come what may, so we have to
12619 ignore any error return from this. */
12620 SvCUR_set(PL_linestr, 0);
12621 if (FILTER_READ(0, PL_linestr, 0)) {
12622 SvUTF8_on(PL_linestr);
12624 SvUTF8_on(PL_linestr);
12626 PL_bufend = SvEND(PL_linestr);
12627 return (U8*)SvPVX(PL_linestr);
12632 Returns a pointer to the next character after the parsed
12633 vstring, as well as updating the passed in sv.
12635 Function must be called like
12637 sv = sv_2mortal(newSV(5));
12638 s = scan_vstring(s,e,sv);
12640 where s and e are the start and end of the string.
12641 The sv should already be large enough to store the vstring
12642 passed in, for performance reasons.
12644 This function may croak if fatal warnings are enabled in the
12645 calling scope, hence the sv_2mortal in the example (to prevent
12646 a leak). Make sure to do SvREFCNT_inc afterwards if you use
12652 Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
12654 const char *pos = s;
12655 const char *start = s;
12657 PERL_ARGS_ASSERT_SCAN_VSTRING;
12659 if (*pos == 'v') pos++; /* get past 'v' */
12660 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12662 if ( *pos != '.') {
12663 /* this may not be a v-string if followed by => */
12664 const char *next = pos;
12665 while (next < e && isSPACE(*next))
12667 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
12668 /* return string not v-string */
12669 sv_setpvn(sv,(char *)s,pos-s);
12670 return (char *)pos;
12674 if (!isALPHA(*pos)) {
12675 U8 tmpbuf[UTF8_MAXBYTES+1];
12678 s++; /* get past 'v' */
12683 /* this is atoi() that tolerates underscores */
12686 const char *end = pos;
12688 while (--end >= s) {
12690 const UV orev = rev;
12691 rev += (*end - '0') * mult;
12694 /* diag_listed_as: Integer overflow in %s number */
12695 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12696 "Integer overflow in decimal number");
12700 /* Append native character for the rev point */
12701 tmpend = uvchr_to_utf8(tmpbuf, rev);
12702 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
12703 if (!UVCHR_IS_INVARIANT(rev))
12705 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
12711 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
12715 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
12722 Perl_keyword_plugin_standard(pTHX_
12723 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12725 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
12726 PERL_UNUSED_CONTEXT;
12727 PERL_UNUSED_ARG(keyword_ptr);
12728 PERL_UNUSED_ARG(keyword_len);
12729 PERL_UNUSED_ARG(op_ptr);
12730 return KEYWORD_PLUGIN_DECLINE;
12734 =for apidoc wrap_keyword_plugin
12736 Puts a C function into the chain of keyword plugins. This is the
12737 preferred way to manipulate the L</PL_keyword_plugin> variable.
12738 C<new_plugin> is a pointer to the C function that is to be added to the
12739 keyword plugin chain, and C<old_plugin_p> points to the storage location
12740 where a pointer to the next function in the chain will be stored. The
12741 value of C<new_plugin> is written into the L</PL_keyword_plugin> variable,
12742 while the value previously stored there is written to C<*old_plugin_p>.
12744 L</PL_keyword_plugin> is global to an entire process, and a module wishing
12745 to hook keyword parsing may find itself invoked more than once per
12746 process, typically in different threads. To handle that situation, this
12747 function is idempotent. The location C<*old_plugin_p> must initially
12748 (once per process) contain a null pointer. A C variable of static
12749 duration (declared at file scope, typically also marked C<static> to give
12750 it internal linkage) will be implicitly initialised appropriately, if it
12751 does not have an explicit initialiser. This function will only actually
12752 modify the plugin chain if it finds C<*old_plugin_p> to be null. This
12753 function is also thread safe on the small scale. It uses appropriate
12754 locking to avoid race conditions in accessing L</PL_keyword_plugin>.
12756 When this function is called, the function referenced by C<new_plugin>
12757 must be ready to be called, except for C<*old_plugin_p> being unfilled.
12758 In a threading situation, C<new_plugin> may be called immediately, even
12759 before this function has returned. C<*old_plugin_p> will always be
12760 appropriately set before C<new_plugin> is called. If C<new_plugin>
12761 decides not to do anything special with the identifier that it is given
12762 (which is the usual case for most calls to a keyword plugin), it must
12763 chain the plugin function referenced by C<*old_plugin_p>.
12765 Taken all together, XS code to install a keyword plugin should typically
12766 look something like this:
12768 static Perl_keyword_plugin_t next_keyword_plugin;
12769 static OP *my_keyword_plugin(pTHX_
12770 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
12772 if (memEQs(keyword_ptr, keyword_len,
12773 "my_new_keyword")) {
12776 return next_keyword_plugin(aTHX_
12777 keyword_ptr, keyword_len, op_ptr);
12781 wrap_keyword_plugin(my_keyword_plugin,
12782 &next_keyword_plugin);
12784 Direct access to L</PL_keyword_plugin> should be avoided.
12790 Perl_wrap_keyword_plugin(pTHX_
12791 Perl_keyword_plugin_t new_plugin, Perl_keyword_plugin_t *old_plugin_p)
12795 PERL_UNUSED_CONTEXT;
12796 PERL_ARGS_ASSERT_WRAP_KEYWORD_PLUGIN;
12797 if (*old_plugin_p) return;
12798 KEYWORD_PLUGIN_MUTEX_LOCK;
12799 if (!*old_plugin_p) {
12800 *old_plugin_p = PL_keyword_plugin;
12801 PL_keyword_plugin = new_plugin;
12803 KEYWORD_PLUGIN_MUTEX_UNLOCK;
12806 #define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
12808 S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
12810 SAVEI32(PL_lex_brackets);
12811 if (PL_lex_brackets > 100)
12812 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
12813 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
12814 SAVEI32(PL_lex_allbrackets);
12815 PL_lex_allbrackets = 0;
12816 SAVEI8(PL_lex_fakeeof);
12817 PL_lex_fakeeof = (U8)fakeeof;
12818 if(yyparse(gramtype) && !PL_parser->error_count)
12819 qerror(Perl_mess(aTHX_ "Parse error"));
12822 #define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
12824 S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
12828 SAVEVPTR(PL_eval_root);
12829 PL_eval_root = NULL;
12830 parse_recdescent(gramtype, fakeeof);
12836 #define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
12838 S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
12841 if (flags & ~PARSE_OPTIONAL)
12842 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
12843 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
12844 if (!exprop && !(flags & PARSE_OPTIONAL)) {
12845 if (!PL_parser->error_count)
12846 qerror(Perl_mess(aTHX_ "Parse error"));
12847 exprop = newOP(OP_NULL, 0);
12853 =for apidoc parse_arithexpr
12855 Parse a Perl arithmetic expression. This may contain operators of precedence
12856 down to the bit shift operators. The expression must be followed (and thus
12857 terminated) either by a comparison or lower-precedence operator or by
12858 something that would normally terminate an expression such as semicolon.
12859 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12860 otherwise it is mandatory. It is up to the caller to ensure that the
12861 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12862 the source of the code to be parsed and the lexical context for the
12865 The op tree representing the expression is returned. If an optional
12866 expression is absent, a null pointer is returned, otherwise the pointer
12869 If an error occurs in parsing or compilation, in most cases a valid op
12870 tree is returned anyway. The error is reflected in the parser state,
12871 normally resulting in a single exception at the top level of parsing
12872 which covers all the compilation errors that occurred. Some compilation
12873 errors, however, will throw an exception immediately.
12875 =for apidoc Amnh||PARSE_OPTIONAL
12882 Perl_parse_arithexpr(pTHX_ U32 flags)
12884 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
12888 =for apidoc parse_termexpr
12890 Parse a Perl term expression. This may contain operators of precedence
12891 down to the assignment operators. The expression must be followed (and thus
12892 terminated) either by a comma or lower-precedence operator or by
12893 something that would normally terminate an expression such as semicolon.
12894 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12895 otherwise it is mandatory. It is up to the caller to ensure that the
12896 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12897 the source of the code to be parsed and the lexical context for the
12900 The op tree representing the expression is returned. If an optional
12901 expression is absent, a null pointer is returned, otherwise the pointer
12904 If an error occurs in parsing or compilation, in most cases a valid op
12905 tree is returned anyway. The error is reflected in the parser state,
12906 normally resulting in a single exception at the top level of parsing
12907 which covers all the compilation errors that occurred. Some compilation
12908 errors, however, will throw an exception immediately.
12914 Perl_parse_termexpr(pTHX_ U32 flags)
12916 return parse_expr(LEX_FAKEEOF_COMMA, flags);
12920 =for apidoc parse_listexpr
12922 Parse a Perl list expression. This may contain operators of precedence
12923 down to the comma operator. The expression must be followed (and thus
12924 terminated) either by a low-precedence logic operator such as C<or> or by
12925 something that would normally terminate an expression such as semicolon.
12926 If C<flags> has the C<PARSE_OPTIONAL> bit set, then the expression is optional,
12927 otherwise it is mandatory. It is up to the caller to ensure that the
12928 dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12929 the source of the code to be parsed and the lexical context for the
12932 The op tree representing the expression is returned. If an optional
12933 expression is absent, a null pointer is returned, otherwise the pointer
12936 If an error occurs in parsing or compilation, in most cases a valid op
12937 tree is returned anyway. The error is reflected in the parser state,
12938 normally resulting in a single exception at the top level of parsing
12939 which covers all the compilation errors that occurred. Some compilation
12940 errors, however, will throw an exception immediately.
12946 Perl_parse_listexpr(pTHX_ U32 flags)
12948 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12952 =for apidoc parse_fullexpr
12954 Parse a single complete Perl expression. This allows the full
12955 expression grammar, including the lowest-precedence operators such
12956 as C<or>. The expression must be followed (and thus terminated) by a
12957 token that an expression would normally be terminated by: end-of-file,
12958 closing bracketing punctuation, semicolon, or one of the keywords that
12959 signals a postfix expression-statement modifier. If C<flags> has the
12960 C<PARSE_OPTIONAL> bit set, then the expression is optional, otherwise it is
12961 mandatory. It is up to the caller to ensure that the dynamic parser
12962 state (L</PL_parser> et al) is correctly set to reflect the source of
12963 the code to be parsed and the lexical context for the expression.
12965 The op tree representing the expression is returned. If an optional
12966 expression is absent, a null pointer is returned, otherwise the pointer
12969 If an error occurs in parsing or compilation, in most cases a valid op
12970 tree is returned anyway. The error is reflected in the parser state,
12971 normally resulting in a single exception at the top level of parsing
12972 which covers all the compilation errors that occurred. Some compilation
12973 errors, however, will throw an exception immediately.
12979 Perl_parse_fullexpr(pTHX_ U32 flags)
12981 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12985 =for apidoc parse_block
12987 Parse a single complete Perl code block. This consists of an opening
12988 brace, a sequence of statements, and a closing brace. The block
12989 constitutes a lexical scope, so C<my> variables and various compile-time
12990 effects can be contained within it. It is up to the caller to ensure
12991 that the dynamic parser state (L</PL_parser> et al) is correctly set to
12992 reflect the source of the code to be parsed and the lexical context for
12995 The op tree representing the code block is returned. This is always a
12996 real op, never a null pointer. It will normally be a C<lineseq> list,
12997 including C<nextstate> or equivalent ops. No ops to construct any kind
12998 of runtime scope are included by virtue of it being a block.
13000 If an error occurs in parsing or compilation, in most cases a valid op
13001 tree (most likely null) is returned anyway. The error is reflected in
13002 the parser state, normally resulting in a single exception at the top
13003 level of parsing which covers all the compilation errors that occurred.
13004 Some compilation errors, however, will throw an exception immediately.
13006 The C<flags> parameter is reserved for future use, and must always
13013 Perl_parse_block(pTHX_ U32 flags)
13016 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
13017 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
13021 =for apidoc parse_barestmt
13023 Parse a single unadorned Perl statement. This may be a normal imperative
13024 statement or a declaration that has compile-time effect. It does not
13025 include any label or other affixture. It is up to the caller to ensure
13026 that the dynamic parser state (L</PL_parser> et al) is correctly set to
13027 reflect the source of the code to be parsed and the lexical context for
13030 The op tree representing the statement is returned. This may be a
13031 null pointer if the statement is null, for example if it was actually
13032 a subroutine definition (which has compile-time side effects). If not
13033 null, it will be ops directly implementing the statement, suitable to
13034 pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
13035 equivalent op (except for those embedded in a scope contained entirely
13036 within the statement).
13038 If an error occurs in parsing or compilation, in most cases a valid op
13039 tree (most likely null) is returned anyway. The error is reflected in
13040 the parser state, normally resulting in a single exception at the top
13041 level of parsing which covers all the compilation errors that occurred.
13042 Some compilation errors, however, will throw an exception immediately.
13044 The C<flags> parameter is reserved for future use, and must always
13051 Perl_parse_barestmt(pTHX_ U32 flags)
13054 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
13055 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
13059 =for apidoc parse_label
13061 Parse a single label, possibly optional, of the type that may prefix a
13062 Perl statement. It is up to the caller to ensure that the dynamic parser
13063 state (L</PL_parser> et al) is correctly set to reflect the source of
13064 the code to be parsed. If C<flags> has the C<PARSE_OPTIONAL> bit set, then the
13065 label is optional, otherwise it is mandatory.
13067 The name of the label is returned in the form of a fresh scalar. If an
13068 optional label is absent, a null pointer is returned.
13070 If an error occurs in parsing, which can only occur if the label is
13071 mandatory, a valid label is returned anyway. The error is reflected in
13072 the parser state, normally resulting in a single exception at the top
13073 level of parsing which covers all the compilation errors that occurred.
13079 Perl_parse_label(pTHX_ U32 flags)
13081 if (flags & ~PARSE_OPTIONAL)
13082 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
13084 PL_parser->yychar = yylex();
13085 if (PL_parser->yychar == LABEL) {
13086 SV * const labelsv = cSVOPx(pl_yylval.opval)->op_sv;
13087 PL_parser->yychar = YYEMPTY;
13088 cSVOPx(pl_yylval.opval)->op_sv = NULL;
13089 op_free(pl_yylval.opval);
13097 STRLEN wlen, bufptr_pos;
13100 if (!isIDFIRST_lazy_if_safe(s, PL_bufend, UTF))
13102 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
13103 if (word_takes_any_delimiter(s, wlen))
13105 bufptr_pos = s - SvPVX(PL_linestr);
13107 lex_read_space(LEX_KEEP_PREVIOUS);
13109 s = SvPVX(PL_linestr) + bufptr_pos;
13110 if (t[0] == ':' && t[1] != ':') {
13111 PL_oldoldbufptr = PL_oldbufptr;
13114 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
13118 if (flags & PARSE_OPTIONAL) {
13121 qerror(Perl_mess(aTHX_ "Parse error"));
13122 return newSVpvs("x");
13129 =for apidoc parse_fullstmt
13131 Parse a single complete Perl statement. This may be a normal imperative
13132 statement or a declaration that has compile-time effect, and may include
13133 optional labels. It is up to the caller to ensure that the dynamic
13134 parser state (L</PL_parser> et al) is correctly set to reflect the source
13135 of the code to be parsed and the lexical context for the statement.
13137 The op tree representing the statement is returned. This may be a
13138 null pointer if the statement is null, for example if it was actually
13139 a subroutine definition (which has compile-time side effects). If not
13140 null, it will be the result of a L</newSTATEOP> call, normally including
13141 a C<nextstate> or equivalent op.
13143 If an error occurs in parsing or compilation, in most cases a valid op
13144 tree (most likely null) is returned anyway. The error is reflected in
13145 the parser state, normally resulting in a single exception at the top
13146 level of parsing which covers all the compilation errors that occurred.
13147 Some compilation errors, however, will throw an exception immediately.
13149 The C<flags> parameter is reserved for future use, and must always
13156 Perl_parse_fullstmt(pTHX_ U32 flags)
13159 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13160 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
13164 =for apidoc parse_stmtseq
13166 Parse a sequence of zero or more Perl statements. These may be normal
13167 imperative statements, including optional labels, or declarations
13168 that have compile-time effect, or any mixture thereof. The statement
13169 sequence ends when a closing brace or end-of-file is encountered in a
13170 place where a new statement could have validly started. It is up to
13171 the caller to ensure that the dynamic parser state (L</PL_parser> et al)
13172 is correctly set to reflect the source of the code to be parsed and the
13173 lexical context for the statements.
13175 The op tree representing the statement sequence is returned. This may
13176 be a null pointer if the statements were all null, for example if there
13177 were no statements or if there were only subroutine definitions (which
13178 have compile-time side effects). If not null, it will be a C<lineseq>
13179 list, normally including C<nextstate> or equivalent ops.
13181 If an error occurs in parsing or compilation, in most cases a valid op
13182 tree is returned anyway. The error is reflected in the parser state,
13183 normally resulting in a single exception at the top level of parsing
13184 which covers all the compilation errors that occurred. Some compilation
13185 errors, however, will throw an exception immediately.
13187 The C<flags> parameter is reserved for future use, and must always
13194 Perl_parse_stmtseq(pTHX_ U32 flags)
13199 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
13200 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
13201 c = lex_peek_unichar(0);
13202 if (c != -1 && c != /*{*/'}')
13203 qerror(Perl_mess(aTHX_ "Parse error"));
13208 =for apidoc parse_subsignature
13210 Parse a subroutine signature declaration. This is the contents of the
13211 parentheses following a named or anonymous subroutine declaration when the
13212 C<signatures> feature is enabled. Note that this function neither expects
13213 nor consumes the opening and closing parentheses around the signature; it
13214 is the caller's job to handle these.
13216 This function must only be called during parsing of a subroutine; after
13217 L</start_subparse> has been called. It might allocate lexical variables on
13218 the pad for the current subroutine.
13220 The op tree to unpack the arguments from the stack at runtime is returned.
13221 This op tree should appear at the beginning of the compiled function. The
13222 caller may wish to use L</op_append_list> to build their function body
13223 after it, or splice it together with the body before calling L</newATTRSUB>.
13225 The C<flags> parameter is reserved for future use, and must always
13232 Perl_parse_subsignature(pTHX_ U32 flags)
13235 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_subsignature");
13236 return parse_recdescent_for_op(GRAMSUBSIGNATURE, LEX_FAKEEOF_NONEXPR);
13240 * ex: set ts=8 sts=4 sw=4 et: